from __future__ import annotations

import torch
import transformers
from PIL import Image
from torch import nn


class CLIPModel(nn.Module):
    save_in_root: bool = True

    def __init__(self, model_name: str = "openai/clip-vit-base-patch32", processor_name=None) -> None:
        super().__init__()

        if processor_name is None:
            processor_name = model_name

        self.model = transformers.CLIPModel.from_pretrained(model_name)
        self.processor = transformers.CLIPProcessor.from_pretrained(processor_name)

    def __repr__(self) -> str:
        return "CLIPModel()"

    @property
    def max_seq_length(self) -> int:
        return self.processor.tokenizer.model_max_length

    @max_seq_length.setter
    def max_seq_length(self, value: int) -> None:
        self.processor.tokenizer.model_max_length = value

    def forward(self, features: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
        image_embeds = []
        text_embeds = []

        if "pixel_values" in features:
            vision_outputs = self.model.vision_model(pixel_values=features["pixel_values"])
            image_embeds = self.model.visual_projection(vision_outputs[1])

        if "input_ids" in features:
            text_outputs = self.model.text_model(
                input_ids=features.get("input_ids"),
                attention_mask=features.get("attention_mask", None),
                position_ids=features.get("position_ids", None),
                output_attentions=features.get("output_attentions", None),
                output_hidden_states=features.get("output_hidden_states", None),
            )
            text_embeds = self.model.text_projection(text_outputs[1])

        sentence_embedding = []
        image_features = iter(image_embeds)
        text_features = iter(text_embeds)

        for idx, input_type in enumerate(features["image_text_info"]):
            if input_type == 0:
                sentence_embedding.append(next(image_features))
            else:
                sentence_embedding.append(next(text_features))

        features["sentence_embedding"] = torch.stack(sentence_embedding).float()

        return features

    def tokenize(self, texts, padding: str | bool = True) -> dict[str, torch.Tensor]:
        images = []
        texts_values = []
        image_text_info = []

        for idx, data in enumerate(texts):
            if isinstance(data, Image.Image):  # An image
                images.append(data)
                image_text_info.append(0)
            else:  # A text
                texts_values.append(data)
                image_text_info.append(1)

        encoding = {}
        if len(texts_values):
            encoding = self.processor.tokenizer(texts_values, padding=padding, truncation=True, return_tensors="pt")

        if len(images):
            image_features = self.processor.image_processor(images, return_tensors="pt")
            encoding["pixel_values"] = image_features.pixel_values

        encoding["image_text_info"] = image_text_info
        return dict(encoding)

    @property
    def tokenizer(self) -> transformers.CLIPProcessor:
        return self.processor

    def save(self, output_path: str) -> None:
        self.model.save_pretrained(output_path)
        self.processor.save_pretrained(output_path)

    @staticmethod
    def load(input_path: str) -> CLIPModel:
        return CLIPModel(model_name=input_path)