
    Igq'              	           d dl Z d dlmZmZmZmZmZmZmZm	Z	 d dl
Zd dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZ d	d
ddddd	idddddddddiddgZ G d deeej4                           Z G d de      Zy)    N)AnyDictIterableListOptionalSequenceTypeUnion)Encoding)OnnxProvider)OnnxOutputContext)define_cache_dir) LateInteractionTextEmbeddingBase)OnnxTextModelTextEmbeddingWorkerzcolbert-ir/colbertv2.0   zLate interaction modelmitg)\(?hfz
model.onnx)modeldimdescriptionlicense
size_in_GBsources
model_filez%answerdotai/answerai-colbert-small-v1`   zgText embeddings, Unimodal (text), Multilingual (~100 languages), 512 input tokens truncation, 2024 yearz
apache-2.0gp=
ף?zvespa_colbert.onnxc                   >    e Zd ZdZdZdZdZ	 d!dedede	e
j                     fdZ	 d!d	eee
j                  f   ded
edeee
j                  f   fdZd!dee   ded
edee   fdZdedee   fdZdee   dee   fdZedeeeef      fd       Z	 	 	 	 	 	 	 d"dedee   dee   deee      dedeee      dedee   f fdZd#dZ	 	 d$deee	e   f   dedee   de	e
j                     fdZdeee	e   f   de	e
j                     fdZ ede!e"   fd        Z# xZ$S )%Colbert         z[MASK]outputis_docreturnc                    |s)|j                   j                  t        j                        S |j                  |j
                  t        d      t        |j                        D ]G  \  }}t        |      D ]4  \  }}|| j                  v s|| j                  k(  s$d|j
                  ||f<   6 I |xj                   t        j                  |j
                  d      j                  t        j                        z  c_         t        j                  j                  |j                   ddd      }t        j                  |d      }|xj                   |z  c_         |j                   j                  t        j                        S )NzJinput_ids and attention_mask must be provided for document post-processingr   r    T)ordaxiskeepdimsg-q=)model_outputastypenpfloat32	input_idsattention_mask
ValueError	enumerate	skip_listpad_token_idexpand_dimslinalgnormmaximum)	selfr"   r#   itoken_sequencejtoken_idr5   norm_clampeds	            _/var/www/html/answerous/venv/lib/python3.12/site-packages/fastembed/late_interaction/colbert.py_post_process_onnx_outputz!Colbert._post_process_onnx_output0   s>    &&--bjj99#v'<'<'D\  "+6+;+;!< 	4A~(8 48t~~-T=N=N1N23F))!Q$/4	4
 	r~~f.C.CQGNNrzzZZyy~~f11qq4~Pzz$.|+""))"**55    
onnx_inputkwargsc                     |r| j                   n| j                  }t        j                  |d   d|d      |d<   t        j                  |d   ddd      |d<   |S )Nr-   r   )r'   r.   )DOCUMENT_MARKER_TOKEN_IDQUERY_MARKER_TOKEN_IDr+   insert)r7   r@   r#   rA   marker_tokens        r=   _preprocess_onnx_inputzColbert._preprocess_onnx_inputF   s`     9?t44DD^D^"$))J{,CQ[\"]
;')yy<L1MqRSZ['\
#$r?   	documentsc                 r    |r| j                  |      S | j                  t        t        |                  S )N)rH   )query)_tokenize_documents_tokenize_querynextiter)r7   rH   r#   rA   s       r=   tokenizezColbert.tokenizeN   s@      $$y$9	
 %%Di,A%B	
r?   rJ   c                    | j                   j                  |g      }t        |d   j                        | j                  k  rd }| j                   j
                  r| j                   j
                  }| j                   j                  | j                  | j                  | j                         | j                   j                  |g      }|| j                   j                          |S  | j                   j                  di | |S )Nr   )	pad_tokenpad_idlength )
	tokenizerencode_batchlenidsMIN_QUERY_LENGTHpaddingenable_padding
MASK_TOKENmask_token_id
no_padding)r7   rJ   encodedprev_paddings       r=   rL   zColbert._tokenize_queryU   s    ..--ug6wqz~~!6!66L~~%%#~~55NN))//)),, * 
 nn115':G#))+  .--==r?   c                 <    | j                   j                  |      }|S N)rU   rV   )r7   rH   r_   s      r=   rK   zColbert._tokenize_documentsh   s    ..--i8r?   c                     t         S )zLists the supported models.

        Returns:
            List[Dict[str, Any]]: A list of dictionaries containing the model information.
        )supported_colbert_modelsclss    r=   list_supported_modelszColbert.list_supported_modelsl   s
     ('r?   
model_name	cache_dirthreads	providerscuda
device_ids	lazy_load	device_idc	                    t        
|   |||fi |	 || _        || _        || _        || _        ||| _        n(| j                  | j                  d   | _        nd| _        | j                  |      | _        t        |      | _
        | j                  | j                  | j                  | j                        | _        d| _        d| _        t!               | _        | j                  s| j%                          yy)a  
        Args:
            model_name (str): The name of the model to use.
            cache_dir (str, optional): The path to the cache directory.
                                       Can be set using the `FASTEMBED_CACHE_PATH` env variable.
                                       Defaults to `fastembed_cache` in the system's temp directory.
            threads (int, optional): The number of threads single onnxruntime session can use. Defaults to None.
            providers (Optional[Sequence[OnnxProvider]], optional): The list of onnxruntime providers to use.
                Mutually exclusive with the `cuda` and `device_ids` arguments. Defaults to None.
            cuda (bool, optional): Whether to use cuda for inference. Mutually exclusive with `providers`
                Defaults to False.
            device_ids (Optional[List[int]], optional): The list of device ids to use for data parallel processing in
                workers. Should be used with `cuda=True`, mutually exclusive with `providers`. Defaults to None.
            lazy_load (bool, optional): Whether to load the model during class initialization or on demand.
                Should be set to True when using multiple-gpu and parallel encoding. Defaults to False.
            device_id (Optional[int], optional): The device id to use for loading the model in the worker process.

        Raises:
            ValueError: If the model_name is not in the format <org>/<model> e.g. BAAI/bge-base-en.
        Nr   )local_files_only)super__init__rk   rn   rm   rl   ro   _get_model_descriptionmodel_descriptionr   ri   download_model_local_files_only
_model_dirr]   r2   setr1   load_onnx_model)r7   rh   ri   rj   rk   rl   rm   rn   ro   rA   	__class__s             r=   rs   zColbert.__init__u   s    B 	YB6B"" %	  &DN__(!__Q/DN!DN!%!<!<Z!H))4--""DNNTE[E[ . 
 " ~~  " r?   c                    | j                  | j                  | j                  d   | j                  | j                  | j
                  | j                         | j                  | j                     | _	        | j                  j                  d   | _        t        j                  D ch c],  }| j                  j                  |d      j                   d   . c}| _        y c c}w )Nr   )	model_dirr   rj   rk   rl   ro   rR   F)add_special_tokensr   )_load_onnx_modelrx   ru   rj   rk   rl   ro   special_token_to_idr\   r]   rU   rZ   r2   stringpunctuationencoderX   r1   )r7   symbols     r=   rz   zColbert.load_onnx_model   s    oo--l;LLnnnn 	 	
 "55dooF NN228< !,,
 NN!!&U!CGGJ
 
s   %1C
batch_sizeparallelc              +      K    | j                   d| j                  t        | j                        |||| j                  | j
                  | j                  d|E d{    y7 w)a  
        Encode a list of documents into list of embeddings.
        We use mean pooling with attention so that the model can handle variable-length inputs.

        Args:
            documents: Iterator of documents or single document to embed
            batch_size: Batch size for encoding -- higher values will use more memory, but be faster
            parallel:
                If > 1, data-parallel encoding will be used, recommended for offline encoding of large datasets.
                If 0, use all available cores.
                If None, don't use data-parallel processing, use default onnxruntime threading instead.

        Returns:
            List of embeddings, one per document
        )rh   ri   rH   r   r   rk   rl   rm   NrT   )_embed_documentsrh   strri   rk   rl   rm   )r7   rH   r   r   rA   s        r=   embedzColbert.embed   sb     , )4(( 

$..)!nn

 

 
	
 
	
s   AA$A"A$c              +      K   t        |t              r|g}t        | d      r| j                  | j	                          |D ]/  }| j                  | j                  |gd      d      E d {    1 y 7 w)Nr   F)r#   )
isinstancer   hasattrr   rz   r>   
onnx_embed)r7   rJ   rA   texts       r=   query_embedzColbert.query_embed   s{     eS!GEtW%);  " 	D55u5e 6   	s   A)A5+A3,A5c                     t         S rb   )ColbertEmbeddingWorkerre   s    r=   _get_worker_classzColbert._get_worker_class   s    %%r?   )T)NNNFNFN)r$   N)   N)%__name__
__module____qualname__rD   rC   rY   r\   r   boolr   r+   ndarrayr>   r   r   r   rG   r   r   rO   rL   rK   classmethodrg   r   intr   r   rs   rz   r
   r   r   r	   r   r   __classcell__)r{   s   @r=   r   r   *   s=    J 9=6'6156	"**	6. AEsBJJ/9=PS	c2::o	
$s) 
T 
C 
TXYaTb 
S T(^ &T#Y 4>  (d4S>&: ( ( $(!%6:*.#'<#<# C=<# #	<#
 H\23<# <# T#Y'<# <# C=<#|
& "&	 
hsm+, 
  
 3-	 
 
"**	 
D
sHSM'9!: 
RTR\R\I] 
 &$':"; & &r?   r   c                        e Zd ZdededefdZy)r   rh   ri   r$   c                      t        d||dd|S )Nr   )rh   ri   rj   rT   )r   )r7   rh   ri   rA   s       r=   init_embeddingz%ColbertEmbeddingWorker.init_embedding   s'     
!
 	
 	
r?   N)r   r   r   r   r   r   rT   r?   r=   r   r      s    
 
 
7 
r?   r   )r   typingr   r   r   r   r   r   r	   r
   numpyr+   
tokenizersr   fastembed.commonr   fastembed.common.onnx_modelr   fastembed.common.utilsr   :fastembed.late_interaction.late_interaction_embedding_baser   fastembed.text.onnx_text_modelr   r   rd   r   r   r   rT   r?   r=   <module>r      s     M M M   ) 9 3 N
 */*
 #
 9 A9
 +
 4I&.bjj0I I&X
0 
r?   