
    Ig              	           d dl mZmZmZmZmZmZmZmZ d dl	Z
d dlmZ d dlmZ d dlmZ d dlmZmZ d dlmZmZ dd	d
ddddidddd	d
ddddiddgZ G d deee         Z G d de      Zy)    )AnyDictIterableListOptionalSequenceTypeUnionN)OnnxProvider)OnnxOutputContext)define_cache_dir)SparseEmbeddingSparseTextEmbeddingBase)OnnxTextModelTextEmbeddingWorkerzprithivida/Splade_PP_en_v1i:w  z9Independent Implementation of SPLADE++ Model for English.z
apache-2.0g/$?hfzQdrant/SPLADE_PP_en_v1z
model.onnx)model
vocab_sizedescriptionlicense
size_in_GBsources
model_filezprithvida/Splade_PP_en_v1c                       e Zd Zdedee   fdZedee	e
ef      fd       Z	 	 	 	 	 	 	 dde
dee
   dee   deee      d	ed
eee      dedee   f fdZddZ	 	 ddee
ee
   f   dedee   dee   fdZedee   fd       Z xZS )SpladePPoutputreturnc              #   x  K   |j                   t        d      t        j                  dt        j                  |j
                  d      z         }|t        j                  |j                   d      z  }t        j                  |d      }|D ])  }|j                         d   }||   }t        ||       + y w)Nz<attention_mask must be provided for document post-processing   r   )axis)valuesindices)
attention_mask
ValueErrornplogmaximummodel_outputexpand_dimsmaxnonzeror   )selfr   relu_logweighted_logscores
row_scoresr#   s          W/var/www/html/answerous/venv/lib/python3.12/site-packages/fastembed/sparse/splade_pp.py_post_process_onnx_outputz"SpladePP._post_process_onnx_output(   s       ([\\66!bjj)<)<a@@A"..1F1FR"PP1- ! 	BJ ((*1-G(F!AA	Bs   B8B:c                     t         S )zLists the supported models.

        Returns:
            List[Dict[str, Any]]: A list of dictionaries containing the model information.
        )supported_splade_modelsclss    r2   list_supported_modelszSpladePP.list_supported_models9   s
     '&    
model_name	cache_dirthreads	providerscuda
device_ids	lazy_load	device_idc	                    t        
|   |||fi |	 || _        || _        || _        || _        ||| _        n(| j                  | j                  d   | _        nd| _        | j                  |      | _        t        |      | _
        | j                  | j                  | j                  | j                        | _        | j                  s| j                          yy)a  
        Args:
            model_name (str): The name of the model to use.
            cache_dir (str, optional): The path to the cache directory.
                                       Can be set using the `FASTEMBED_CACHE_PATH` env variable.
                                       Defaults to `fastembed_cache` in the system's temp directory.
            threads (int, optional): The number of threads single onnxruntime session can use. Defaults to None.
            providers (Optional[Sequence[OnnxProvider]], optional): The list of onnxruntime providers to use.
                Mutually exclusive with the `cuda` and `device_ids` arguments. Defaults to None.
            cuda (bool, optional): Whether to use cuda for inference. Mutually exclusive with `providers`
                Defaults to False.
            device_ids (Optional[List[int]], optional): The list of device ids to use for data parallel processing in
                workers. Should be used with `cuda=True`, mutually exclusive with `providers`. Defaults to None.
            lazy_load (bool, optional): Whether to load the model during class initialization or on demand.
                Should be set to True when using multiple-gpu and parallel encoding. Defaults to False.
            device_id (Optional[int], optional): The device id to use for loading the model in the worker process.

        Raises:
            ValueError: If the model_name is not in the format <org>/<model> e.g. BAAI/bge-base-en.
        Nr   )local_files_only)super__init__r=   r@   r?   r>   rA   _get_model_descriptionmodel_descriptionr   r;   download_model_local_files_only
_model_dirload_onnx_model)r-   r:   r;   r<   r=   r>   r?   r@   rA   kwargs	__class__s             r2   rE   zSpladePP.__init__B   s    @ 	YB6B"" %	  &DN__(!__Q/DN!DN!%!<!<Z!H))4--""DNNTE[E[ . 
 ~~  " r9   c                     | j                  | j                  | j                  d   | j                  | j                  | j
                  | j                         y )Nr   )	model_dirr   r<   r=   r>   rA   )_load_onnx_modelrJ   rG   r<   r=   r>   rA   )r-   s    r2   rK   zSpladePP.load_onnx_model|   sF    oo--l;LLnnnn 	 	
r9   	documents
batch_sizeparallelc              +      K    | j                   d| j                  t        | j                        |||| j                  | j
                  | j                  d|E d{    y7 w)a  
        Encode a list of documents into list of embeddings.
        We use mean pooling with attention so that the model can handle variable-length inputs.

        Args:
            documents: Iterator of documents or single document to embed
            batch_size: Batch size for encoding -- higher values will use more memory, but be faster
            parallel:
                If > 1, data-parallel encoding will be used, recommended for offline encoding of large datasets.
                If 0, use all available cores.
                If None, don't use data-parallel processing, use default onnxruntime threading instead.

        Returns:
            List of embeddings, one per document
        )r:   r;   rQ   rR   rS   r=   r>   r?   N )_embed_documentsr:   strr;   r=   r>   r?   )r-   rQ   rR   rS   rL   s        r2   embedzSpladePP.embed   sb     , )4(( 

$..)!nn

 

 
	
 
	
s   AA$A"A$c                     t         S )N)SpladePPEmbeddingWorkerr6   s    r2   _get_worker_classzSpladePP._get_worker_class   s    &&r9   )NNNFNFN)r   N)   N)__name__
__module____qualname__r   r   r   r3   classmethodr   r   rW   r   r8   r   intr   r   boolrE   rK   r
   rX   r	   r   r[   __classcell__)rM   s   @r2   r   r   '   sB   B0A BhF_ B" 'd4S>&: ' ' $(!%6:*.#'8#8# C=8# #	8#
 H\238# 8# T#Y'8# 8# C=8#t
 "&	 
hsm+, 
  
 3-	 
 
/	" 
D '$':"; ' 'r9   r   c                        e Zd ZdededefdZy)rZ   r:   r;   r   c                      t        d||dd|S )Nr   )r:   r;   r<   rU   )r   )r-   r:   r;   rL   s       r2   init_embeddingz&SpladePPEmbeddingWorker.init_embedding   s'     
!
 	
 	
r9   N)r]   r^   r_   rW   r   rf   rU   r9   r2   rZ   rZ      s    
 
 
8 
r9   rZ   )typingr   r   r   r   r   r   r	   r
   numpyr&   fastembed.commonr   fastembed.common.onnx_modelr   fastembed.common.utilsr   &fastembed.sparse.sparse_embedding_baser   r   fastembed.text.onnx_text_modelr   r   r5   r   rZ   rU   r9   r2   <module>rn      s    M M M  ) 9 3 N .R*
 #
 -R*
 #
 4C'&o(F C'L
1 
r9   