
    Ig~                         d dl mZmZmZmZmZmZmZmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZmZ d dlmZ d dlZ G d d	e      Zy)
    )AnyDictIterableListOptionalSequenceTypeUnion)OnnxProvider)Bm25)Bm42)SparseEmbeddingSparseTextEmbeddingBase)SpladePPNc                       e Zd ZU eeegZeee	      e
d<   edeeeef      fd       Z	 	 	 	 	 	 ddedee   dee   deee      ded	eee      d
ef fdZ	 	 ddeeee   f   dedee   dee   fdZdeeee   f   dee   fdZ xZS )SparseTextEmbeddingEMBEDDINGS_REGISTRYreturnc                 j    g }| j                   D ]!  }|j                  |j                                # |S )a  
        Lists the supported models.

        Returns:
            List[Dict[str, Any]]: A list of dictionaries containing the model information.

            Example:
                ```
                [
                    {
                        "model": "prithvida/SPLADE_PP_en_v1",
                        "vocab_size": 30522,
                        "description": "Independent Implementation of SPLADE++ Model for English",
                        "license": "apache-2.0",
                        "size_in_GB": 0.532,
                        "sources": {
                            "hf": "qdrant/SPLADE_PP_en_v1",
                        },
                    }
                ]
                ```
        )r   extendlist_supported_models)clsresult	embeddings      c/var/www/html/answerous/venv/lib/python3.12/site-packages/fastembed/sparse/sparse_text_embedding.pyr   z)SparseTextEmbedding.list_supported_models   s8    0 00 	=IMM)99;<	=    
model_name	cache_dirthreads	providerscuda
device_ids	lazy_loadc           
      &   t        |   ||fi | dk(  rt        j                  dt        d       d| j
                  D ]=  }	|	j                         }
t        fd|
D              s( |	|f|||||d|| _         y  t        d d	      )
Nzprithvida/Splade_PP_en_v1zvThe right spelling is prithivida/Splade_PP_en_v1. Support of this name will be removed soon, please fix the model_name   )
stacklevelzprithivida/Splade_PP_en_v1c              3   f   K   | ](  }j                         |d    j                         k(   * yw)modelN)lower).0r(   r   s     r   	<genexpr>z/SparseTextEmbedding.__init__.<locals>.<genexpr>E   s,     ^E:##%w)=)=)??^s   .1)r   r    r!   r"   r#   zModel z~ is not supported in SparseTextEmbedding.Please check the supported models using `SparseTextEmbedding.list_supported_models()`)
super__init__warningswarnDeprecationWarningr   r   anyr(   
ValueError)selfr   r   r   r    r!   r"   r#   kwargsEMBEDDING_MODEL_TYPEsupported_models	__class__s    `         r   r-   zSparseTextEmbedding.__init__.   s     	YB6B44MMW"	 6J$($<$< 	 3IIK^M]^^1	 $')'	 	
 	 ZL !d d
 	
r   	documents
batch_sizeparallelc              +   ^   K    | j                   j                  |||fi |E d{    y7 w)a  
        Encode a list of documents into list of embeddings.
        We use mean pooling with attention so that the model can handle variable-length inputs.

        Args:
            documents: Iterator of documents or single document to embed
            batch_size: Batch size for encoding -- higher values will use more memory, but be faster
            parallel:
                If > 1, data-parallel encoding will be used, recommended for offline encoding of large datasets.
                If 0, use all available cores.
                If None, don't use data-parallel processing, use default onnxruntime threading instead.

        Returns:
            List of embeddings, one per document
        N)r(   embed)r3   r8   r9   r:   r4   s        r   r<   zSparseTextEmbedding.embedW   s+     , $4::##Iz8NvNNNs   #-+-queryc              +   Z   K    | j                   j                  |fi |E d{    y7 w)z
        Embeds queries

        Args:
            query (Union[str, Iterable[str]]): The query to embed, or an iterable e.g. list of queries.

        Returns:
            Iterable[SparseEmbedding]: The sparse embeddings.
        N)r(   query_embed)r3   r=   r4   s      r   r?   zSparseTextEmbedding.query_embedo   s'      *4::))%:6:::s   !+)+)NNNFNF)   N)__name__
__module____qualname__r   r   r   r   r   r	   r   __annotations__classmethodr   strr   r   r   intr   r   boolr-   r
   r   r   r<   r?   __classcell__)r7   s   @r   r   r      s6   @H$PT?Ud#:;<Ud4S>&:  > $(!%6:*.'
'
 C='
 #	'

 H\23'
 '
 T#Y''
 '
X "&	Ohsm+,O O 3-	O 
/	"O0
;sHSM'9!: 
;RaIb 
;r   r   )typingr   r   r   r   r   r   r	   r
   fastembed.commonr   fastembed.sparse.bm25r   fastembed.sparse.bm42r   &fastembed.sparse.sparse_embedding_baser   r   fastembed.sparse.splade_ppr   r.   r    r   r   <module>rQ      s3    M M M ) & & 0 k;1 k;r   