
    IgE                         d dl Z d dlmZ d dlmZ d dlmZmZmZm	Z	m
Z
mZmZmZmZ d dlZd dlmZ d dlmZ d dlmZmZmZmZ d dlmZ d d	lmZ d d
lmZ  G d dee         Z  G d de      Z!y)    N)get_all_start_methods)Path)	AnyDictIterableListOptionalSequenceTupleTypeUnion)Encoding)OnnxProvider)EmbeddingWorker	OnnxModelOnnxOutputContextT)load_tokenizer)
iter_batch)ParallelWorkerPoolc                       e Zd ZU dZeee      ed<   ede	d   fd       Z
dedee   fdZd fdZd	eeej$                  f   deeej$                  f   fd
Z	 	 	 ddededee   deee      dedee   ddf fdZddZdee   dee   fdZdee   defdZ	 	 	 	 	 ddededeeee   f   dedee   deee      dedeee      dee   fdZ xZ S )OnnxTextModelNONNX_OUTPUT_NAMESreturnTextEmbeddingWorkerc                     t        d      Nz%Subclasses must implement this methodNotImplementedError)clss    [/var/www/html/answerous/venv/lib/python3.12/site-packages/fastembed/text/onnx_text_model.py_get_worker_classzOnnxTextModel._get_worker_class   s    !"IJJ    outputc                     t        d      r   r   )selfr$   s     r!   _post_process_onnx_outputz'OnnxTextModel._post_process_onnx_output       !"IJJr#   c                 >    t         |           d | _        i | _        y N)super__init__	tokenizerspecial_token_to_id)r&   	__class__s    r!   r,   zOnnxTextModel.__init__   s    #% r#   
onnx_inputc                     |S )z,
        Preprocess the onnx input.
         )r&   r0   kwargss      r!   _preprocess_onnx_inputz$OnnxTextModel._preprocess_onnx_input   s
     r#   	model_dir
model_filethreads	providerscuda	device_idc                 b    t         |   ||||||       t        |      \  | _        | _        y )N)r5   r6   r7   r8   r9   r:   )r5   )r+   _load_onnx_modelr   r-   r.   )r&   r5   r6   r7   r8   r9   r:   r/   s          r!   r<   zOnnxTextModel._load_onnx_model'   s@     	 ! 	! 	
 4BI3V00r#   c                     t        d      r   r   )r&   s    r!   load_onnx_modelzOnnxTextModel.load_onnx_model:   r(   r#   	documentsc                 8    | j                   j                  |      S r*   )r-   encode_batch)r&   r?   r3   s      r!   tokenizezOnnxTextModel.tokenize=   s    ~~**955r#   c           
          | j                   |fi |}t        j                  |D cg c]  }|j                   c}      }t        j                  |D cg c]  }|j                   c}      }| j
                  j                         D ch c]  }|j                   }}dt        j                  |t        j                        i}	d|v r(t        j                  |t        j                        |	d<   d|v rbt        j                  |D cg c]0  }t        j                  t        |      t        j                        2 c}t        j                        |	d<    | j                  |	fi |}	| j
                  j                  | j                  |	      }
t        |
d   |	j                  d|      |	j                  d|            S c c}w c c}w c c}w c c}w )N	input_ids)dtypeattention_masktoken_type_idsr   )model_outputrF   rD   )rB   nparrayidsrF   model
get_inputsnameint64zeroslenr4   runr   r   get)r&   r?   r3   encodederD   rF   nodeinput_namesr0   rH   s              r!   
onnx_embedzOnnxTextModel.onnx_embed@   sp   
  $--	4V4HHW5aee56	W"E1#3#3"EF-1ZZ-B-B-DETtyyEE)288<

 {*+-88N"((+SJ'({*+-88;DEa#a&1ERXX,J'( 1T00FvF
zz~~d&<&<jI %a%>>*:NK nn[)<
 	
! 6"EE Fs   GG
G	5G
model_name	cache_dir
batch_sizeparallel
device_idsc	              +   j  K   d}
t        |t              r|g}d}
t        |t              rt        |      |k  rd}
||
rbt	        | d      r| j
                  | j                          t        ||      D ]*  }| j                  | j                  |            E d {    , y |dk(  rt        j                         }dt               v rdnd}|||d|	}t        |xs d| j                         |||	      } |j                  t        ||      fi |D ]  }| j                  |      E d {     y 7 7 	w)
NFTrL   r   
forkserverspawn)rY   rZ   r8      )num_workersworkerr9   r]   start_method)
isinstancestrlistrQ   hasattrrL   r>   r   r'   rX   os	cpu_countr   r   r"   ordered_map)r&   rY   rZ   r?   r[   r\   r8   r9   r]   r3   is_smallbatchrd   paramspools                  r!   _embed_documentszOnnxTextModel._embed_documents\   sP     i%"IHi&9~
*x4)TZZ-?$$&#Iz: R99$//%:PQQQR 1}<<>+7;P;R+R<X_L(&& 	F &$M--/%)D *))*Y
*KVvV A99%@@@A) R* As%   BD3D/BD3'D1(D31D3)r   N)NFN)   NNFN)!__name__
__module____qualname__r   r	   r   rf   __annotations__classmethodr   r"   r   r   r   r'   r,   r   rI   ndarrayr4   r   intr
   r   boolr<   r>   r   rB   rX   r   rp   __classcell__)r/   s   @r!   r   r      s   -1xS	*1K$'<"= K KK0A Khqk K&
sBJJ/	c2::o	 7;#'WW W #	W
 H\23W W C=W 
W&K6$s) 6$x. 6
9
 
	
B "&6:*./A/A /A hsm+,	/A
 /A 3-/A H\23/A /A T#Y'/A 
!/Ar#   r   c                   <    e Zd Zdeeeef      deeeef      fdZy)r   itemsr   c              #   b   K   |D ]&  \  }}| j                   j                  |      }||f ( y wr*   )rL   rX   )r&   r|   idxrm   onnx_outputs        r!   processzTextEmbeddingWorker.process   s8      	#JC**//6K{""	#s   -/N)rr   rs   rt   r   r   rx   r   r   r2   r#   r!   r   r      s/    #XeCHo6 #8E#s(O;T #r#   r   )"ri   multiprocessingr   pathlibr   typingr   r   r   r   r	   r
   r   r   r   numpyrI   
tokenizersr   fastembed.commonr   fastembed.common.onnx_modelr   r   r   r   #fastembed.common.preprocessor_utilsr   fastembed.common.utilsr   fastembed.parallel_processorr   r   r   r2   r#   r!   <module>r      sP    	 1  T T T   ) X X > - ;{AIaL {A|#/ #r#   