
    Ig~                     z    d dl Z d dlmZ d dlmZ d dlmZmZ d dlm	Z	 dede
fdZdedeee
f   fd	Zdede	fd
Zy)    N)Path)Tuple)
AddedToken	Tokenizer)Compose	model_dirreturnc                     | dz  }|j                         st        d|        t        t        |            5 }t	        j
                  |      }d d d        |S # 1 sw Y   S xY w)Nzspecial_tokens_map.jsonz*Could not find special_tokens_map.json in )exists
ValueErroropenstrjsonload)r   tokens_map_pathtokens_map_file
tokens_maps       `/var/www/html/answerous/venv/lib/python3.12/site-packages/fastembed/common/preprocessor_utils.pyload_special_tokensr   	   si    ";;O!!#Ei[QRR	c/"	# 0YY/
0 0 s   AA"c                    | dz  }|j                         st        d|        | dz  }|j                         st        d|        | dz  }|j                         st        d|        t        t        |            5 }t	        j
                  |      }d d d        t        t        |            5 }t	        j
                  |      }d|v sd|v sJ d	       d|vr|d   }nd|vr|d   }nt        |d   |d         }d d d        t        |       }	t        j                  t        |            }
|
j                  
       |
j                  j                  dd      d          |	j                         D ]Q  }t        |t              r|
j                  |g       &t        |t               s7|
j                  t#        di |g       S i }|	j                         D ]^  }t        |t              r|
j%                  |      ||<   (t        |t               s9|j                  dd      }|
j%                  |      ||<   ` |
|fS # 1 sw Y   xY w# 1 sw Y   SxY w)Nzconfig.jsonzCould not find config.json in ztokenizer.jsonz!Could not find tokenizer.json in ztokenizer_config.jsonz(Could not find tokenizer_config.json in model_max_length
max_lengthz@Models without model_max_length or max_length are not supported.)r   pad_token_idr   	pad_token)pad_idr   content  )r   r   r   r   r   r   minr   r   	from_fileenable_truncationenable_paddinggetvalues
isinstanceadd_special_tokensdictr   token_to_id)r   config_pathtokenizer_pathtokenizer_config_pathconfig_fileconfigtokenizer_config_filetokenizer_configmax_contextr   	tokenizertokenspecial_token_to_id	token_strs                 r   load_tokenizerr5      s   m+K9)EFF!11N  "<YKHII%(?? '')CI;OPP	c+	 (;;'( 
c'(	) 
d-B99%:;"22lFV6V	NM	NV%55*<8K!11*+=>K./ABDTUaDbcK
d %Y/J##C$78I;7zz.!,8H8U   ""$ @eS!((%1t$((**=u*=)>?	@ ""$ NeS!)2)>)>u)E&t$		)R0I-6-B-B9-M	*N )))M( (
d 
ds   >H?0AI?I	Ic                     | dz  }|j                         st        d|        t        t        |            5 }t	        j
                  |      }t        j                  |      }d d d        |S # 1 sw Y   S xY w)Nzpreprocessor_config.jsonz+Could not find preprocessor_config.json in )r   r   r   r   r   r   r   from_config)r   preprocessor_config_pathpreprocessor_config_filepreprocessor_config
transformss        r   load_preprocessorr<   J   s    (+EE#**,FykRSS	c*+	, >0H"ii(@A(()<=
> > s   +A--A7)r   pathlibr   typingr   
tokenizersr   r   #fastembed.image.transform.operatorsr   r'   r   r5   r<   r       r   <module>rB      sW       , 74 D 3*d 3*uY_'= 3*l ' rA   