
    :Qg_                         d dl Z d dlmZmZ d dlmZmZmZ d dlZ	d dl
mZmZ d dlmZ d dlmZmZ d dlmZ dZd	Zd
ZdZdZdZerd dlmZ d dlmZ  G d de      Ze G d de             Zy)    N)	dataclassfield)TYPE_CHECKINGListOptional)Field	SecretStr)Element)BaseEmbeddingEncoderEmbeddingConfig)requires_dependenciesz@mixedbread-ai/unstructured   <      floatendMixedbreadAIRequestOptionsc                   p    e Zd ZU dZ ed       Zeed<    ed      Ze	ed<    e
dgd	
      dd       Zy)MixedbreadAIEmbeddingConfigz
    Configuration class for Mixedbread AI Embedding Encoder.

    Attributes:
        api_key (str): API key for accessing Mixedbread AI..
        model_name (str): Name of the model to use for embeddings.
    c                  R    t        t        j                  j                  d            S )NMXBAI_API_KEY)r	   osenvironget     \/var/www/html/answerous/venv/lib/python3.12/site-packages/unstructured/embed/mixedbreadai.py<lambda>z$MixedbreadAIEmbeddingConfig.<lambda>#   s    	"**..*I J r   )default_factoryapi_keyz"mixedbread-ai/mxbai-embed-large-v1)default
model_namemixedbread_aizembed-mixedbreadai)extrasc                 P    ddl m}  || j                  j                               S )zr
        Create the Mixedbread AI client.

        Returns:
            MixedbreadAI: Initialized client.
        r   r   )r#   )mixedbread_ai.clientr   r#   get_secret_value)selfr   s     r    
get_clientz&MixedbreadAIEmbeddingConfig.get_client*   s$     	6LL113
 	
r   N)returnr   )__name__
__module____qualname____doc__r   r#   r	   __annotations__r%   strr   r,   r   r   r    r   r      sU     JGY  4J  	#
	
r   r   c                   2   e Zd ZU dZeed<    edd      Zee	e
      ed<    edd      Zed   ed<   d	e	e
   fd
Zd Zed        Zed	efd       Zde	e   d	e	e	e
      fdZede	e   de	e	e
      d	e	e   fd       Zde	e   d	e	e   fdZded	e	e
   fdZy)MixedbreadAIEmbeddingEncoderz
    Embedding encoder for Mixedbread AI.

    Attributes:
        config (MixedbreadAIEmbeddingConfig): Configuration for the embedding encoder.
    configFN)initr$   _exemplary_embeddingr   _request_optionsr-   c                 ,    | j                  dg      d   S )zJGet an exemplary embedding to determine dimensions and unit vector status.Qr   _embed)r+   s    r    get_exemplary_embeddingz4MixedbreadAIEmbeddingEncoder.get_exemplary_embeddingJ   s    {{C5!!$$r   c                     | j                   j                  t        d      ddlm}  |t
        t        dt        i      | _        y )NzThe Mixedbread AI API key must be specified.You either pass it in the constructor using 'api_key'or via the 'MXBAI_API_KEY' environment variable.r   r   z
User-Agent)max_retriestimeout_in_secondsadditional_headers)	r6   r#   
ValueErrormixedbread_ai.corer   MAX_RETRIESTIMEOUT
USER_AGENTr9   )r+   r   s     r    
initializez'MixedbreadAIEmbeddingEncoder.initializeN   sE    ;;&E  	6 .#& ,j9!
r   c                 L    | j                         }t        j                  |      S )z0Get the number of dimensions for the embeddings.)r>   npshaper+   exemplary_embeddings     r    num_of_dimensionsz.MixedbreadAIEmbeddingEncoder.num_of_dimensions^   s#     #::<xx+,,r   c                     | j                         }t        j                  t        j                  j	                  |      d      S )z(Check if the embedding is a unit vector.g      ?)r>   rJ   iscloselinalgnormrL   s     r    is_unit_vectorz+MixedbreadAIEmbeddingEncoder.is_unit_vectord   s2     #::<zz"))..)<=sCCr   textsc           	         t         }t        dt        |      |      }g }| j                  j	                         }|D ]X  }||||z    }|j                  | j                  j                  dt        t        | j                  |      }|j                  |       Z |D 	cg c]  }|j                  D ]  }	|	j                   ! c}	}S c c}	}w )z
        Embed a list of texts using the Mixedbread AI API.

        Args:
            texts (List[str]): List of texts to embed.

        Returns:
            List[List[float]]: List of embeddings.
        r   T)model
normalizedencoding_formattruncation_strategyrequest_optionsinput)
BATCH_SIZErangelenr6   r,   
embeddingsr%   ENCODING_FORMATTRUNCATION_STRATEGYr9   appenddata	embedding)
r+   rT   
batch_size	batch_itr	responsesclientibatchresponseitems
             r    r=   z#MixedbreadAIEmbeddingEncoder._embedj   s      
!SZ4		'') 
	'A!a*n-E((kk,, /$7 $ 5 5 ) H X&
	' 09S8X]]STSSSSs   $Celementsr_   c                     t        |       t        |      k(  sJ g }t        |       D ]   \  }}||   |_        |j                  |       " | S )a  
        Add embeddings to elements.

        Args:
            elements (List[Element]): List of elements.
            embeddings (List[List[float]]): List of embeddings.

        Returns:
            List[Element]: Elements with embeddings added.
        )r^   	enumerater_   rb   )rm   r_   elements_w_embeddingri   elements        r    _add_embeddings_to_elementsz8MixedbreadAIEmbeddingEncoder._add_embeddings_to_elements   sY     8}J///!#H- 	1JAw!+AG ''0	1 r   c                     | j                  |D cg c]  }t        |       c}      }| j                  ||      S c c}w )z
        Embed a list of document elements.

        Args:
            elements (List[Element]): List of document elements.

        Returns:
            List[Element]: Elements with embeddings.
        )r=   r3   rr   )r+   rm   er_   s       r    embed_documentsz,MixedbreadAIEmbeddingEncoder.embed_documents   s9     [[(!;Q#a&!;<
//*EE "<s   ;queryc                 ,    | j                  |g      d   S )z
        Embed a query string.

        Args:
            query (str): Query string to embed.

        Returns:
            List[float]: Embedding of the query.
        r   r<   )r+   rv   s     r    embed_queryz(MixedbreadAIEmbeddingEncoder.embed_query   s     {{E7#A&&r   )r.   r/   r0   r1   r   r2   r   r8   r   r   r   r9   r>   rH   propertyrN   boolrS   r3   r=   staticmethodr
   rr   ru   rx   r   r   r    r5   r5   <   s    ('27UD2Q(4;/Q38eT3Rh/0R%e %
  - -
 D D D
TDI T$tE{*; T8 w--1$u+->	g (FW F$w- F
' 
'e 
'r   r5   ) r   dataclassesr   r   typingr   r   r   numpyrJ   pydanticr   r	   unstructured.documents.elementsr
   unstructured.embed.interfacesr   r   unstructured.utilsr   rG   r\   rF   rE   r`   ra   r)   r   rD   r   r   r5   r   r   r    <module>r      sv    	 ( 0 0  % 3 O 4*


  11 
/  
F u'#7 u' u'r   