
    +#hC                        d Z ddlZddlmZmZmZmZmZ ddlZddl	m
Z
 	 ddlmZ ddlmZ ddlmZ ddlmZmZmZ 	 ddlZ G d dej2                        Zy# e$ r	  ed	      w xY w# e$ r  ej.                  d
e       Y Bw xY w)zf
Retriever module for Azure AI Search
Author: Prajapati Harishkumar Kishorkumar (@HARISHKUMAR1112001)
    N)AnyCallableListOptionalUniondotdict)AzureKeyCredential)SearchClient)SearchItemPaged)	QueryTypeVectorFilterModeVectorizedQueryzvYou need to install azure-search-documents libraryPlease use the command: pip install azure-search-documents==11.6.0b1zd`openai` is not installed. Install it with `pip install openai` to use AzureOpenAI embedding models.)categoryc            *           e Zd ZdZddddddddddej
                  dddddfd	ed
edededee   dedee	j                     dee   dee   dedededededee   dedee   dee   dee   deej                     f( fdZdeded ed!ededededededededededededef d"Zd#ed$ed%efd&Zd'eeee   f   dee   d(ej.                  fd)Zd ed*eded(eez  fd+Zd, Z xZS )-AzureAISearchRMa  
    A retrieval module that utilizes Azure AI Search to retrieve top passages for a given query.

    Args:
        search_service_name (str): The name of the Azure AI Search service.
        search_api_key (str): The API key for accessing the Azure AI Search service.
        search_index_name (str): The name of the search index in the Azure AI Search service.
        field_text (str): The name of the field containing text content in the search index. This field will be mapped to the "content" field in the dsp framework.
        field_vector (Optional[str]): The name of the field containing vector content in the search index. Defaults to None.
        k (int, optional): The default number of top passages to retrieve. Defaults to 3.
        azure_openai_client (Optional[openai.AzureOpenAI]): An instance of the AzureOpenAI client. Either openai_client or embedding_func must be provided. Defaults to None.
        openai_embed_model (Optional[str]): The name of the OpenAI embedding model. Defaults to "text-embedding-ada-002".
        embedding_func (Optional[Callable]): A function for generating embeddings. Either openai_client or embedding_func must be provided. Defaults to None.
        semantic_ranker (bool, optional): Whether to use semantic ranking. Defaults to False.
        filter (str, optional): Additional filter query. Defaults to None.
        query_language (str, optional): The language of the query. Defaults to "en-Us".
        query_speller (str, optional): The speller mode. Defaults to "lexicon".
        use_semantic_captions (bool, optional): Whether to use semantic captions. Defaults to False.
        query_type (Optional[QueryType], optional): The type of query. Defaults to QueryType.FULL.
        semantic_configuration_name (str, optional): The name of the semantic configuration. Defaults to None.
        is_vector_search (Optional[bool]): Whether to enable vector search. Defaults to False.
        is_hybrid_search (Optional[bool]): Whether to enable hybrid search. Defaults to False.
        is_fulltext_search (Optional[bool]): Whether to enable fulltext search. Defaults to True.
        vector_filter_mode (Optional[VectorFilterMode]): The vector filter mode. Defaults to None.

    Examples:
        Below is a code snippet that demonstrates how to instantiate and use the AzureAISearchRM class:
        ```python
        search_service_name = "your_search_service_name"
        search_api_key = "your_search_api_key"
        search_index_name = "your_search_index_name"
        field_text = "text_content_field"

        azure_search_retriever = AzureAISearchRM(search_service_name, search_api_key, search_index_name, field_text)
        ```

    Attributes:
        search_service_name (str): The name of the Azure AI Search service.
        search_api_key (str): The API key for accessing the Azure AI Search service.
        search_index_name (str): The name of the search index in the Azure AI Search service.
        endpoint (str): The endpoint URL for the Azure AI Search service.
        field_text (str): The name of the field containing text content in the search index.
        field_vector (Optional[str]): The name of the field containing vector content in the search index.
        azure_openai_client (Optional[openai.AzureOpenAI]): An instance of the AzureOpenAI client.
        openai_embed_model (Optional[str]): The name of the OpenAI embedding model.
        embedding_func (Optional[Callable]): A function for generating embeddings.
        credential (AzureKeyCredential): The Azure key credential for accessing the service.
        client (SearchClient): The Azure AI Search client instance.
        semantic_ranker (bool): Whether to use semantic ranking.
        filter (str): Additional filter query.
        query_language (str): The language of the query.
        query_speller (str): The speller mode.
        use_semantic_captions (bool): Whether to use semantic captions.
        query_type (Optional[QueryType]): The type of query.
        semantic_configuration_name (str): The name of the semantic configuration.
        is_vector_search (Optional[bool]): Whether to enable vector search.
        is_hybrid_search (Optional[bool]): Whether to enable hybrid search.
        is_fulltext_search (Optional[bool]): Whether to enable fulltext search.
        vector_filter_mode (Optional[VectorFilterMode]): The vector filter mode.

    Methods:
        forward(query_or_queries: Union[str, List[str]], k: Optional[int]) -> dspy.Prediction:
            Search for the top passages corresponding to the given query or queries.

        azure_search_request(
            self,
            key_content: str,
            client: SearchClient,
            query: str,
            top: int,
            semantic_ranker: bool,
            filter: str,
            query_language: str,
            query_speller: str,
            use_semantic_captions: bool,
            query_type: QueryType,
            semantic_configuration_name: str,
            is_vector_search: bool,
            is_hybrid_search: bool,
            is_fulltext_search: bool,
            field_vector: str,
            vector_filter_mode: VectorFilterMode
        ) -> List[dict]:
            Perform a search request to the Azure AI Search service.

        process_azure_result(
            self,
            results:SearchItemPaged,
            content_key:str,
            content_score: str
        ) -> List[dict]:
            Process the results received from the Azure AI Search service and map them to the correct format.

        get_embeddings(
            self,
            query: str,
            k_nearest_neighbors: int,
            field_vector: str
        ) -> List | Any:
            Returns embeddings for the given query.

        check_semantic_configuration(
            self,
            semantic_configuration_name,
            query_type
        ):
            Checks semantic configuration.

    Raises:
        ImportError: If the required Azure AI Search libraries are not installed.

    Note:
        This class relies on the 'azure-search-documents' library for interacting with the Azure AI Search service.
        Ensure that you have the necessary permissions and correct configurations set up in Azure before using this class.
    N   ztext-embedding-ada-002Fzen-UslexiconTsearch_service_namesearch_api_keysearch_index_name
field_textfield_vectorkazure_openai_clientopenai_embed_modelembedding_funcsemantic_rankerfilterquery_languagequery_spelleruse_semantic_captions
query_typesemantic_configuration_nameis_vector_searchis_hybrid_searchis_fulltext_searchvector_filter_modec                    || _         || _        || _        d| j                    d| _        || _        || _        || _        || _        |	| _        t        | j                        | _
        t        | j                  | j                  | j                        | _        |
| _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        t0        | e  |       y )Nzhttps://z.search.windows.net)endpoint
index_name
credential)r   )r   r   r   r*   r   r   r   r   r   r
   r,   r   clientr   r   r    r!   r"   r#   r$   r%   r&   r'   r(   super__init__)selfr   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   	__class__s                        [/var/www/html/sandstorm/venv/lib/python3.12/site-packages/dspy/retrieve/azureaisearch_rm.pyr/   zAzureAISearchRM.__init__   s    . $7 ,!2"4#;#;"<<OP$(#6 "4,,T-@-@A"]]t/E/ERVRaRa
  /,*%:"$+F( 0 0"4"41    key_contentr-   querytopc                 ^   |r]| j                  |||      }|r1| j                  ||
       |j                  d||
|g||||	rdnd      }n|j                  d||g||      }|ra| j                  |||      }|r3| j                  ||
       |j                  |||
|||||g||	rdnd
      }n|j                  ||||||g|      }|rF|r0| j                  ||
       |j                  |||
|||||	rdnd      }n|j                  |||      }t        d	 d
      }| j	                  |||      }|S )z1
        Search in Azure AI Search Index
        Nzextractive|highlight-false)search_textr   r#   vector_queriesr(   r$   r6   query_caption)r8   r   r9   r(   r6   )
r8   r   r#   r    r!   r$   r6   r9   r(   r:   )r8   r   r    r!   r6   r9   r(   )r8   r   r#   r    r!   r$   r6   r:   )r8   r6   r   c                     | d   S )Nz@search.score )xs    r2   <lambda>z6AzureAISearchRM.azure_search_request.<locals>.<lambda>   s    q7I r3   T)keyreverse)get_embeddingscheck_semantic_configurationsearchsortedprocess_azure_result)r0   r4   r-   r5   r6   r   r   r    r!   r"   r#   r$   r%   r&   r'   r   r(   vector_queryresultssorted_resultss                       r2   azure_search_requestz$AzureAISearchRM.azure_search_request   s   . ..uc<HL112MzZ -- $!)$0>'90KCX#?^b ( 	 !-- $!$0>'9 (  ..uc<HL112MzZ -- %!)#1"/0K$0>'9CX#?^b (  !-- %!#1"/$0>'9 (  112MzZ -- %!)#1"/0KCX#?^b ( 	 !--Es6-R-ISWX22>;P[\r3   rG   content_keycontent_scorec                     g }|D ]H  }i }|j                         D ]   \  }}||k(  r||d<   ||k(  r||d<   |||<   " |j                  |       J |S )z~
        process received result from Azure AI Search as dictionary array and map content and score to correct format
        textscore)itemsappend)	r0   rG   rJ   rK   resresulttmpr?   values	            r2   rE   z$AzureAISearchRM.process_azure_result&  su      		FC$lln %
U+%"'CKM)#(CL$CH% JJsO		 
r3   query_or_queriesreturnc                    ||n| j                   }t        |t              r|gn|}|D cg c]  }|s|	 }}g }|D ]  }| j                  | j                  | j
                  ||| j                  | j                  | j                  | j                  | j                  | j                  | j                  | j                  | j                  | j                  | j                   | j"                        }|j%                  d |D                |S c c}w )a  
        Search with pinecone for self.k top passages for query

        Args:
            query_or_queries (Union[str, List[str]]): The query or queries to search for.

        Returns:
            dspy.Prediction: An object containing the retrieved passages.
        c              3   <   K   | ]  }t        d |d   i        yw)	long_textrM   Nr   ).0ds     r2   	<genexpr>z*AzureAISearchRM.forward.<locals>.<genexpr>Z  s     O!G[!F)$<=Os   )r   
isinstancestrrI   r   r-   r   r   r    r!   r"   r#   r$   r%   r&   r'   r   r(   extend)r0   rU   r   queriesqpassagesr5   rG   s           r2   forwardzAzureAISearchRM.forward7  s     ADFF(23CS(I#$O_%+1++ 	PE//$$##""**00%%%%''!!''!G$ OOOwOO'	P* 1 ,s
   DDk_nearest_neighborsc                 >   | j                   s| j                  sJ d       | j                   b|sJ d       | j                   j                  j                  || j                        j
                  d   j                  }t        |||      }|gS | j                  |      S )as  
        Returns embeddings for the given query.

        Args:
            query (str): The query for which embeddings are to be retrieved.
            k_nearest_neighbors (int): The number of nearest neighbors to consider.
            field_vector (str): The field vector to use for embeddings.

        Returns:
            list: A list containing the vectorized query.
            Any: The result of embedding_func if azure_openai_client is not provided.

        Raises:
            AssertionError: If neither azure_openai_client nor embedding_func is provided,
                or if field_vector is not provided.
        z>Either azure_openai_client or embedding_func must be provided.zfield_vector must be provided.)inputmodelr   )vectorrd   fields)r   r   
embeddingscreater   data	embeddingr   )r0   r5   rd   r   rm   rF   s         r2   rA   zAzureAISearchRM.get_embeddings^  s    $ $$(;(;	LK	L; ##/A!AA< ((33::dNeNe:fkklmnxx  + 6IR^L !>!&&u--r3   c                 J    |sJ d       |t         j                  k(  sJ d       y)aW  
        Checks semantic configuration.

        Args:
            semantic_configuration_name: The name of the semantic configuration.
            query_type: The type of the query.

        Raises:
            AssertionError: If semantic_configuration_name is not provided
                or if query_type is not QueryType.SEMANTIC.
        z-Semantic configuration name must be provided.z&Query type must be QueryType.SEMANTIC.N)r   SEMANTIC)r0   r$   r#   s      r2   rB   z,AzureAISearchRM.check_semantic_configuration  s-     +[,[[*Y///Y1YY/r3   )__name__
__module____qualname____doc__r   FULLr^   r   intopenaiAzureOpenAIr   boolr   
PRE_FILTERr/   r   rI   r   rE   r   r   dspy
Predictionrc   r   rA   rB   __classcell__)r1   s   @r2   r   r       st   rt '+<@,D-1 %%&&+*3..+/+0+0-1DH+1 1 1 	1
 1 sm1 1 &f&8&891 %SM1 !*1 1 1 1 1  $1  Y'!1" &)#1$ #4.%1& #4.'1( %TN)1* %%5%@%@A+1f[[ [ 	[
 [ [ [ [ [  $[ [ &)[ [ [ ![  ![" -#[zO # ^a "%c49n(= %(3- %TXTcTc %N .C  .c  .QT  .Y]`cYc  .DZr3   r   )rs   warningstypingr   r   r   r   r   rz   dsp.utils.utilsr	   azure.core.credentialsr
   azure.search.documentsr   azure.search.documents._pagingr   azure.search.documents.modelsr   r   r   ImportErrorrv   warnImportWarningRetriever   r<   r3   r2   <module>r      s   
  7 7  #	93>ZZmZdmm mZ  
	O   HMMns   A A( A%(BB