
    +#hW                        d Z ddlmZmZmZ ddlZddlZddlZddlm	Z	 	 ddl
Zej                  j                  ej                  j                  ej                  j                  fZ	 ddlZddlmc mZ ddlmZmZ ddlmZ ddlmZ  G d	 d
ej:                        Zy# e$ r ej                  ej                  fZY Ww xY w# e$ r	  ed      w xY w)z
Retriever model for chromadb
    )ListOptionalUnionN)dotdict)
EmbeddableEmbeddingFunction)Settings)embedding_functionszcThe chromadb library is required to use ChromadbRM. Install it with `pip install dspy-ai[chromadb]`c                   v    e Zd ZdZ ej
                         ddfdededeee	      dee
j                     def
 fd	Z	 ddededee
j                     d
e
j                  fdZ ej"                  ej$                  ed      dee   d
eee      fd       Z	 ddeeee   f   dee   d
ej2                  fdZ xZS )
ChromadbRMa}  
    A retrieval module that uses chromadb to return the top passages for a given query.

    Assumes that the chromadb index has been created and populated with the following metadata:
        - documents: The text of the passage

    Args:
        collection_name (str): chromadb collection name
        persist_directory (str): chromadb persist directory
        embedding_function (Optional[EmbeddingFunction[Embeddable]]): Optional function to use to embed documents. Defaults to DefaultEmbeddingFunction.
        k (int, optional): The number of top passages to retrieve. Defaults to 7.
        client(Optional[chromadb.Client]): Optional chromadb client provided by user, default to None

    Returns:
        dspy.Prediction: An object containing the retrieved passages.

    Examples:
        Below is a code snippet that shows how to use this as the default retriever:
        ```python
        llm = dspy.OpenAI(model="gpt-3.5-turbo")
        # using default chromadb client
        retriever_model = ChromadbRM('collection_name', 'db_path')
        dspy.settings.configure(lm=llm, rm=retriever_model)
        # to test the retriever with "my query"
        retriever_model("my query")
        ```

        Use provided chromadb client
        ```python
        import chromadb
        llm = dspy.OpenAI(model="gpt-3.5-turbo")
        # say you have a chromadb running on a different port
        client = chromadb.HttpClient(host='localhost', port=8889)
        retriever_model = ChromadbRM('collection_name', 'db_path', client=client)
        dspy.settings.configure(lm=llm, rm=retriever_model)
        # to test the retriever with "my query"
        retriever_model("my query")
        ```

        Below is a code snippet that shows how to use this in the forward() function of a module
        ```python
        self.retrieve = ChromadbRM('collection_name', 'db_path', k=num_passages)
        ```
    N   collection_namepersist_directoryembedding_functionclientkc                 \    | j                  |||       || _        t        |   |       y )N)r   )r   )_init_chromadbefsuper__init__)selfr   r   r   r   r   	__class__s         V/var/www/html/sandstorm/venv/lib/python3.12/site-packages/dspy/retrieve/chromadb_rm.pyr   zChromadbRM.__init__P   s2     	O->vN$1    returnc                     |r|| _         n%t        j                  t        |d            | _         | j                   j	                  |      | _        y)a<  Initialize chromadb and return the loaded index.

        Args:
            collection_name (str): chromadb collection name
            persist_directory (str): chromadb persist directory
            client (chromadb.Client): chromadb client provided by user

        Returns: collection per collection_name
        T)r   is_persistent)nameN)_chromadb_clientchromadbClientr	   get_or_create_collection_chromadb_collection)r   r   r   r   s       r   r   zChromadbRM._init_chromadb_   sR      $*D!$,OO&7"&%
D! %)$9$9$R$R  %S %
!r      )max_timequeriesc                 $    | j                  |      S )zReturn query vector after creating embedding using OpenAI

        Args:
            queries (list): List of query strings to embed.

        Returns:
            List[List[float]]: List of embeddings corresponding to each query.
        )r   )r   r'   s     r   _get_embeddingszChromadbRM._get_embeddings|   s     wwwr   query_or_queriesc                    t        |t              r|gn|}|D cg c]  }|s|	 }}| j                  |      }|| j                  n|} | j                  j
                  d||d|}t        |d   d   |d   d   |d   d   |d   d         }|D 	
cg c]  \  }	}
}}t        |	|
||d       }}}
}	}|S c c}w c c}}}
}	w )	zSearch with db for self.k top passages for query

        Args:
            query_or_queries (Union[str, List[str]]): The query or queries to search for.

        Returns:
            dspy.Prediction: An object containing the retrieved passages.
        )query_embeddings	n_resultsidsr   	distances	documents	metadatas)idscore	long_textr1    )
isinstancestrr)   r   r$   queryzipr   )r   r*   r   kwargsr'   q
embeddingsresultszipped_resultsr2   distdocmetas                r   forwardzChromadbRM.forward   s    *C0 ! 	
 &+1++))'2
iDFFQ1$++11 
'1
7=
 EN1K #K #K #	%
 xF  G  G`s`bdhjmos7"t#TXZ[  G  G , Gs   B8B8B=
)N)__name__
__module____qualname____doc__r   DefaultEmbeddingFunctionr7   r   r   r   r!   r"   intr   
Collectionr   backoffon_exceptionexpoERRORSr   floatr)   r   dspy
PredictionrB   __classcell__)r   s   @r   r   r   "   s+   +f (B''),0  %j)
	 ) & -1	

 
 )	

 
		
: W
	 tCy 	 T$u+5F 	 
	  KO %c49n 5:B3-	r   r   )rF   typingr   r   r   rJ   openairO   	dsp.utilsr   openai.errorerrorRateLimitErrorServiceUnavailableErrorAPIErrorrM   	Exceptionr!   "chromadb.utils.embedding_functionsutilsr
   r   chromadb.api.typesr   r   chromadb.configr	   chromadb.utilsImportErrorRetriever   r5   r   r   <module>rb      s    ) (    6ll))6<<+O+OQWQ]Q]QfQfgF33 )2H H%  6##V__5F6  
m s   AB &!C  B=<B= C