
    +#h(                        d Z ddlmZmZmZ ddlZddlZddlmZ 	 ddl	Z	e	 e
d      ddlZ	  eej                  j                  d         dk(  Z	 ddlZej$                  j&                  ej$                  j(                  ej$                  j*                  fZ G d dej.                        Zy# e
$ r dZ	Y w xY w# e$ r dZY rw xY w# e$ r ej&                  ej*                  fZY Ow xY w)	z<
Retriever model for Pinecone
Author: Dhar Rawal (@drawal1)
    )ListOptionalUnionN)dotdictzcThe pinecone library is required to use PineconeRM. Install it with `pip install dspy-ai[pinecone]`Tc                   n    e Zd ZdZ	 	 	 	 	 	 	 ddedee   dee   dee   dee   dee   dee   d	ef fd
Z	 	 	 	 ddedee   dee   dee   dee   dej                  fdZ
d Z ej                  ej                  ed      dee   deee      fd       Zdeeee   f   dej*                  fdZ xZS )
PineconeRMa  
    A retrieval module that uses Pinecone to return the top passages for a given query.

    Assumes that the Pinecone index has been created and populated with the following metadata:
        - text: The text of the passage

    Args:
        pinecone_index_name (str): The name of the Pinecone index to query against.
        pinecone_api_key (str, optional): The Pinecone API key. Defaults to None.
        pinecone_env (str, optional): The Pinecone environment. Defaults to None.
        local_embed_model (str, optional): The local embedding model to use. A popular default is "sentence-transformers/all-mpnet-base-v2".
        openai_embed_model (str, optional): The OpenAI embedding model to use. Defaults to "text-embedding-ada-002".
        openai_api_key (str, optional): The API key for OpenAI. Defaults to None.
        openai_org (str, optional): The organization for OpenAI. Defaults to None.
        k (int, optional): The number of top passages to retrieve. Defaults to 3.

    Returns:
        dspy.Prediction: An object containing the retrieved passages.

    Examples:
        Below is a code snippet that shows how to use this as the default retriver:
        ```python
        llm = dspy.OpenAI(model="gpt-3.5-turbo")
        retriever_model = PineconeRM(openai.api_key)
        dspy.settings.configure(lm=llm, rm=retriever_model)
        ```

        Below is a code snippet that shows how to use this in the forward() function of a module
        ```python
        self.retrieve = PineconeRM(k=num_passages)
        ```
    pinecone_index_namepinecone_api_keypinecone_envlocal_embed_modelopenai_embed_modelopenai_api_key
openai_orgkc	                 2   |	 dd l }	ddlm}
m} |
j                  |      | _        |j                  |      | _        d| _	        |	j                  |	j                  j                         rdn'|	j                  j                  j                         rdnd      | _
        n6|)|| _        d| _	        |r|t         _        |r|t         _        nt'        d	      | j)                  |||      | _        t,        | ]  |
       y # t        $ r}t        d      |d }~ww xY w)Nr   )	AutoModelAutoTokenizerzeYou need to install Hugging Face transformers library to use a local embedding model with PineconeRM.Tzcuda:0mpscpuFz@Either local_embed_model or openai_embed_model must be provided.)r   )torchtransformersr   r   ImportErrorModuleNotFoundErrorfrom_pretrained_local_embed_model_local_tokenizeruse_local_modeldevicecudais_availablebackendsr   _openai_embed_modelopenaiapi_keyorganization
ValueError_init_pinecone_pinecone_indexsuper__init__)selfr	   r
   r   r   r   r   r   r   r   r   r   exc	__class__s                V/var/www/html/sandstorm/venv/lib/python3.12/site-packages/dspy/retrieve/pinecone_rm.pyr*   zPineconeRM.__init__F   s    (A '0&?&?@Q&RD#$1$A$ABS$TD!#'D ,,!JJ335++88:DK
  +'9D$#(D !/&0#R   $22!1< 
 	1=  )ws   C< <	DDD
index_namer$   environment	dimensiondistance_metricreturnc                     i }|r||d<   |r||d<   t        j                  di | t        j                         }||vr'||t        d      t        j                  |||       t        j
                  |      S )an  Initialize pinecone and return the loaded index.

        Args:
            index_name (str): The name of the index to load. If the index is not does not exist, it will be created.
            api_key (str, optional): The Pinecone API key, defaults to env var PINECONE_API_KEY if not provided.
            environment (str, optional): The environment (ie. `us-west1-gcp` or `gcp-starter`. Defaults to env PINECONE_ENVIRONMENT.

        Raises:
            ValueError: If api_key or environment is not provided and not set as an environment variable.

        Returns:
            pinecone.Index: The loaded index.
        r$   r0   zWdimension and distance_metric must be provided since the index provided does not exist.)namer1   metric )pineconeinitlist_indexesr&   create_indexIndex)r+   r/   r$   r0   r1   r2   kwargsactive_indexess           r.   r'   zPineconeRM._init_pineconeu   s    .  'F9$/F=!!..0^+ _%< m  !!#& ~~j))    c                 8   	 dd l }|d   }|j                  d      j	                  |j                               j                         }|j                  ||z  d      |j                  |j                  d      d      z  S # t        $ r}t        d      |d }~ww xY w)Nr   IYou need to install torch to use a local embedding model with PineconeRM.   g&.>)min)	r   r   r   	unsqueezeexpandsizefloatsumclamp)r+   model_outputattention_maskr   r,   token_embeddingsinput_mask_expandeds          r.   _mean_poolingzPineconeRM._mean_pooling   s    
	 (?,66r:AABRBWBWBYZ``byy),??CekkReRiRijkRlrvkFwww  	%[	s   A? ?	BBB   )max_timequeriesc                 8   	 ddl }| j                  st        r,t
        j                  j                  || j                        }n9t
        j                  j                  || j                        j                         }|d   D cg c]  }|d   	 c}S | j                  |ddd	      j                  | j                        }|j                         5   | j                  di |j                  | j                        }ddd       | j!                  |d
         }|j"                  j$                  j'                  |dd      }|j)                         j+                         j-                         S # t        $ r}t        d      |d}~ww xY wc c}w # 1 sw Y   xY w)zReturn query vector after creating embedding using OpenAI

        Args:
            queries (list): List of query strings to embed.

        Returns:
            List[List[float]]: List of embeddings corresponding to each query.
        r   NrA   )inputmodeldata	embeddingTpt)padding
truncationreturn_tensorsrL      rC   )pdimr7   )r   r   r   r   OPENAI_LEGACYr#   	Embeddingcreater"   
embeddings
model_dumpr   tor   no_gradr   rO   nn
functional	normalizer   numpytolist)	r+   rR   r   r,   rW   encoded_inputrK   rb   normalized_embeddingss	            r.   _get_embeddingszPineconeRM._get_embeddings   s   "	 ##",,33!)A)A 4 	 #--44!)A)A 5 *,  =Ff<MNyIk*NN --gtPTei-jmmnrnynyz]]_ 	T2422S]5E5Edkk5RSL	T ''mDT6UV
 % 3 3 = =jAST = U$((*00299;;/  	%[	 O	T 	Ts)   E. F,F.	F7FFFquery_or_queriesc           
      D   t        |t              r|gn|}|D cg c]  }|s|	 }}| j                  |      }t        |      dk(  r| j                  j                  |d   | j                  d      }t        |d   d d      }|D cg c]
  }|d   d	    }}t        |D 	ci c]  }	d
|	 c}	      g}t        j                  |      S i }
|D ]^  }| j                  j                  || j                  dz  d      }|d   D ])  }|
j                  |d   d	   d      |d   z   |
|d   d	   <   + ` t        |
j                         d d      d| j                   }t        j                  |D 	cg c]  \  }	}t        d
|	i       c}}	      S c c}w c c}w c c}	w c c}}	w )a  Search with pinecone for self.k top passages for query

        Args:
            query_or_queries (Union[str, List[str]]): The query or queries to search for.

        Returns:
            dspy.Prediction: An object containing the retrieved passages.
        rC   r   T)top_kinclude_metadatamatchesc                 &    | j                  dd      S )Nscores        )getxs    r.   <lambda>z$PineconeRM.forward.<locals>.<lambda>   s    quuXs7K r?   )keyreversemetadatatext	long_text)passages   ru   scorec                     | d   S )NrC   r7   rw   s    r.   ry   z$PineconeRM.forward.<locals>.<lambda>  s
    !A$ r?   N)
isinstancestrrm   lenr(   queryr   sortedr   dspy
Predictionrv   items)r+   rn   rR   qrb   results_dictsorted_resultsresultr   passagepassage_scoresrW   sorted_passages_s                 r.   forwardzPineconeRM.forward   s    *C0 ! 	
 &+1++))'2
 w<1//551TVVd 6 L
 $Y'-KUYN BPPvz*62PHPX N'g!5 NOPH??H55 # 	I//55!d 6 L 'y1 "&&vj'9&'A3GWo& vj1&9:		 !  "

DFF [j(kZWVW+w1G)H(kllC , Q N& )ls   FFF"
F-F
)NNNztext-embedding-ada-002NNr   )NNNN)__name__
__module____qualname____doc__r   r   intr*   r8   r<   r'   rO   backoffon_exceptionexpoERRORSr   rH   rm   r   r   r   r   __classcell__)r-   s   @r.   r   r   $   si   H +/&*+/,D(,$(- - #3-- sm	-
 $C=- %SM- !- SM- -d "&%)#')-+*+* #+* c]	+*
 C=+* "#+* 
+*Zx  W
%<c%< 
d5k	%<
%<T/mc49n(= /m$// /mr?   r   )r   typingr   r   r   r   r   	dsp.utilsr   r8   r   r#   r   version__version__r_   	Exceptionopenai.errorerrorRateLimitErrorServiceUnavailableErrorAPIErrorr   Retriever   r7   r?   r.   <module>r      s   
 ) (    
m  22156!;M6ll))6<<+O+OQWQ]Q]QfQfgFlm lm+  H  M  6##V__5F6s5   B- "B: AC -B76B7:CCC'&C'