
    +#h*                         d dl Z d dlmZmZ d dlZ	 d dlZd dlmZ d dlmZ 	 d dl
Z
 G d d	ej                        Zy# e	$ r	  e	d      w xY w# e	$ r  e j                  de       Y Bw xY w)
    N)CallableOptional)register_vector)sqlzcThe 'pgvector' extra is required to use PgVectorRM. Install it with `pip install dspy-ai[pgvector]`z_`openai` is not installed. Install it with `pip install openai` to use OpenAI embedding models.)categoryc                        e Zd ZdZ	 	 	 	 	 	 	 ddededeej                     dee   de	dedee
e      d	ed
ef fdZdefdZdede
e   fdZ xZS )
PgVectorRMa5  
    Implements a retriever that (as the name suggests) uses pgvector to retrieve passages,
    using a raw SQL query and a postgresql connection managed by psycopg2.

    It needs to register the pgvector extension with the psycopg2 connection

    Returns a list of dspy.Example objects

    Args:
        db_url (str): A PostgreSQL database URL in psycopg2's DSN format
        pg_table_name (Optional[str]): name of the table containing passages
        openai_client (openai.OpenAI): OpenAI client to use for computing query embeddings. Either openai_client or embedding_func must be provided.
        embedding_func (Callable): A function to use for computing query embeddings. Either openai_client or embedding_func must be provided.
        k (Optional[int]): Default number of top passages to retrieve. Defaults to 20
        embedding_field (str = "embedding"): Field containing passage embeddings. Defaults to "embedding"
        fields (List[str] = ['text']): Fields to retrieve from the table. Defaults to "text"
        embedding_model (str = "text-embedding-ada-002"): Field containing the OpenAI embedding model to use. Defaults to "text-embedding-ada-002"

    Examples:
        Below is a code snippet that shows how to use PgVector as the default retriever

        ```python
        import dspy
        import openai
        import psycopg2

        openai.api_key = os.environ.get("OPENAI_API_KEY", None)
        openai_client = openai.OpenAI()

        llm = dspy.OpenAI(model="gpt-3.5-turbo")

        DATABASE_URL should be in the format postgresql://user:password@host/database
        db_url=os.getenv("DATABASE_URL")

        retriever_model = PgVectorRM(conn, openai_client=openai_client, "paragraphs", fields=["text", "document_id"], k=20)
        dspy.settings.configure(lm=llm, rm=retriever_model)
        ```

        Below is a code snippet that shows how to use PgVector in the forward() function of a module
        ```python
        self.retrieve = PgVectorRM(db_url, openai_client=openai_client, "paragraphs", fields=["text", "document_id"], k=20)
        ```
    db_urlpg_table_nameopenai_clientembedding_funckembedding_fieldfieldsembedding_modelinclude_similarityc
                    |s	|sJ d       || _         || _        t        j                  |      | _        t        | j                         || _        |xs dg| _        || _        || _	        |	| _
        t        
| 1  |       y)z@
        k = 20 is the number of paragraphs to retrieve
        z8Either openai_client or embedding_func must be provided.text)r   N)r   r   psycopg2connectconnr   r   r   r   r   r   super__init__)selfr
   r   r   r   r   r   r   r   r   	__class__s             V/var/www/html/sandstorm/venv/lib/python3.12/site-packages/dspy/retrieve/pgvector_rm.pyr   zPgVectorRM.__init__A   s     j0jj.*,$$V,			"*(.."41    queryc           
      :   | j                  |      }g }t        j                  d      j                  | j                  D cg c]  }t        j
                  |       c}      }| j                  rmt        j                  d      t        j                  d      j                  t        j
                  | j                              z   }||z  }||| j                  f}n|| j                  f}t        j                  d      j                  |t        j
                  | j                        t        j
                  | j                              }| j                  5 }	|	j                         5 }
|
j                  ||       |
j                         }|
j                  D cg c]  }|d   	 }}|D ];  }t!        t#        ||            }|j%                  t'        j(                  di |       = 	 ddd       ddd       |S c c}w c c}w # 1 sw Y   xY w# 1 sw Y   |S xY w)	aS  Search with PgVector for self.k top passages for query using cosine similarity

        Args:
            query  (str): The query to search for
            include_similarity (bool): Whether or not to include the similarity for each record
        Returns:
            dspy.Prediction: an object containing the retrieved passages.
        ,z,1 - ({embedding_field} <=> %s) AS similarity)r   zOselect {fields} from {table} order by {embedding_field} <=> %s::vector limit %s)r   tabler   r   N )_get_embeddingsr   SQLjoinr   
Identifierr   formatr   r   r   r   cursorexecutefetchalldescriptiondictzipappenddspyExample)r   r   query_embeddingretrieved_docsfr   similarity_fieldargs	sql_queryr   currowsdescripcolumnsrowdatas                   r   forwardzPgVectorRM.forward^   s    ..u5""[[$
 NN1$
  ""B&8L8L)M&NO  &&F#_dff=D#TVV,DGG]

&..!3!34NN4+?+?@  
 	 YY 		@$ @# ||~58__E'71:EE @CGS 12D"))$,,*>*>?@@		@ G$
< F@ @		@ s=    G:H(1HG?%AH(H?HH		HHreturnc                     | j                   I| j                   j                  j                  | j                  |d      j                  d   j
                  S | j                  |      S )Nfloat)modelinputencoding_formatr   )r   
embeddingscreater   r<   	embeddingr   )r   r   s     r   r#   zPgVectorRM._get_embeddings   si    )%%0077** ' 8  d1	  i	  &&u--r   )NN   rF   Nztext-embedding-ada-002F)__name__
__module____qualname____doc__strr   openaiOpenAIr   intlistboolr   r=   r@   r#   __classcell__)r   s   @r   r	   r	      s    *^ 6:15#.*.#;',  $FMM2	
 %X.  ! T#Y' ! !%:1S 1f.S .T%[ .r   r	   )warningstypingr   r   r/   r   pgvector.psycopg2r   r   ImportErrorrM   warnImportWarningRetriever	   r"   r   r   <module>rZ      s{     % 1
*D. D.  
m   *HMMs(**s   = A AA*)A*