
    +#h                         d Z ddlmZmZ ddlZddlZddlmZ ddl	m
Z
 	 ddlZe ed       G d dej                        Zy# e$ r dZY )w xY w)	zxRetriever model for faiss: https://github.com/facebookresearch/faiss.
Author: Jagane Sundar: https://github.com/jagane.
    )OptionalUnionN)SentenceTransformersVectorizer)dotdictzb
        The faiss package is required. Install it using `pip install dspy-ai[faiss-cpu]`
        c                   r     e Zd ZdZddef fdZd	dZd
deee	e   f   de
e   dej                  fdZ xZS )FaissRMa  A retrieval module that uses an in-memory Faiss to return the top passages for a given query.

    Args:
        document_chunks: the input text chunks
        vectorizer: an object that is a subclass of BaseSentenceVectorizer
        k (int, optional): The number of top passages to retrieve. Defaults to 3.

    Returns:
        dspy.Prediction: An object containing the retrieved passages.

    Examples:
        Below is a code snippet that shows how to use this as the default retriver:
        ```python
        import dspy
        from dspy.retrieve import faiss_rm

        document_chunks = [
            "The superbowl this year was played between the San Francisco 49ers and the Kanasas City Chiefs",
            "Pop corn is often served in a bowl",
            "The Rice Bowl is a Chinese Restaurant located in the city of Tucson, Arizona",
            "Mars is the fourth planet in the Solar System",
            "An aquarium is a place where children can learn about marine life",
            "The capital of the United States is Washington, D.C",
            "Rock and Roll musicians are honored by being inducted in the Rock and Roll Hall of Fame",
            "Music albums were published on Long Play Records in the 70s and 80s",
            "Sichuan cuisine is a spicy cuisine from central China",
            "The interest rates for mortgages is considered to be very high in 2024",
        ]

        frm = faiss_rm.FaissRM(document_chunks)
        turbo = dspy.OpenAI(model="gpt-3.5-turbo")
        dspy.settings.configure(lm=turbo, rm=frm)
        print(frm(["I am in the mood for Chinese food"]))
        ```

        Below is a code snippet that shows how to use this in the forward() function of a module
        ```python
        self.retrieve = FaissRM(k=num_passages)
        ```
    kc                    |r|| _         nt               | _         | j                  |      }t        j                  |      }t	        |d         }t
        j                  j                  d|        t	        |      dk  r6t        j                  |      | _
        | j                  j                  |       nid}t        j                  |      }t        j                  |||      | _
        | j                  j                  |       | j                  j                  |       t
        j                  j                  | j                  j                   d       || _        t         	| E  |       y)zInits the faiss retriever.

        Args:
            document_chunks: a list of input strings.
            vectorizer: an object that is a subclass of BaseTransformersVectorizer.
            k: number of matches to return.
        r   zFaissRM: embedding size=d   z vectors in faiss index)r	   N)_vectorizerr   nparraylendspyloggerinfofaissIndexFlatL2_faiss_indexaddIndexIVFFlattrainntotal_document_chunkssuper__init__)
selfdocument_chunks
vectorizerr	   
embeddingsxbdnlist	quantizer	__class__s
            S/var/www/html/sandstorm/venv/lib/python3.12/site-packages/dspy/retrieve/faiss_rm.pyr   zFaissRM.__init__D   s'    )D=?D%%o6
XXj!1J3A378r7S= % 1 1! 4D!!"% E))!,I % 2 29a GD##B'!!"%D--4455LMN /1    returnc                 H   t        t        |            D ]  }||   }||   }t        j                  j	                  d||           t        t        |            D ]C  }t        j                  j	                  d| d||    d||    d| j
                  ||              E  y )NzQuery: z    Hit z = /z: )ranger   r   r   debugr   )r   queries
index_listdistance_listiindices	distancesjs           r&   _dump_raw_resultszFaissRM._dump_raw_resultsd   s    s7|$ 	uA mG%a(IKK
|453w<( u!!HQCs71:,a	!~RPTPePefmnofpPqOr"stu		u 	r'   query_or_queriesc           	         t        |t              r|gn|D cg c]  }|s|	 c}| j                        }t        j                  |      }t              dk(  rp| j                  j                  ||xs | j                        \  }}|d   D 	cg c]  }	| j                  |	   |	f }
}	|
D cg c]  }t        |d   |d   d       c}S  | j                  j                  ||xs | j                  dz  fi |\  }}i }t        t        |            D ]V  }||   }||   }t        |xs | j                  dz        D ]+  }||   }||   }||v r||   j                  |       &|g||<   - X t        |j                         fd      d|xs | j                   }|D cg c]   \  }}t        | j                  |   |d      " c}}S c c}w c c}	w c c}w c c}}w )a  Search the faiss index for k or self.k top passages for query.

        Args:
            query_or_queries (Union[str, List[str]]): The query or queries to search for.

        Returns:
            dspy.Prediction: An object containing the retrieved passages.
           r   )	long_textindex   c                 T    t              t        | d         z
  t        | d         fS )Nr7   )r   sum)xr-   s    r&   <lambda>z!FaissRM.forward.<locals>.<lambda>   s/    GWZ[\]^[_W`H`befghifjbkGl r'   )keyN)
isinstancestrr   r   r   r   r   searchr	   r   r   r+   appendsorteditems)r   r5   r	   kwargsqr    emb_npar/   r.   indpassagespassagepassage_scoresembr1   r2   resneighbordistancesorted_passagespassage_index_r-   s                         @r&   forwardzFaissRM.forwardm   s    )33CS(I#$O_%+1+%%g.
((:&w<1(,(9(9(@(@!+tvv(V%M:EOPQ]Sc..s3S9SHS[cdPWG'!*wqzJKdd$<D$5$5$<$<Wq{DFFVWFW$b[a$b!zZ) 	:C oG%I ak466Q./ :"3<$S>~-"8,33H=08zN8,:	:  !!5!5!7=lmak466

 %4
 q $"7"7"FQ^_`
 	
A , Td0
s   GGG0G#%G)Nr:   )r(   N)N)__name__
__module____qualname____doc__intr   r4   r   rA   listr   r   
PredictionrT   __classcell__)r%   s   @r&   r   r      sM    'RC @-
c49n(= -
(3- -
eietet -
r'   r   )rX   typingr   r   numpyr   r   dsp.modules.sentence_vectorizerr   	dsp.utilsr   r   ImportErrorRetriever    r'   r&   <module>rd      sd    #   J  	=
	 @
dmm @
  Es   A AA