
    R"h                     *   d dl Z d dlZd dlmZmZmZ d dlZd dlZd dlZd dl	m
Z
mZ ddlmZ  G d dej                        Z G d d	ej                        Z G d
 dej                        Z G d dej                        Z G d dej                        Z G d dej                        Z G d dej                        Z G d dej                        Z G d dej                        Z G d dej                        Z G d dej                        Zy)    N)CallableUnionList)backoff_hdlrgiveup_hdlr   )WebPageHelperc                   V     e Zd Zddef fdZd Zg fdeeee   f   dee   fdZ	 xZ
S )YouRMis_valid_sourcec                     t         |   |       |s*t        j                  j	                  d      st        d      |r|| _        nt        j                  d   | _        d| _        |r|| _        y d | _        y )NkYDC_API_KEYzCYou must supply ydc_api_key or set environment variable YDC_API_KEYr   c                      yNT xs    3/var/www/html/sandstorm/storm/knowledge_storm/rm.py<lambda>z YouRM.__init__.<locals>.<lambda>           )	super__init__osenvirongetRuntimeErrorydc_api_keyusager   )selfr    r   r   	__class__s       r   r   zYouRM.__init__   so    12::>>-#@U  *D!zz-8D
 #2D #1D r   c                 0    | j                   }d| _         d|iS )Nr   r   r!   r"   r!   s     r   get_usage_and_resetzYouRM.get_usage_and_reset    s    


r   query_or_queriesexclude_urlsc                    t        |t              r|gn|}| xj                  t        |      z  c_        g }|D ]  }	 d| j                  i}t        j                  d| |      j                         }g }|d   D ]0  }	| j                  |	d         s|	d   |vs |j                  |	       2 d|v r|j                  |d| j                           |S # t        $ r%}
t        j                  d| d|
        Y d}
~
d}
~
ww xY w)	a  Search with You.com for self.k top passages for query or queries

        Args:
            query_or_queries (Union[str, List[str]]): The query or queries to search for.
            exclude_urls (List[str]): A list of urls to exclude from the search results.

        Returns:
            a list of Dicts, each dict has keys of 'description', 'snippets' (list of strings), 'title', 'url'
        z	X-API-Keyz&https://api.ydc-index.io/search?query=headershitsurlN"Error occurs when searching query : )
isinstancestrr!   lenr    requestsr   jsonr   appendextendr   	Exceptionloggingerror)r"   r(   r)   queriescollected_resultsqueryr,   resultsauthoritative_resultsres              r   forwardzYouRM.forward&   s+    *C0 ! 	
 	

c'l"
 	QEQ&(8(89",,<UGD# $& 
 )+%  8A++AeH5!E(,:V-44Q78 W$%,,-B8TVV-LM	Q" !   Q B5'A3OPPQs$   ACC5C	D C;;D N   N__name__
__module____qualname__r   r   r'   r   r2   r   rB   __classcell__r#   s   @r   r   r      sA    2x 2$  RT$! %c49n 5$!EI#Y$!r   r   c                   n     e Zd Z	 	 	 	 	 	 	 	 d	dededef fdZd Zg fdeee	e   f   de	e   fdZ
 xZS )

BingSearchr   min_char_countsnippet_chunk_sizec	                 B   t         
|   |       |s*t        j                  j	                  d      st        d      |r|| _        nt        j                  d   | _        d| _        |||d|	| _        t        |||      | _
        d| _        |r|| _        y	d | _        y	)
a  
        Params:
            min_char_count: Minimum character count for the article to be considered valid.
            snippet_chunk_size: Maximum character count for each snippet.
            webpage_helper_max_threads: Maximum number of threads to use for webpage helper.
            mkt, language, **kwargs: Bing search API parameters.
            - Reference: https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/reference/query-parameters
        r   BING_SEARCH_API_KEYz\You must supply bing_search_subscription_key or set environment variable BING_SEARCH_API_KEYz*https://api.bing.microsoft.com/v7.0/search)mktsetLangcountrM   rN   max_thread_numr   c                      yr   r   r   s    r   r   z%BingSearch.__init__.<locals>.<lambda>x   r   r   N)r   r   r   r   r   r   bing_api_keyendpointparamsr	   webpage_helperr!   r   )r"   bing_search_api_keyr   r   rM   rN   webpage_helper_max_threadsrQ   languagekwargsr#   s             r   r   zBingSearch.__init__N   s    ( 	1"2::>>:O+Pn  ! 3D "

+@ ADD!hMfM+)15

 
 #2D #1D r   c                 0    | j                   }d| _         d|iS )Nr   rL   r%   r&   s     r   r'   zBingSearch.get_usage_and_resetz   s    


e$$r   r(   r)   c                    t        |t              r|gn|}| xj                  t        |      z  c_        i }d| j                  i}|D ]  }	 t        j                  | j                  |i | j                  d|i      j                         }|d   d   D ]4  }| j                  |d         s|d   |vs |d   |d   |d   d	||d   <   6  | j                  j                  t!        |j#                                     }
g }|
D ]#  }||   }|
|   d   |d<   |j%                  |       % |S # t        $ r&}	t        j                  d
| d|	        Y d}	~	d}	~	ww xY w)a  Search with Bing for self.k top passages for query or queries

        Args:
            query_or_queries (Union[str, List[str]]): The query or queries to search for.
            exclude_urls (List[str]): A list of urls to exclude from the search results.

        Returns:
            a list of Dicts, each dict has keys of 'description', 'snippets' (list of strings), 'title', 'url'
        zOcp-Apim-Subscription-Keyqr,   rY   webPagesvaluer.   namesnippet)r.   titledescriptionr/   r0   Nsnippets)r1   r2   r!   r3   rW   r4   r   rX   rY   r5   r   r8   r9   r:   rZ   urls_to_snippetslistkeysr6   )r"   r(   r)   r;   url_to_resultsr,   r=   r>   drA   valid_url_to_snippetsr<   r.   r@   s                 r   rB   zBingSearch.forward   s    *C0 ! 	
 	

c'l"
.0A0AB 	QEQ",,MM7;Vdkk;V3PU;V$&  !,W5 A++AeH5!E(,:V#$U8%&vY+,Y<4qx0	Q  !% 3 3 D D$$&'!
 ( 	(Cs#A1#6zBAjM$$Q'	(
 !   Q B5'A3OPPQs%   
AD)*D)2D))	E2EE)NrD   N     
   zen-USenrF   rG   rH   r   intr   r'   r   r2   r   rB   rI   rJ   s   @r   rL   rL   M   ss     !
$(!"&#%*2 "	*2
 *2  *2X% RT0! %c49n 50!EI#Y0!r   rL   c            	            e Zd ZdZ	 	 ddedededef fdZd Zded	efd
ZdefdZ	d Z
d Zdeeee   f   dee   fdZ xZS )VectorRMa  Retrieve information from custom documents using Qdrant.

    To be compatible with STORM, the custom documents should have the following fields:
        - content: The main text content of the document.
        - title: The title of the document.
        - url: The URL of the document. STORM use url as the unique identifier of the document, so ensure different
            documents have different urls.
        - description (optional): The description of the document.
    The documents should be stored in a CSV file.
    collection_nameembedding_modeldevicer   c                     ddl m} 	 t        |   |       d| _        |st        d      |st        d      d|i}ddi} ||||	      | _        || _        d | _        d | _	        y )
Nr   )HuggingFaceEmbeddingsr   z!Please provide a collection name.z"Please provide an embedding model.rz   normalize_embeddingsT)
model_namemodel_kwargsencode_kwargs)
langchain_huggingfacer|   r   r   r!   
ValueErrormodelrx   clientqdrant)	r"   rx   ry   rz   r   r|   r   r   r#   s	           r   r   zVectorRM.__init__   s     	@	 	1
@AAABB &)/6*&%'

  /r   c                 N   ddl m} 	 | j                  t        d      | j                  j	                  | j
                         rHt        d| j
                   d        || j                  | j
                  | j                        | _        y t        d| j
                   d      )	Nr   )Qdrantz!Qdrant client is not initialized.rx   zCollection z" exists. Loading the collection...)r   rx   
embeddingsz4 does not exist. Please create the collection first.)	langchain_qdrantr   r   r   collection_existsrx   printr   r   )r"   r   s     r   _check_collectionzVectorRM._check_collection   s    +	 ;;@AA;;((D<P<P;Q(Sd2233UV !{{ $ 4 4::DK d2233gh r   r.   api_keyc                    ddl m} 	 |5t        j                  d      st	        d      t        j                  d      }|t	        d      	  |||      | _        | j                          y # t        $ r}t	        d|       d }~ww xY w)Nr   QdrantClientQDRANT_API_KEYzPlease provide an api key.z+Please provide a url for the Qdrant server.)r.   r   z,Error occurs when connecting to the server: )qdrant_clientr   r   getenvr   r   r   r8   )r"   r.   r   r   rA   s        r   init_online_vector_dbzVectorRM.init_online_vector_db   s    .	 ?99-. !=>>ii 01G;JKK	Q&3@DK""$ 	QKA3OPP	Qs   A- -	B	6BB	vector_store_pathc                     ddl m} 	 |t        d      	  ||      | _        | j	                          y # t
        $ r}t        d|       d }~ww xY w)Nr   r   zPlease provide a folder path.)pathz,Error occurs when loading the vector store: )r   r   r   r   r   r8   )r"   r   r   rA   s       r   init_offline_vector_dbzVectorRM.init_offline_vector_db  sb    .	 $<==	Q&,=>DK""$ 	QKA3OPP	Qs   5 	AAAc                 0    | j                   }d| _         d|iS )Nr   rw   r%   r&   s     r   r'   zVectorRM.get_usage_and_reset#  s    


E""r   c                 b    | j                   j                  j                  | j                        S )z
        Get the count of vectors in the collection.

        Returns:
            int: Number of vectors in the collection.
        r   )r   r   rS   rx   )r"   s    r   get_vector_countzVectorRM.get_vector_count)  s(     {{!!''8L8L'MMr   r(   r)   c           	         t        |t              r|gn|}| xj                  t        |      z  c_        g }|D ]  }| j                  j                  || j                        }t        t        |            D ]R  }||   d   }|j                  |j                  d   |j                  g|j                  d   |j                  d   d       T  |S )a  
        Search in your data for self.k top passages for query or queries.

        Args:
            query_or_queries (Union[str, List[str]]): The query or queries to search for.
            exclude_urls (List[str]): Dummy parameter to match the interface. Does not have any effect.

        Returns:
            a list of Dicts, each dict has keys of 'description', 'snippets' (list of strings), 'title', 'url'
        r   r   rh   rg   r.   rh   ri   rg   r.   )r1   r2   r!   r3   r   similarity_search_with_scorer   ranger6   metadatapage_content)	r"   r(   r)   r;   r<   r=   related_docsidocs	            r   rB   zVectorRM.forward2  s     *C0 ! 	
 	

c'l"
 	E;;CCETVVCTL3|,- 	"1oa(!(('*||M'B%(%5%5$6!$g!6"||E2			 ! r   )mpsrD   )rF   rG   rH   __doc__r2   ru   r   r   r   r   r'   r   r   r   rB   rI   rJ   s   @r   rw   rw      s    	 ## # 	#
 #J,Q Qs Q.Q Q$#N!c49n(= !TRUY !r   rw   c                   `     e Zd ZdZd	 fd	Zd ZdefdZg fdeee	e   f   de	e   fdZ
 xZS )
StanfordOvalArxivRMzS[Alpha] This retrieval class is for internal use only, not intended for the public.c                 P    t         |   |       || _        d| _        || _        y )Nr   r   )r   r   rX   r!   rerank)r"   rX   r   r   r#   s       r   r   zStanfordOvalArxivRM.__init__W  s(    1 
r   c                 0    | j                   }d| _         d|iS )Nr   r   r%   r&   s     r   r'   z'StanfordOvalArxivRM.get_usage_and_reset]  s    


%u--r   r=   c                    || j                   | j                  d}t        j                  | j                  |ddi      }|j
                  dk(  rz|j                         d   d   }g }|D ][  }|d   |d	   |d
   g|j                  dd      |j                         D ci c]  \  }}|dvr|| c}}d}	|j                  |	       ] |S t        d|j
                         c c}}w )N)r=   
num_blocksr   Content-Typeapplication/json)r5   r,      r   r>   document_titler.   contentrh   N/A)r   r.   r   )rg   r.   ri   rh   metaz0Error: Unable to retrieve results. Status code: )r   r   r4   postrX   status_coder5   r   itemsr6   r8   )
r"   r=   payloadresponseresponse_data_listr>   response_datakeyrd   results
             r   	_retrievezStanfordOvalArxivRM._retrievec  s   !4;;O==MM.BT1U

 3&!)!3I!>G!3 '*+;<(/!.y!9 :#0#4#4]E#J +8*=*=*?&C&JJ U

 v&' NB8CWCWBXY s   C r(   r)   c                     g }t        |t              r|gn|}|D ]%  }	 | j                  |      }|j                  |       ' |S # t        $ r%}t        j                  d| d|        Y d }~Sd }~ww xY w)Nr/   r0   )r1   r2   r   r7   r8   r9   r:   )r"   r(   r)   r<   r;   r=   r>   rA   s           r   rB   zStanfordOvalArxivRM.forward  s      *C0 ! 	  	QEQ../!((1	Q !   Q B5'A3OPPQs   "A	A2A--A2)rD   T)rF   rG   rH   r   r   r'   r2   r   r   r   rB   rI   rJ   s   @r   r   r   T  sJ    ].s B RT! %c49n 5!EI#Y!r   r   c                   n     e Zd ZdZ	 	 	 	 	 	 	 d
dedef fdZd Zd Zdee	e
e	   f   de
e	   fd	Z xZS )SerperRMz:Retrieve information from custom queries using Serper.dev.rM   rN   c                    t         |   |       d| _        d| _        || _        t        |||      | _        ||ddd| _        n$|| _        | j                  j                  d|i       || _        | j                  s*t        j                  j                  d	      st        d
      | j                  r|| _        d| _        yt        j                  d	   | _        d| _        y)a  Args:
        serper_search_api_key str: API key to run serper, can be found by creating an account on https://serper.dev/
        query_params (dict or list of dict): parameters in dictionary or list of dictionaries that has a max size of 100 that will be used to query.
            Commonly used fields are as follows (see more information in https://serper.dev/playground):
                q str: query that will be used with google search
                type str: type that will be used for browsing google. Types are search, images, video, maps, places, etc.
                gl str: Country that will be focused on for the search
                location str: Country where the search will originate from. All locates can be found here: https://api.serper.dev/locations.
                autocorrect bool: Enable autocorrect on the queries while searching, if query is misspelled, will be updated.
                results int: Max number of results per page.
                page int: Max number of pages per call.
                tbs str: date time range, automatically set to any time by default.
                qdr:h str: Date time range for the past hour.
                qdr:d str: Date time range for the past 24 hours.
                qdr:w str: Date time range for past week.
                qdr:m str: Date time range for past month.
                qdr:y str: Date time range for past year.
        r   r   NrT   Tr   )numautocorrectpager   SERPER_API_KEYzXYou must supply a serper_search_api_key param or set environment variable SERPER_API_KEYzhttps://google.serper.dev)r   r   r!   query_paramsENABLE_EXTRA_SNIPPET_EXTRACTIONr	   rZ   updateserper_search_api_keyr   r   r   r   base_url)	r"   r   r   r   r   rM   rN   r\   r#   s	           r   r   zSerperRM.__init__  s    8 	1
 /N,+)15
 ()$ JD ,D$$eQZ0%:"))"**..AQ2Rj  '')>D&
 4 *,4D)ED&3r   c                    | j                    d| _        | j                  dd}t        j                  d| j                  ||      }|d k(  r%t        d|j                   d|j                         |j                         S )Nz/searchr   )z	X-API-KEYr   POST)r,   r5   z?Error had occurred while running the search process.
 Error is z, had failed with status code )	r   
search_urlr   r4   requestr   reasonr   r5   )r"   r   r,   r   s       r   serper_runnerzSerperRM.serper_runner  s    !]]O73 33.

 ##DOOW<
 tRS[SbSbRc  dB  CK  CW  CW  BX  Y  }}r   c                 0    | j                   }d| _         d|iS )Nr   r   r%   r&   s     r   r'   zSerperRM.get_usage_and_reset  s    


E""r   r(   r)   c           
         t        |t              r|gn|}| xj                  t        |      z  c_        g | _        g }|D ]Y  }|dk(  r	| j
                  }||d<   d|d<   | j                  |      | _        | j                  j                  | j                         [ g }| j                  rmg }| j                  D ]@  }|j                  dg       }	|	D ]'  }
|
j                  d      }|s|j                  |       ) B | j                  j                  |      }ni }| j                  D ]  }	 |j                  d      }	|j                  d      }|	D ]  }
|
j                  d      g}| j                  r1|j                  |j                  i       j                  d	g              |j                  ||
j                  d
      |
j                  d      ||j                  d      ndd         |S #  Y xY w)a  
        Calls the API and searches for the query passed in.


        Args:
            query_or_queries (Union[str, List[str]]): The query or queries to search for.
            exclude_urls (List[str]): Dummy parameter to match the interface. Does not have any effect.

        Returns:
            a list of dictionaries, each dictionary has keys of 'description', 'snippets' (list of strings), 'title', 'url'
        zQueries:ra   searchtypeorganiclinkknowledgeGraphrf   ri   rg   rh    ri   rg   r.   rh   )r1   r2   r!   r3   r>   r   r   r   r6   r   r   rZ   rj   r7   )r"   r(   r)   r;   r<   r=   r   urlsr   organic_resultsr   r.   ro   knowledge_graphri   s                  r   rB   zSerperRM.forward  s    *C0 ! 	 	

c'l"
 	-E
",,L !&L $,L ,,\:DKLL,	-  //D,, )"(**Y";. )G!++f-CC()) %)$7$7$H$H$N!$&!ll 	F"(**Y"7"(**-=">. G 'I 67H;; 155c2>BB:rR &,,(0%,[[%9#*;;v#6 $3#> !0 3 3M B%'		4 ! s   (B?G++G/)NrD   NFrp   rq   rr   )rF   rG   rH   r   ru   r   r   r'   r   r2   r   rB   rI   rJ   s   @r   r   r     si    D #
(-!"&#%74 74  74r&#
M!c49n(= M!TRUY M!r   r   c                   X     e Zd Z	 ddef fdZd Zg fdeeee   f   dee   fdZ	 xZ
S )BraveRMr   c                     t         |   |       |s*t        j                  j	                  d      st        d      |r|| _        nt        j                  d   | _        d| _        |r|| _        y d | _        y )Nr   BRAVE_API_KEYzNYou must supply brave_search_api_key or set environment variable BRAVE_API_KEYr   c                      yr   r   r   s    r   r   z"BraveRM.__init__.<locals>.<lambda>M  r   r   )	r   r   r   r   r   r   brave_search_api_keyr!   r   )r"   r   r   r   r#   s       r   r   zBraveRM.__init__;  sq     	1#BJJNN?,K`  "(<D%(*

?(CD%
 #2D #1D r   c                 0    | j                   }d| _         d|iS )Nr   r   r%   r&   s     r   r'   zBraveRM.get_usage_and_resetO  s    


5!!r   r(   r)   c           
      J   t        |t              r|gn|}| xj                  t        |      z  c_        g }|D ]  }	 dd| j                  d}t        j                  d| |      j                         }|j                  di       j                  dg       }|D ]U  }	|j                  |	j                  dg       |	j                  d	      |	j                  d
      |	j                  d      d       W  |S # t        $ r%}
t        j                  d| d|
        Y d}
~
d}
~
ww xY w)a  Search with api.search.brave.com for self.k top passages for query or queries

        Args:
            query_or_queries (Union[str, List[str]]): The query or queries to search for.
            exclude_urls (List[str]): A list of urls to exclude from the search results.

        Returns:
            a list of Dicts, each dict has keys of 'description', 'snippets' (list of strings), 'title', 'url'
        r   gzip)AcceptzAccept-EncodingzX-Subscription-TokenzChttps://api.search.brave.com/res/v1/web/search?result_filter=web&q=r+   webr>   extra_snippetsrg   r.   rh   r   r/   r0   N)r1   r2   r!   r3   r   r4   r   r5   r6   r8   r9   r:   )r"   r(   r)   r;   r<   r=   r,   r   r>   r   rA   s              r   rB   zBraveRM.forwardU  s>    *C0 ! 	
 	

c'l"
 	QEQ0'-,0,E,E
 $<<YZ_Y`a# $&  #,,ub155iD% F%,,(.

3CR(H%+ZZ%8#)::e#4+1::m+D		Q2 !   Q B5'A3OPPQs   B4C44	D"=DD"rC   rE   rJ   s   @r   r   r   :  sE    JN2?G2(" RT,! %c49n 5,!EI#Y,!r   r   c                   \     e Zd Z	 	 	 ddef fdZd Zg fdeeee   f   dee   fdZ	 xZ
S )SearXNGr   c                     t         |   |       |st        d      || _        || _        d| _        |r|| _        yd | _        y)a  Initialize the SearXNG search retriever.
        Please set up SearXNG according to https://docs.searxng.org/index.html.

        Args:
            searxng_api_url (str): The URL of the SearXNG API. Consult SearXNG documentation for details.
            searxng_api_key (str, optional): The API key for the SearXNG API. Defaults to None. Consult SearXNG documentation for details.
            k (int, optional): The number of top passages to retrieve. Defaults to 3.
            is_valid_source (Callable, optional): A function that takes a URL and returns a boolean indicating if the
            source is valid. Defaults to None.
        r   zYou must supply searxng_api_urlr   c                      yr   r   r   s    r   r   z"SearXNG.__init__.<locals>.<lambda>  r   r   N)r   r   r   searxng_api_urlsearxng_api_keyr!   r   )r"   r   r   r   r   r#   s        r   r   zSearXNG.__init__  sN    " 	1@AA..
#2D #1D r   c                 0    | j                   }d| _         d|iS )Nr   r   r%   r&   s     r   r'   zSearXNG.get_usage_and_reset  s    


5!!r   r(   r)   c           
      p   t        |t              r|gn|}| xj                  t        |      z  c_        g }| j                  rdd| j                   ini }|D ]  }	 |dd}t        j                  | j                  ||      }|j                         }	|	d   D ]i  }
| j                  |
d         s|
d   |vs |j                  |
j                  dd	      |
j                  dd	      g|
j                  d
d	      |
d   d       k  |S # t        $ r%}t        j                  d| d|        Y d}~d}~ww xY w)a  Search with SearxNG for self.k top passages for query or queries

        Args:
            query_or_queries (Union[str, List[str]]): The query or queries to search for.
            exclude_urls (List[str]): A list of urls to exclude from the search results.

        Returns:
            a list of Dicts, each dict has keys of 'description', 'snippets' (list of strings), 'title', 'url'
        AuthorizationzBearer r5   )ra   formatrb   r>   r.   r   r   rg   r   r/   r0   N)r1   r2   r!   r3   r   r4   r   r   r5   r   r6   r8   r9   r:   )r"   r(   r)   r;   r<   r,   r=   rY   r   r>   r@   rA   s               r   rB   zSearXNG.forward  s`    *C0 ! 	
 	

c'l"
 ## (<(<'=>? 	  	QEQ$7#<<(('& #--/ + 	A++AeH5!E(,:V)00/0uuY/C-.UU9b-A,B)*w);'(x			Q* !   Q B5'A3OPPQs&   AD/D7AD	D5D00D5rC   rE   rJ   s   @r   r   r     sO     
$(2
 "2:" RT.! %c49n 5.!EI#Y.!r   r   c                        e Zd ZdZ	 	 	 	 	 	 	 ddedededededef fd	Zd
 Z e	j                  e	j                  efddee      defd       Zg fdeeee   f   dee   fdZ xZS )DuckDuckGoSearchRMz:Retrieve information from custom queries using DuckDuckGo.rq   r   r   rM   rN   safe_searchregionc                    t         
|   |       	 ddlm} || _        t        |||      | _        d| _        d| _	        || _
        || _        |r|| _        nd | _         |       | _        y# t        $ r}	t	        d      |	d}	~	ww xY w)	a\  
        Params:
            min_char_count: Minimum character count for the article to be considered valid.
            snippet_chunk_size: Maximum character count for each snippet.
            webpage_helper_max_threads: Maximum number of threads to use for webpage helper.
            **kwargs: Additional parameters for the OpenAI API.
        r   r   )DDGSz4Duckduckgo requires `pip install duckduckgo_search`.NrT   apic                      yr   r   r   s    r   r   z-DuckDuckGoSearchRM.__init__.<locals>.<lambda>
  r   r   )r   r   duckduckgo_searchr   ImportErrorr   r	   rZ   r!   duck_duck_go_backendduck_duck_go_safe_searchduck_duck_go_regionr   ddgs)r"   r   r   rM   rN   r\   r   r   r   errr#   s             r   r   zDuckDuckGoSearchRM.__init__  s    " 	1	.
 +)15

 

 %*! )4% $*  #2D #1D  F	=  	F	s   A. .	B7BBc                 0    | j                   }d| _         d|iS )Nr   DuckDuckGoRMr%   r&   s     r   r'   z&DuckDuckGoSearchRM.get_usage_and_reset      


&&r      )max_time	max_tries
on_backoffgiveupr=   c                 j    | j                   j                  || j                  | j                        }|S )N)max_resultsbackend)r  textr   r   )r"   r=   r>   s      r   r   zDuckDuckGoSearchRM.request  s3     ))..tvvt/H/H ! 
 r   r(   r)   c           
         t        |t              r|gn|}| xj                  t        |      z  c_        g }|D ]  }| j	                  |      }|D ]  }t        |t
              st        d| d       #	 |j                  dd      }|j                  dd      }	|j                  d|	      }
|j                  dd      g}t        ||	|
|g      st        d|       | j                  |      r||vr||	|
|d	}|j                  |       nt        d
| d         |S # t        $ r-}t        dd| d       t        d| d|        Y d}~d}~ww xY w)a  Search with DuckDuckGoSearch for self.k top passages for query or queries
        Args:
            query_or_queries (Union[str, List[str]]): The query or queries to search for.
            exclude_urls (List[str]): A list of urls to exclude from the search results.
        Returns:
            a list of Dicts, each dict has keys of 'description', 'snippets' (list of strings), 'title', 'url'
        Invalid result: 
hrefNrg   rh   bodyMissing key(s) in result: r.   rg   rh   ri   invalid source  or url in exclude_urls$Error occurs when processing result=r0   r/   )r1   r2   r!   r3   r   dictr   r   allr   r   r6   r8   )r"   r(   r)   r;   r<   r=   r>   rn   r.   rg   rh   ri   r   rA   s                 r   rB   zDuckDuckGoSearchRM.forward"  s    *C0 ! 	
 	

c'l"
  	MEll5)G M!!T*,QCr23M%%-CEE'40E"#%%u"=K !fd 34H UKBC(+EaS)IJJ++C0S5L#&%*+6(0	" *008u4KLM3M	 	MD ! 	 ! MA&"QCrJK>ugRsKLLMs   2B#D	E$#EE)rD   Nrp   rq   rr   Onzus-en)rF   rG   rH   r   ru   r   r2   r   r'   backoffon_exceptionexpor8   r   r   r   r   r   rB   rI   rJ   s   @r   r   r     s    D $(!"&#%22 "2 	2
  2 2 2h'
 W	S  RT5! %c49n 55!EI#Y5!r   r   c            	       t     e Zd ZdZ	 	 	 	 	 	 	 ddedededef fdZd Zg fdee	e
e	   f   d	e
e	   fd
Z xZS )TavilySearchRMzRetrieve information from custom queries using Tavily. Documentation and examples can be found at https://docs.tavily.com/docs/python-sdk/tavily-search/examplesr   r   rM   rN   c                    t         
|   |       	 ddlm} |s*t
        j                  j                  d      st        d      |r|| _	        nt
        j                  d   | _	        || _
        t        |||      | _        d| _         || j                  	      | _        || _        |r|| _        yd
 | _        y# t        $ r}	t	        d      |	d}	~	ww xY w)a  
        Params:
            tavily_search_api_key str: API key for tavily that can be retrieved from https://tavily.com/
            min_char_count: Minimum character count for the article to be considered valid.
            snippet_chunk_size: Maximum character count for each snippet.
            webpage_helper_max_threads: Maximum number of threads to use for webpage helper.
            include_raw_content bool: Boolean that is used to determine if the full text should be returned.
        r   r   )TavilyClientz,Tavily requires `pip install tavily-python`.NTAVILY_API_KEYzPYou must supply tavily_search_api_key or set environment variable TAVILY_API_KEYrT   )r   c                      yr   r   r   s    r   r   z)TavilySearchRM.__init__.<locals>.<lambda>  r   r   )r   r   tavilyr#  r   r   r   r   r   tavily_search_api_keyr   r	   rZ   r!   tavily_clientinclude_raw_contentr   )r"   r'  r   r   rM   rN   r\   r)  r#  r  r#   s             r   r   zTavilySearchRM.__init__]  s    $ 	1	W+ %RZZ^^<L-Mb  #)>D&)+4D)ED&+)15
 
 *$2L2LM#6  #2D #1D ?  	WLMSVV	Ws   B: :	CCCc                 0    | j                   }d| _         d|iS )Nr   r!  r%   r&   s     r   r'   z"TavilySearchRM.get_usage_and_reset  s    


 %((r   r(   r)   c           
         t        |t              r|gn|}| xj                  t        |      z  c_        g }|D ]V  }| j                  | j
                  d}| j                  j                  |      }|j                  d      }|D ]  }	t        |	t              st        d|	 d       $	 |	j                  dd      }
|	j                  dd      }|	j                  dd      }g }|	j                  d	      r!|j                  |	j                  d	             n |j                  |	j                  d             t        |
|||g      st        d
|	       | j                  |
      r|
|vr|
|||d}|j                  |       nt        d|
 d        Y |S # t        $ r.}t        dd| d       t        d| d|        Y d}~Cd}~ww xY w)a  Search with TavilySearch for self.k top passages for query or queries
        Args:
            query_or_queries (Union[str, List[str]]): The query or queries to search for.
            exclude_urls (List[str]): A list of urls to exclude from the search results.
        Returns:
            a list of Dicts, each dict has keys of 'description', 'snippets' (list of strings), 'title', 'url'
        )r  include_raw_contentsr>   r  r  r.   Nrg   r   raw_body_contentr  r  r  r  r  r0   r/   )r1   r2   r!   r3   r   r)  r(  r   r   r  r   r6   r  r   r   r8   )r"   r(   r)   r;   r<   r=   argsresponseDatar>   rn   r.   rg   rh   ri   r   rA   s                   r   rB   zTavilySearchRM.forward  s    *C0 ! 	
 	

c'l"
 (	ME#vv(,(@(@D
  --44U;L"&&y1G  M!!T*,QCr23M%%t,CEE'40E"#%%	4"8K!Huu/0 .@(AB i(89 UKBC(+EaS)IJJ++C0S5L#&%*+6(0	" *008u4KLM; M(	MT ! 	 ! MA&"QCrJK>ugRsKLLMs   (C$F	G#GG)NrD   Nrp   rq   rr   F)rF   rG   rH   r   ru   r   r   r'   r   r2   r   rB   rI   rJ   s   @r   r!  r!  Z  s     k #$(!"&#%!42 42 "	42
 42  42l) RT=! %c49n 5=!EI#Y=!r   r!  c                   l     e Zd Z	 	 	 	 	 	 	 d	dededef fdZd Zg fdeee	e   f   de	e   fdZ
 xZS )
GoogleSearchr   rM   rN   c                    t         
|   |       	 ddlm} |s*t
        j                  j                  d      st        d      |s*t
        j                  j                  d      st        d	      |xs t
        j                  d   | _	        |xs t
        j                  d   | _
        |r|| _        nd
 | _         |dd| j                        | _        t        |||      | _        d| _        y# t        $ r}	t	        d      |	d}	~	ww xY w)a  
        Params:
            google_search_api_key: Google API key. Check out https://developers.google.com/custom-search/v1/overview
                "API key" section
            google_cse_id: Custom search engine ID. Check out https://developers.google.com/custom-search/v1/overview
                "Search engine ID" section
            k: Number of top results to retrieve.
            is_valid_source: Optional function to filter valid sources.
            min_char_count: Minimum character count for the article to be considered valid.
            snippet_chunk_size: Maximum character count for each snippet.
            webpage_helper_max_threads: Maximum number of threads to use for webpage helper.
        r   r   )buildz=GoogleSearch requires `pip install google-api-python-client`.NGOOGLE_SEARCH_API_KEYz[You must supply google_search_api_key or set the GOOGLE_SEARCH_API_KEY environment variableGOOGLE_CSE_IDzKYou must supply google_cse_id or set the GOOGLE_CSE_ID environment variablec                      yr   r   r   s    r   r   z'GoogleSearch.__init__.<locals>.<lambda>  r   r   customsearchv1)developerKeyrT   )r   r   googleapiclient.discoveryr3  r   r   r   r   r   google_search_api_keygoogle_cse_idr   servicer	   rZ   r!   )r"   r;  r<  r   r   rM   rN   r\   r3  r  r#   s             r   r   zGoogleSearch.__init__  s   , 	1	7
 %RZZ^^<S-Tm  RZZ^^O%D] 
 "HRZZ0G%H 	" +Ibjj.I#2D #1D Dt/I/I
 ,)15

 
?  	O	s   C0 0	D
9DD
c                 0    | j                   }d| _         d|iS )Nr   r1  r%   r&   s     r   r'   z GoogleSearch.get_usage_and_reset  r  r   r(   r)   c                    t        |t              r|gn|}| xj                  t        |      z  c_        i }|D ]  }	 | j                  j                         j                  || j                  | j                        j                         }|j                  dg       D ]A  }| j                  |d         s|d   |vs |d   |d   |j                  dd      d||d   <   C  | j                  j!                  t        |j#                                     }	g }
|	D ]#  }||   }|	|   d   |d<   |
j%                  |       % |
S # t        $ r&}t        j                  d| d	|        Y d
}~2d
}~ww xY w)a  Search using Google Custom Search API for self.k top results for query or queries.

        Args:
            query_or_queries (Union[str, List[str]]): The query or queries to search for.
            exclude_urls (List[str]): A list of URLs to exclude from the search results.

        Returns:
            A list of dicts, each dict has keys: 'title', 'url', 'snippet', 'description'.
        )ra   cxr   r   r   rg   rf   r   )rg   r.   rh   z%Error occurred while searching query r0   Nri   )r1   r2   r!   r3   r=  cserk   r<  r   executer   r   r8   r9   r:   rZ   rj   rl   r6   )r"   r(   r)   r;   rm   r=   r   itemrA   ro   r<   r.   r@   s                r   rB   zGoogleSearch.forward  s    *C0 ! 	
 	

c'l"
 	TETLL$$&T-- FF  
 WY  %LL"5 
D,,T&\: L< &*']#'<+/88Ir+B	8tF|4
	T6 !% 3 3 D D$$&'!
 ( 	(Cs#A1#6zBAjM$$Q'	(
 !   T EeWBqcRSSTs$   A7E 4E <$E  	E/	E**E/)NNrD   Nrp   rq   rr   rt   rJ   s   @r   r1  r1    sp     #
$(!"&#%8
 "8 8  8t' RT9! %c49n 59!EI#Y9!r   r1  c                   d     e Zd ZdZ	 	 	 	 	 ddef fdZd Zg fdeee	e   f   de	e   fdZ
 xZS )	AzureAISearcha.  Retrieve information from custom queries using Azure AI Search.

    General Documentation: https://learn.microsoft.com/en-us/azure/search/search-create-service-portal.
    Python Documentation: https://learn.microsoft.com/en-us/python/api/overview/azure/search-documents-readme?view=azure-python.
    r   c                    t         	|   |       	 ddlm} ddlm} |s*t        j                  j                  d      st        d      |r|| _        nt        j                  d   | _        |s*t        j                  j                  d	      st        d
      |r|| _        nt        j                  d	   | _        |s*t        j                  j                  d      st        d      |r|| _        nt        j                  d   | _        d| _        |r|| _        yd | _        y# t        $ r}t        d      |d}~ww xY w)a  
        Params:
            azure_ai_search_api_key: Azure AI Search API key. Check out https://learn.microsoft.com/en-us/azure/search/search-security-api-keys?tabs=rest-use%2Cportal-find%2Cportal-query
                "API key" section
            azure_ai_search_url: Custom Azure AI Search Endpoint URL. Check out https://learn.microsoft.com/en-us/azure/search/search-create-service-portal#name-the-service
            azure_ai_search_index_name: Custom Azure AI Search Index Name. Check out https://learn.microsoft.com/en-us/azure/search/search-how-to-create-search-index?tabs=portal
            k: Number of top results to retrieve.
            is_valid_source: Optional function to filter valid sources.
            min_char_count: Minimum character count for the article to be considered valid.
            snippet_chunk_size: Maximum character count for each snippet.
            webpage_helper_max_threads: Maximum number of threads to use for webpage helper.
        r   r   AzureKeyCredentialSearchClient<AzureAISearch requires `pip install azure-search-documents`.NAZURE_AI_SEARCH_API_KEYz[You must supply azure_ai_search_api_key or set environment variable AZURE_AI_SEARCH_API_KEYAZURE_AI_SEARCH_URLzSYou must supply azure_ai_search_url or set environment variable AZURE_AI_SEARCH_URLAZURE_AI_SEARCH_INDEX_NAMEzaYou must supply azure_ai_search_index_name or set environment variable AZURE_AI_SEARCH_INDEX_NAMEc                      yr   r   r   s    r   r   z(AzureAISearch.__init__.<locals>.<lambda>  r   r   )r   r   azure.core.credentialsrH  azure.search.documentsrJ  r   r   r   r   r   azure_ai_search_api_keyazure_ai_search_urlazure_ai_search_index_namer!   r   )
r"   rR  rS  rT  r   r   rH  rJ  r  r#   s
            r   r   zAzureAISearch.__init__[  s2   ( 	1	A; 'rzz~~%0
 m  %+BD(+-::6O+PD("2::>>:O+Pe  !':D$')zz2G'HD$)"**..(3
 s  (.HD+.0jj9U.VD+
 #2D #1D U  	N	s   D# #	D=,D88D=c                 0    | j                   }d| _         d|iS )Nr   rE  r%   r&   s     r   r'   z!AzureAISearch.get_usage_and_reset  s    


''r   r(   r)   c                 
   	 ddl m} ddlm} t        |t              r|gn|}| xj                  t        |      z  c_        g } || j                  | j                   || j                              }|D ]?  }		 |j                  |	d      }
|
D ]$  }|d   |d	   d
|d   gd}|j                  |       & A |S # t        $ r}t	        d      |d}~ww xY w# t        $ r%}t        j                   d|	 d|        Y d}~d}~ww xY w)a  Search with Azure Open AI for self.k top passages for query or queries

        Args:
            query_or_queries (Union[str, List[str]]): The query or queries to search for.
            exclude_urls (List[str]): A list of urls to exclude from the search results.

        Returns:
            a list of Dicts, each dict has keys of 'description', 'snippets' (list of strings), 'title', 'url'
        r   rG  rI  rK  Nr   )search_texttopmetadata_storage_pathrg   r   chunkr  r/   r0   )rP  rH  rQ  rJ  r   r1   r2   r!   r3   rS  rT  rR  r   r6   r8   r9   r:   )r"   r(   r)   rH  rJ  r  r;   r<   r   r=   r>   r   documentrA   s                 r   rB   zAzureAISearch.forward  s:   	A; *C0 ! 	
 	

c'l"
$$++t;;<

  	QEQ --Eq-A% 7F%&=>!'',%+G_$5	 H &,,X67	Q  ! C  	N	<  Q B5'A3OPPQs/   B7 7<C7	C CC	DC==D)NNNrD   N)rF   rG   rH   r   r   r   r'   r   r2   r   rB   rI   rJ   s   @r   rE  rE  T  s]     !% #'
$(C2 "C2J( RT0! %c49n 50!EI#Y0!r   rE  )r9   r   typingr   r   r   r  dspyr4   dspr   r   utilsr	   Retriever   rL   rw   r   r   r   r   r   r!  r1  rE  r   r   r   <module>ra     s     	 ( (    )  =!DMM =!@c! c!L^!t}} ^!B?!$-- ?!Da!t}} a!HG!dmm G!TQ!dmm Q!h! !D{!T]] {!|y!4== y!xB!DMM B!r   