
    Ig3                         d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZm	Z	m
Z
mZ d dlmZ d dlmZmZ  ej"                  e      Z G d de      Zy)    N)AnyDictIteratorList)Document)	BaseModelmodel_validatorc                   ^   e Zd ZU dZeed<   dZeed<   dZeed<   dZ	e
ed<   d	Zeed
<   dZe
ed<   dZe
ed<   dZe
ed<   dZeed<    ed      ededefd              ZdedefdZdedee   fdZdedee   fdZdedefdZdedee   fdZdedee   fdZd ed!edefd"Zd ed#edefd$Z y%)&PubMedAPIWrappera(  
    Wrapper around PubMed API.

    This wrapper will use the PubMed API to conduct searches and fetch
    document summaries. By default, it will return the document summaries
    of the top-k results of an input search.

    Parameters:
        top_k_results: number of the top-scored document used for the PubMed tool
        MAX_QUERY_LENGTH: maximum length of the query.
          Default is 300 characters.
        doc_content_chars_max: maximum length of the document content.
          Content will be truncated if it exceeds this length.
          Default is 2000 characters.
        max_retry: maximum number of retries for a request. Default is 5.
        sleep_time: time to wait between retries.
          Default is 0.2 seconds.
        email: email address to be used for the PubMed API.
    parsez;https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?base_url_esearchz:https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?base_url_efetch   	max_retryg?
sleep_time   top_k_resultsi,  MAX_QUERY_LENGTHi  doc_content_chars_maxzyour_email@example.comemailbefore)modevaluesreturnc                 ^    	 ddl }|j                  |d<   |S # t        $ r t        d      w xY w)z7Validate that the python package exists in environment.r   Nr   zZCould not import xmltodict python package. Please install it with `pip install xmltodict`.)	xmltodictr   ImportError)clsr   r   s      a/var/www/html/answerous/venv/lib/python3.12/site-packages/langchain_community/utilities/pubmed.pyvalidate_environmentz%PubMedAPIWrapper.validate_environment3   sB    	'ooF7O   	B 	s    ,queryc                    	 | j                  |d| j                         D cg c]  }d|d    d|d    d|d    d|d	     }}|rd
j                  |      d| j                   S dS c c}w # t        $ r}d| cY d}~S d}~ww xY w)z
        Run PubMed search and get the article meta information.
        See https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESearch
        It uses only the most informative fields of article meta information.
        NzPublished: 	Publishedz
Title: Titlez
Copyright Information: Copyright Informationz
Summary::
Summaryz

zNo good PubMed Result was foundzPubMed exception: )loadr   joinr   	Exception)selfr!   resultdocsexs        r   runzPubMedAPIWrapper.runB   s    	- #ii.E0E0E(FG
 	 f[12 3 /* +**01H*I)J K$Y/02D   D!">D$>$>? 7  	-'t,,	-s4    A, !A'!A, %A, 'A, ,	B5A?9B?Bc              #     K   | j                   dz   t        t        j                  j	                  |      h      z   d| j
                   dz   }t        j                  j                  |      }|j                         j                  d      }t        j                  |      }|d   d   }|d   d   D ]  }| j                  ||        yw)	z
        Search PubMed for documents matching the query.
        Return an iterator of dictionaries containing the document metadata.
        zdb=pubmed&term=z&retmode=json&retmax=z&usehistory=yutf-8esearchresultwebenvidlistN)r   strurllibr   quoter   requesturlopenreaddecodejsonloadsretrieve_article)r*   r!   urlr+   text	json_textr2   uids           r   	lazy_loadzPubMedAPIWrapper.lazy_load\   s      !! 6<<%%e,-./ &d&8&8%9GH 	 '',{{}##G,JJt$	?+H5_-h7 	5C''V44	5s   CC
c                 6    t        | j                  |            S )z
        Search PubMed for documents matching the query.
        Return a list of dictionaries containing the document metadata.
        )listrB   r*   r!   s     r   r'   zPubMedAPIWrapper.loadp   s    
 DNN5)**    docc                 >    |j                  d      }t        ||      S )Nr&   )page_contentmetadata)popr   )r*   rG   summarys      r   _dict2documentzPubMedAPIWrapper._dict2documentw   s    '')$Ws;;rF   c              #   `   K   | j                  |      D ]  }| j                  |        y wN)r!   )rB   rM   )r*   r!   ds      r   lazy_load_docszPubMedAPIWrapper.lazy_load_docs{   s1     e, 	)A%%a((	)s   ,.c                 8    t        | j                  |            S rO   )rD   rQ   rE   s     r   	load_docszPubMedAPIWrapper.load_docs   s    D''e'455rF   rA   r2   c                 >   | j                   dz   |z   dz   |z   }d}	 	 t        j                  j                  |      }	 |j                         j                  d
      }| j                  |      }| j                  ||      S # t        j                  j
                  $ r~}|j                  dk(  rc|| j                  k  rTt        d| j                  dd       t        j                  | j                         | xj                  dz  c_	        |d	z  }n|Y d }~nd }~ww xY w)Nzdb=pubmed&retmode=xml&id=z&webenv=r   i  zToo Many Requests, waiting for z.2fz seconds...      r0   )r   r5   r7   r8   error	HTTPErrorcoder   printr   timesleepr9   r:   r   _parse_article)	r*   rA   r2   r>   retryr+   exml_text	text_dicts	            r   r=   z!PubMedAPIWrapper.retrieve_article   s      )*  	 	 //4 ;;=''0JJx(	""3	22! <<)) 66S=UT^^%; ''+s&;;H JJt/OOq(OQJEG 	 s   A? ?DA4DDra   c                    	 |d   d   d   d   }|j                  di       j                  dg       }|D cg c]  }d	|v rd
|v r|d
    d|d	     }}|rdj                  |      nIt        |t              r|n7t        |t
              r&dj                  d |j                         D              nd}|j                  di       }dj                  |j                  dd      |j                  dd      |j                  dd      g      }	||j                  dd      |	|j                  di       j                  dd      |dS # t         $ r |d   d   d   }Y Fw xY wc c}w )NPubmedArticleSetPubmedArticleMedlineCitationArticlePubmedBookArticleBookDocumentAbstractAbstractTextz#textz@Labelz: 
c              3   2   K   | ]  }t        |        y w)N)r4   ).0values     r   	<genexpr>z2PubMedAPIWrapper._parse_article.<locals>.<genexpr>   s     MUc%jMs   zNo abstract availableArticleDate-Year MonthDayArticleTitleCopyrightInformation)rA   r$   r#   r%   r&   )KeyErrorgetr(   
isinstancer4   dictr   )
r*   rA   ra   arabstract_texttxt	summariesrL   a_dpub_dates
             r   r]   zPubMedAPIWrapper._parse_article   s   	T-.?@QRB
 z2.22>2F %
#~(c/ 8}oRG~.
	 
  IIi  mS1  "-6 IIMm6J6J6LMM0 	 ff]B'88WWVR #'''2"6r8JK

 VVNB/!%'VVJ%;%?%?&& 
 	
5  	T-./BCNSB	T
s   D? E?EEN)!__name__
__module____qualname____doc__r   __annotations__r   r4   r   r   intr   floatr   r   r   r   r	   classmethodr   r    r.   r   r{   rB   r   r'   r   rM   rQ   rS   r=   r]    rF   r   r   r      sR   ( J 	F c  XOSWIsJ M3c!%3%)E3)(#$ 3   $- - -45s 5x~ 5(+# +$t* +<$ <8 <)C )HX,> )6s 6tH~ 63C 3 3 3@'
# '
$ '
4 '
rF   r   )r;   loggingr[   urllib.errorr5   urllib.parseurllib.requesttypingr   r   r   r   langchain_core.documentsr   pydanticr   r	   	getLoggerr   loggerr   r   rF   r   <module>r      sC          , , - /			8	$z
y z
rF   