
    R"hR                        d dl Zd dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	m
Z
 d dlmZ d dlmZmZmZmZmZ ddlmZ  ej*                  ej,                  d        ej.                  e      Zerdd	lmZ  G d
 de	      Z G d d      Z G d d      Z G d de	      Z G d d      Z  G d de	      Z! G d de	      Z" G d de	      Z# G d de	      Z$d Z% G d de	      Z& G d d e	      Z' G d! d"e	      Z(y)#    N)ABCabstractmethod)OrderedDict)DictListOptionalUnionTYPE_CHECKING   )ArticleTextProcessingz(%(name)s : %(levelname)-8s : %(message)s)levelformat)LoggingWrapperc                   &    e Zd ZdZd Zed        Zy)InformationTableaO  
    The InformationTable class serves as data class to store the information
    collected during KnowledgeCuration stage.

    Create subclass to incorporate more information as needed. For example,
    in STORM paper https://arxiv.org/pdf/2402.14207.pdf, additional information
    would be perspective guided dialogue history.
    c                      y N selfs    :/var/www/html/sandstorm/storm/knowledge_storm/interface.py__init__zInformationTable.__init__!           c                       y r   r   )kwargss    r   retrieve_informationz%InformationTable.retrieve_information$       r   N)__name__
__module____qualname____doc__r   r   r   r   r   r   r   r      s       r   r   c                   L    e Zd ZdZddZd Zd Zd Zd Zd Ze	d	        Z
d
 Zy)Informationa  Class to represent detailed information.

    Inherits from Information to include a unique identifier (URL), and extends
    it with a description, snippets, and title of the storm information.

    Attributes:
        description (str): Brief description.
        snippets (list): List of brief excerpts or snippets.
        title (str): The title or headline of the information.
        url (str): The unique URL (serving as UUID) of the information.
    Nc                 `    || _         || _        || _        || _        ||ni | _        d| _        y)a`  Initialize the Information object with detailed attributes.

        Args:
            url (str): The unique URL serving as the identifier for the information.
            description (str): Detailed description.
            snippets (list): List of brief excerpts or snippet.
            title (str): The title or headline of the information.
        N)descriptionsnippetstitleurlmetacitation_uuid)r   r*   r'   r(   r)   r+   s         r   r   zInformation.__init__6   s7     ' 
 ,D"	r   c                 h    t        | j                  t        t        | j                              f      S r   )hashr*   tuplesortedr(   r   s    r   __hash__zInformation.__hash__F   s,    fT]]+,
 	
r   c                     t        |t              sy| j                  |j                  k(  xrN t        | j                        t        |j                        k(  xr! | j                         |j                         k(  S )NF)
isinstancer$   r*   setr(   	_meta_str)r   others     r   __eq__zInformation.__eq__N   s^    %-HH		! 6DMM"c%..&996 EOO$55	
r   c           
          t        | j                  | j                  t        t	        | j
                              | j                         f      d      S )N   )int	_md5_hashr*   r/   r0   r(   r5   r   s    r   r1   zInformation.__hash__W   s>    NNDHHeF4==,A&BDNNDTUV
 	
r   c                 z    d| j                   j                  dd       d| j                   j                  dd       S )z>Generate a string representation of relevant meta information.z
Question: question z	, Query: query)r+   getr   s    r   r5   zInformation._meta_str]   s7    DIIMM*b9:)DIIMMRY[]D^C_``r   c                     t        |t        t        t        f      rt	        j
                  |d      }t        j                  t        |      j                  d            j                         S )z'Generate an MD5 hash for a given value.T)	sort_keyszutf-8)r3   dictlistr/   jsondumpshashlibmd5strencode	hexdigest)r   values     r   r;   zInformation._md5_hasha   sJ    edD%01JJu5E{{3u:,,W56@@BBr   c           
           | |d   |d   |d   |d   |j                  dd            }t        |j                  dd	            |_        |S )
a~  Create a Information object from a dictionary.
           Usage: info = Information.from_dict(storm_info_dict)

        Args:
            info_dict (dict): A dictionary containing keys 'url', 'description',
                              'snippets', and 'title' corresponding to the object's attributes.

        Returns:
            Information: An instance of Information.
        r*   r'   r(   r)   r+   N)r*   r'   r(   r)   r+   r,   r&   )r@   r:   r,   )cls	info_dictinfos      r   	from_dictzInformation.from_dictg   s[     % !-0z*G$vt,
 !!CDr   c                     | j                   | j                  | j                  | j                  | j                  | j
                  dS )Nr*   r'   r(   r)   r+   r,   rS   r   s    r   to_dictzInformation.to_dict}   s:    88++ZZII!//
 	
r   r   )r   r    r!   r"   r   r1   r7   r5   r;   classmethodrQ   rT   r   r   r   r$   r$   )   s@    
  


aC  *
r   r$   c                   ,    e Zd ZdZddefdZddZd Zy)	ArticleSectionNodez
    The ArticleSectionNode is the dataclass for handling the section of the article.
    The content storage, section writing preferences are defined in this node.
    Nsection_namec                 <    || _         || _        g | _        d| _        y)z
        section_name: section heading in string format. E.g. Introduction, History, etc.
        content: content of the section. Up to you for design choice of the data structure.
        N)rX   contentchildren
preference)r   rX   rZ   s      r   r   zArticleSectionNode.__init__   s!    
 )r   c                 x    |r| j                   j                  d|       y | j                   j                  |       y )Nr   )r[   insertappend)r   new_child_nodeinsert_to_fronts      r   	add_childzArticleSectionNode.add_child   s+    MM  N3MM  0r   c                 :    | j                   j                  |       y r   )r[   remove)r   childs     r   remove_childzArticleSectionNode.remove_child   s    U#r   r   )F)r   r    r!   r"   rI   r   rb   rf   r   r   r   rW   rW      s    
S 1$r   rW   c                       e Zd Zd Zdededee   fdZedefd       Z	d Z
dee   fdZeed	ed
efd              ZddZy)Articlec                 $    t        |      | _        y r   )rW   root)r   
topic_names     r   r   zArticle.__init__   s    &z2	r   nodenamereturnc                 x    |j                   |k(  r|S |j                  D ]  }| j                  ||      }|s|c S  y)a  
        Return the node of the section given the section name.

        Args:
            node: the node as the root to find.
            name: the name of node as section name

        Return:
            reference of the node or None if section name has no match
        N)rX   r[   find_section)r   rl   rm   re   results        r   rp   zArticle.find_section   sI     $K]] 	E&&ud3F	 r   c                      y)zC
        Export Article object into string representation.
        Nr   r   s    r   	to_stringzArticle.to_string   s    r   c                 Z    dt         t        t         f   ffd | j                        S )aS  
        Generates a hierarchical tree structure representing the outline of the document.

        Returns:
            Dict[str, Dict]: A nested dictionary representing the hierarchical structure of the document's outline.
                             Each key is a section name, and the value is another dictionary representing the child sections,
                             recursively forming the tree structure of the document's outline. If a section has no subsections,
                             its value is an empty dictionary.

        Example:
            Assuming a document with a structure like:
            - Introduction
                - Background
                - Objective
            - Methods
                - Data Collection
                - Analysis
            The method would return:
            {
                'Introduction': {
                    'Background': {},
                    'Objective': {}
                },
                'Methods': {
                    'Data Collection': {},
                    'Analysis': {}
                }
            }
        rn   c                 `    i }| j                   D ]  } |      ||j                  <    |r|S i S r   )r[   rX   )rl   treere   
build_trees      r   rw   z,Article.get_outline_tree.<locals>.build_tree   s=    D =+5e+<U''(=4'R'r   )r   rI   rj   )r   rw   s    @r   get_outline_treezArticle.get_outline_tree   s(    >	(S$Y 	( $))$$r   c                 h    | j                   j                  D cg c]  }|j                   c}S c c}w )z/
        Get first level section names
        )rj   r[   rX   )r   is     r   get_first_level_section_namesz%Article.get_first_level_section_names   s&     )-		(:(:;1;;;s   /rk   article_textc                      y)zH
        Create an instance of the Article object from a string
        Nr   )rN   rk   r|   s      r   from_stringzArticle.from_string   s     	r   Nc                     || j                   }|j                  D cg c]  }| j                  |      s| c}|j                  d d  |j                  |j                  dk(  r|j                  sy |S c c}w )Nr>   )rj   r[   prune_empty_nodesrZ   )r   rl   re   s      r   r   zArticle.prune_empty_nodes   sk    <99D  $}}
0F0Fu0ME
a LL DLLB$6K
s
   A2A2r   )r   r    r!   r   rW   rI   r   rp   r   rs   rx   r   r{   rU   r~   r   r   r   r   rh   rh      s    3&.1	$	%* 3  
%%N<tCy < S    r   rh   c                   p    e Zd ZdZddej
                  defdZd Zg fde	e
ee
   f   dee
   dee   fd	Zy
)	Retrievera  
    An abstract base class for retriever modules. It provides a template for retrieving information based on a query.

    This class should be extended to implement specific retrieval functionalities.
    Users can design their retriever modules as needed by implementing the retrieve method.
    The retrieval model/search engine used for each part should be declared with a suffix '_rm' in the attribute name.
    rm
max_threadc                      || _         || _        y r   )r   r   )r   r   r   s      r   r   zRetriever.__init__  s    $r   c                     g }t        t        | d      d      r)|j                  t        | d      j                                i }|D ]1  }|j	                         D ]  \  }}||vr|||<   ||xx   |z  cc<    3 |S )Nr   get_usage_and_reset)hasattrgetattrr_   r   items)r   combined_usagename_to_usageusage
model_name	query_cnts         r   collect_and_reset_rm_usagez$Retriever.collect_and_reset_rm_usage  s    74&(=>!!'$"5"I"I"KL# 	;E). ;%
I]209M*-!*-:-	;	; r   r?   exclude_urlsrn   c                 (    t        |t              r|n|g}g } fd}t        j                  j	                   j
                        5 }t        |j                  ||            }d d d        D ]  }|j                  |        |S # 1 sw Y   #xY w)Nc                 &   j                  | g      }g }|D ]t  }t        t        |d               D ]#  }t        j                  |d   |         |d   |<   % t
        j                  |      }| |j                  d<   |j                  |       v |S )N)query_or_queriesr   r(   r?   )	r   rangelenr   remove_citationsr$   rQ   r+   r_   )qretrieved_data_listlocal_to_returndatarz   
storm_infor   r   s         r   process_queryz)Retriever.retrieve.<locals>.process_query&  s    "&''"#< #* # !O+ 	3s4
#345 A +@*P*PZ(++D$Q' )2248
+,
(&&z2	3 #"r   )max_workers)r3   rD   
concurrentfuturesThreadPoolExecutorr   mapextend)	r   r?   r   queries	to_returnr   executorresultsrq   s	   ` `      r   retrievezRetriever.retrieve   s     &eT2%		#" 22 3 
 	A8<<w?@G	A
  	%FV$	% 	A 	As   
BBN)r   )r   r    r!   r"   dspyRetriever:   r   r   r	   rI   r   r$   r   r   r   r   r   r     s]    4== c   GI3S	>*:>s)	k	r   r   c                   2    e Zd ZdZdefdZedefd       Zy)KnowledgeCurationModulez`
    The interface for knowledge curation stage. Given topic, return collected information.
    	retrieverc                     || _         y)z7
        Store args and finish initialization.
        N)r   )r   r   s     r   r   z KnowledgeCurationModule.__init__G  s     #r   rn   c                      y)z
        Curate information and knowledge for the given topic

        Args:
            topic: topic of interest in natural language.

        Returns:
            collected_information: collected information in InformationTable type.
        Nr   )r   topics     r   researchz KnowledgeCurationModule.researchM  s     	r   N)	r   r    r!   r"   r   r   r   r   r   r   r   r   r   r   B  s/    #) # 
!1 
 
r   r   c                   .    e Zd ZdZedededefd       Zy)OutlineGenerationModulez
    The interface for outline generation stage. Given topic, collected information from knowledge
    curation stage, generate outline for the article.
    r   information_tablern   c                      y)a~  
        Generate outline for the article. Required arguments include:
            topic: the topic of interest
            information_table: knowledge curation data generated from KnowledgeCurationModule

        More arguments could be
            1. draft outline
            2. user provided outline

        Returns:
            article_outline of type ArticleOutline
        Nr   )r   r   r   r   s       r   generate_outlinez(OutlineGenerationModule.generate_outlinea  s      	r   N)	r   r    r!   r"   r   rI   r   rh   r   r   r   r   r   r   [  s3    
 -=	 r   r   c            	       2    e Zd ZdZededededefd       Zy)ArticleGenerationModule
    The interface for article generation stage. Given topic, collected information from
    knowledge curation stage, generated outline from outline generation stage,
    r   r   article_with_outlinern   c                      y)a$  
        Generate article. Required arguments include:
            topic: the topic of interest
            information_table: knowledge curation data generated from KnowledgeCurationModule
            article_with_outline: article with specified outline from OutlineGenerationModule
        Nr   )r   r   r   r   r   s        r   generate_articlez(ArticleGenerationModule.generate_articlez  s     	r   N)	r   r    r!   r"   r   rI   r   rh   r   r   r   r   r   r   t  sA    
  , &	 
 r   r   c                   .    e Zd ZdZedededefd       Zy)ArticlePolishingModuler   r   draft_articlern   c                      y)z
        Polish article. Required arguments include:
            topic: the topic of interest
            draft_article: draft article from ArticleGenerationModule.
        Nr   )r   r   r   r   s       r   polish_articlez%ArticlePolishingModule.polish_article  s     	r   N)r   r    r!   r"   r   rI   rh   r   r   r   r   r   r     s0    
 C  g  r   r   c                 B     t        j                          fd       }|S )z2Decorator to log the execution time of a function.c                     t        j                          } | g|i |}t        j                          }||z
  }t        j                  j                   d|dd       || j                   j                  <   |S )N executed in .4f seconds)timeloggerrP   r   )r   argsr   
start_timerq   end_timeexecution_timefuncs          r   wrapperz#log_execution_time.<locals>.wrapper  sn    YY[
d,T,V,99;!J.t}}o]>#2FhOP#1		$-- r   	functoolswraps)r   r   s   ` r   log_execution_timer     s'     __T  Nr   c                   .    e Zd ZdZd Zd Zd Zd Zd Zy)	LMConfigszAbstract base class for language model configurations of the knowledge curation engine.

    The language model used for each part should be declared with a suffix '_lm' in the attribute name.
    c                      y r   r   r   s    r   r   zLMConfigs.__init__  r   r   c                     | j                   D ]0  }d|v st        | |      t        j                  d| d| d       2 y )N_lmzLanguage model for z% is not initialized. Please call set_z())__dict__r   loggingwarningr   	attr_names     r   
init_checkzLMConfigs.init_check  sK     	I	!gdI&>&F))4YZcYddfg	r   c                     g }| j                   D ]T  }d|v st        t        | |      d      s|j                  t        | |      j                         g t        | |      _        V |S )Nr   history)r   r   r   r   r   )r   r   r   s      r   collect_and_reset_lm_historyz&LMConfigs.collect_and_reset_lm_history  sa     	6I	!ggdI.F	&RwtY7??@35i(0	6
 r   c                 X   g }| j                   D ]G  }d|v st        t        | |      d      s|j                  t        | |      j	                                I i }|D ]J  }|j                         D ]5  \  }}||vr|||<   ||   dxx   |d   z  cc<   ||   dxx   |d   z  cc<   7 L |S )Nr   r   prompt_tokenscompletion_tokens)r   r   r   r_   r   r   )r   r   r   model_name_to_usager   r   tokenss          r   collect_and_reset_lm_usagez$LMConfigs.collect_and_reset_lm_usage  s     	VI	!gi(*?' %%gdI&>&R&R&TU		V !# 
	E&+kkm 	"
F%886<'
3'
3OD'I D (
34GHF+M H	
	 #"r   c                     t        | j                  D ci c]3  }d|v r-t        t        | |      d      r|t        | |      j                  5 c}      S c c}w )Nr   r   )r   r   r   r   r   r   s     r   logzLMConfigs.log  sZ     "&I%''$	2JH*U 743:::
 	
s   8AN)	r   r    r!   r"   r   r   r   r   r   r   r   r   r   r     s     
#.
r   r   c                       e Zd ZdefdZd Zd Zedee	   fd       Z
edefd       Zedefd       Zedefd	       Zed
        Zd Zd Zy)Engine
lm_configsc                 <    || _         i | _        i | _        i | _        y r   )r   r   lm_costrm_cost)r   r   s     r   r   zEngine.__init__  s    $	r   c                 F     t        j                         fd       }|S )zcDecorator to log the execution time, language model usage, and retrieval model usage of a function.c                     t        j                          } | i |}t        j                          }||z
  }|j                   j                  <   t        j                  j                   d|dd       j                  j                         j                  j                  <   t        d      r1j                  j                         j                  j                  <   |S )Nr   r   r   r   )r   r   r   rP   r   r   r   r   r   r   r   )r   r   r   rq   r   r   r   r   s         r   r   z:Engine.log_execution_time_and_lm_rm_usage.<locals>.wrapper  s    J4*6*Fyy{H%
2N'5DIIdmm$KK4==/~c6J(ST*.//*T*T*VDLL't[)NN==? T]]+ Mr   r   )r   r   r   s   `` r   "log_execution_time_and_lm_rm_usagez)Engine.log_execution_time_and_lm_rm_usage  s'     
		 
	 r   c           	          t        |       D cg c]*  }t        t        | |            r|j                  d      r|, }}|D ],  }t        | |      }| j	                  |      }t        | ||       . yc c}w )z+Apply decorators to methods that need them.run_N)dircallabler   
startswithr   setattr)r   method_namemethods_to_decorateoriginal_methoddecorated_methods        r   apply_decoratorszEngine.apply_decorators   s      #4y
k238N8Nv8V 
 

 / 	9K%dK8O#FFWD+'78	9
s   /A1rn   c                      y r   r   r   r   s     r   run_knowledge_curation_modulez$Engine.run_knowledge_curation_module  r   r   c                      y r   r   r   kwargs     r   run_outline_generation_modulez$Engine.run_outline_generation_module  r   r   c                      y r   r   r  s     r   run_article_generation_modulez$Engine.run_article_generation_module  r   r   c                      y r   r   r  s     r   run_article_polishing_modulez#Engine.run_article_polishing_module  r   r   c                      y r   r   r   s     r   runz
Engine.run  r   r   c                    t        d       | j                  j                         D ]  \  }}t        | d|dd        t        d       | j                  j                         D ]:  \  }}t        |        |j                         D ]  \  }}t        d| d|         < t        d       | j                  j                         D ]  \  }}t        | d|         y )Nz***** Execution time *****: r   r   z+***** Token usage of language models: *****z    z2***** Number of queries of retrieval models: *****)printr   r   r   r   )r   kvr   r   s        r   summaryzEngine.summary   s    *+IIOO% 	+DAqQCr!C)*	+ 	;<LL&&( 	5DAqQCM&'ggi 5"
FZL6(345	5
 	BCLL&&( 	DAqQCr!+	r   c                 .    i | _         i | _        i | _        y r   )r   r   r   r   s    r   resetzEngine.reset/  s    	r   N)r   r    r!   r   r   r   r   r   r   r   r   rh   r  r  r  r	  r  r  r   r   r   r   r     s    9 (
9 BR9S         w    r   r   c                   ^    e Zd ZdZddlmZmZ dededefdZd Z	e
d	ed
ee   ddfd       Zy)Agenta  
    Interface for STORM and Co-STORM LLM agent

    This class must be implemented by any subclass of `Agent` to define how the agent generates an utterance.
    The generated utterance can be influenced by the conversation history, knowledge base, and any additional parameters passed via `kwargs`.
    The implementation should align with the specific role and perspective of the agent, as defined by the agent's topic, role name, and role description.

    Args:
        knowledge_base (KnowledgeBase): The current knowledge base (e.g., mind map in Co-STORM) that contains the accumulated information relevant to the conversation.
        conversation_history (List[ConversationTurn]): A list of past conversation turns, providing context for generating the next utterance.
                                                       The agent can refer to this history to maintain continuity and relevance in the conversation.
        logging_wrapper (LoggingWrapper): A wrapper used for logging important events during the utterance generation process.
        **kwargs: Additional arguments that can be passed to the method for more specialized utterance generation behavior depending on the agent's specific implementation.

    Returns:
        ConversationTurn: A new conversation turn generated by the agent, containing the agent's response, including the role, utterance type, and relevant information from the knowledge base.

    Notes:
        - Subclasses of `Agent` should define the exact strategy for generating the utterance, which could involve interacting with a language model, retrieving relevant knowledge, or following specific conversational policies.
        - The agent's role, perspective, and the knowledge base content will influence how the utterance is formulated.
    r   )KnowledgeBaseConversationTurnr   	role_namerole_descriptionc                 .    || _         || _        || _        y r   )r   r  r  )r   r   r  r  s       r   r   zAgent.__init__N  s    
" 0r   c                 h    | j                   r| j                   d| j                    S | j                  S )Nr  )r  r  r   s    r   get_role_descriptionzAgent.get_role_descriptionS  s2      nn%R(=(='>??~~r   knowledge_baseconversation_historylogging_wrapperr   c                      y r   r   )r   r  r  r  r   s        r   generate_utterancezAgent.generate_utteranceX  s     	r   N)r   r    r!   r"   	dataclassr  r  rI   r   r  r   r   r  r   r   r   r  r  5  sa    , ;1c 1c 1S 1

 % ##34 *	 r   r  ))concurrent.futuresr   r   r   rG   rE   r   r   abcr   r   collectionsr   typingr   r   r   r	   r
   utilsr   basicConfigINFO	getLoggerr   r   r  r   r   r$   rW   rh   r   r   r   r   r   r   r   r   r  r   r   r   <module>r)     s           # # = = (   
,,I 
		8	$/s $\
 \
~$ $4_c _D; ;|c 2c 2c .S   7
 7
tMS M`+C +r   