
    ^gg                     N   d dl Z d dlZd dlmZ d dlZd dlZd dlZd dlZd dlZ	d dl
Zd dlZd dlmZ d dlmZmZmZ d dlZd dlmZ d dlmZ d dlmZ d dlmZmZ d d	lmZ d d
lm Z  d dl!m"Z"m#Z# d dl$m%Z%m&Z& d dl'm'Z' d dl(m)Z)  e)        dZ* ejV                  d      Z, G d d      Z-y)    N)embedding_functions)BeautifulSoup)ListDictOptional)OpenAI)CrossEncoder)Document)PyPDFLoader
TextLoader)RecursiveCharacterTextSplitter)UnstructuredMarkdownLoader)ThreadPoolExecutoras_completed)encode_imageparse_scorm_content)datetime)load_dotenvzgpt-4o-2024-08-06OPENAI_MODELc                   B   e Zd Zd-dedededededededefd	Zd
 Zdeeef   fdZ	d Z
dedee   fdZd Zd.dedededededededefdZefdZefdZd Zd Zd/dededed ee   d!ee   d"ee   defd#Zd$ Zd% Zd& Zd' Zd eddfd(Zd0d)ed ed*ed+edee   f
d,Zy)1HybridSearchcollection_name	user_codefolder_nameduckdb_pathuuid_storage_path
chunk_sizechunk_overlapalphac	                    t        j                  d      dz   t        |      z   dz   t        |      z   }	t        j                  d      dz   t        |      z   dz   t        |      z   }
t               | _        || _        || _        || _        |	dz   |z   | _        |
| _	        t        dd      | _        | j                         | _        |	dz   }t         j                  j                  |      st        j                   |       t#        j$                  |      | _        t)        j*                  t        j                  d	      t        j                  d
            }| j&                  j-                  ||      | _        t1        j2                  |	dz   |z         | _        t         j                  j7                  |	|      | _        | j;                          d| _        t?        ||t@        d      | _!        y)z5
        Initialize the hybrid search system
        DB_PATH/STORAGE_PATHz$cross-encoder/ms-marco-MiniLM-L-6-v2i   )
max_lengthz
/chroma_db)pathOPENAI_API_KEYEMBEDDING_MODEL)api_key
model_name)nameembedding_functionffffff?F)r   r   length_functionis_separator_regexN)"osgetenvstrr   openai_clientr   r   r   r   document_upload_pathr	   reranker_model_load_uuid_storagedocument_uuidsr%   existsmakedirschromadbPersistentClientchroma_clientr   OpenAIEmbeddingFunctionget_or_create_collectionchroma_collectionduckdbconnectconnjoin
duckdbpath_setup_duckdbSCORE_THRESHOLDr   lentext_splitter)selfr   r   r   r   r   r   r   r   r!   UPLOAD_PATHpersist_directory	openai_efs                4/var/www/html/answerous/dependencies/HybridSearch.py__init__zHybridSearch.__init__"   s    ))I&s*3y>9#=c+>NNii/3C	NB3Fs;GWW#X.$*!(->!>$/!*+Q^ab"557
 $L0ww~~/0KK)*%66<MN'??II./yy!23
	 "&!3!3!L!L ( "M "
 NN73;{#:;	'',,w<" <!'$	
    c                     | j                   j                  |D cg c]	  }||d   f c}      }t        ||      D ]
  \  }}||d<    |S c c}w )Ncontentreranker_score)r4   predictzip)rH   queryresultschunkscoresscores         rL   rerankerzHybridSearch.rerankerS   s`    $$,,U\-]EueI6F.G-]^0 	*LE5$)E"#	* 	 .^s   A
returnc                     t         j                  j                  | j                        r5t	        | j                  d      5 }t        j                  |      cddd       S i S # 1 sw Y   i S xY w)z8
        Load the UUID storage from a JSON file
        rN)r/   r%   r7   r   openjsonloadrH   files     rL   r5   zHybridSearch._load_uuid_storage[   sV     77>>$001d,,c2 'dyy' '	'	s    A!!A+c                     t        | j                  d      5 }t        j                  | j                  |d       ddd       y# 1 sw Y   yxY w)z6
        Save the UUID storage to a JSON file
        w   )indentN)r]   r   r^   dumpr6   r`   s     rL   _save_uuid_storagezHybridSearch._save_uuid_storaged   sA     $((#. 	;$IId))4:	; 	; 	;s   #AAdocument_namec                 `    | j                   j                         D ]  \  }}|d   |k(  s|c S  y)z=
        Retrieve the UUID for a given document name
        sanitized_nameN)r6   items)rH   rh   uuiddetailss       rL   get_document_uuidzHybridSearch.get_document_uuidk   s>     "00668 	MD''(M9	 rN   c                 z    t        j                   j                        5 }|j                  d       |j                  d       |j                  d       |j                  d       |j                  d       	 |j	                  d fdd	       d
d
d
       y
# t         j
                  $ r Y w xY w# 1 sw Y   y
xY w)z2
        Set up DuckDB tables and indexes
        z
                CREATE TABLE IF NOT EXISTS documents (
                    chunk_id VARCHAR PRIMARY KEY,
                    document_uuid VARCHAR,
                    content TEXT,
                    metadata JSON
                )
            z
                CREATE TABLE IF NOT EXISTS terms (
                    term VARCHAR,
                    chunk_id VARCHAR,
                    tf INTEGER,
                    UNIQUE(term, chunk_id)
                )
            z
                CREATE TABLE IF NOT EXISTS doc_stats (
                    collection_name VARCHAR PRIMARY KEY,
                    total_chunks INTEGER,
                    avg_chunk_length DOUBLE
                )
            zHCREATE INDEX IF NOT EXISTS idx_document_uuid ON documents(document_uuid)z2CREATE INDEX IF NOT EXISTS idx_term ON terms(term)
bm25_scorec                 .    j                  | ||||      S )N)_bm25_score)tfdfdoc_lenavg_doc_len
total_docsrH   s        rL   <lambda>z,HybridSearch._setup_duckdb.<locals>.<lambda>   s    ((R+zR rN   DOUBLE)return_typeN)r?   r@   rC   executecreate_functionCatalogException)rH   rA   s   ` rL   rD   zHybridSearch._setup_duckdbt   s     ^^DOO, -	LL   LL   LL   LLcdLLMN
$$ S (	 % K-	 -	V ** W-	 -	s*   AB18BB.+B1-B..B11B:rs   rt   ru   rv   rw   k1bc                     t        j                  ||z
  dz   |dz   z  dz         }||dz   z  ||d|z
  ||z  |z  z   z  z   z  }	t        ||	z        S )z-Calculate BM25 score for a term in a document      ?   )nplogfloat)
rH   rs   rt   ru   rv   rw   r~   r   idftf_adjusteds
             rL   rr   zHybridSearch._bm25_score   se    ffj2o+S9A=>R!V}bAEAK+<U4U.V)VWS;&''rN   c           	          t        |      }d| d| d}| j                  j                  j                  j	                  |dd|ddd| d	d
dgdgddi      }|j
                  d   j                  j                  S )Nz+
        You are given a image from a pdf: z
        Along with the image you are provided with text from the pdf that is present around it.
        Your task is to generate a detailed description of that image using the image and the context provided.
        
        <context>
        a6  
        </context>
        
        Just write the description in a natural flow, do not begin like "this image...".
        Give a detailed description, without leaving any information.
        The response must be 2-3 passage long.
        Do not assume anything, or add anything extra on your own.
        usertexttyper   	image_urlzdata:image/jpeg;base64,low)urldetail)r   r   rolerP   r   modelmessagesresponse_formatr   )r   r2   chatcompletionscreatechoicesmessagerP   )rH   pdf_name
image_pathcontextr   base64_imagepromptresponses           rL   get_image_descriptionz"HybridSearch.get_image_description   s    #J/++3* 5	
 
	 	 %%**66== # %+$*
 %0*A,(P*/) $ $FO) > 
, "**222rN   c                     d| d| d| d}| j                   j                  j                  j                  |dd|dgdgd	di
      }|j                  d   j
                  j                  S )Nz-
        You are given a table from the pdf: a   in html format.
        Along with the tab;e you are provided with text from the pdf that is present around it.
        Your task is to generate a detailed description of that table using the image and the context provided.
        
        <context>
        z,
        </context>

        <html>
        a  
        </html>
        
        Just write the description in a natural flow, do not begin like "this table...".
        Give a detailed description, without leaving any information.
        The response must be 2-3 passage long.
        You can decide the length of description based on the relevance of the context provided.
        Do not assume anything, or add anything extra on your own.
        r   r   r   r   r   r   r   )r2   r   r   r   r   r   rP   )rH   r   htmlr   r   r   r   s          rL   get_table_descriptionz"HybridSearch.get_table_description   s    --5J 7	
 
	 	 
 	& %%**66== # %+$* 
 $FO > 
 "**222rN   c                 F    g } fd}t               5 }g }	|D ]?  \  }
}t        |      D ],  \  }}|	j                  |j                  ||
||||||             . A t	        |	      D ]!  }|j                  |j                                # 	 d d d        |S # 1 sw Y   |S xY w)Nc                    g }d}t        t        d| |z
        t        | |z   dz   |d   d               D ]!  }	t        |	      |v s||t        |	         z  }# dj	                  |      }t        j                  |j                  |      d         }
|d|  d| dz   }|
j                  |       t        j                  ||      | |d	
      S )Nr   r   

imager"   _z.jpeg)pager%   page_contentmetadata)rangemaxminr1   rB   fitzPixmapextract_imagesaver
   r   )page_numkxrefdocimg_path
chunk_dict	xrefs_allr   bufferr   pixr%   rh   rH   s               rL   process_image_chunkz7HybridSearch.handle_images.<locals>.process_image_chunk   s    GFc!X%67X=NQR=RT]^`TabcTd9ef 5t9
*z#d)44G5 kk'*G ++c//5g>?C(1QCu55DHHTN !77tWU"*D9 rN   )r   	enumerateappendsubmitr   result)rH   rh   r   r   r   r   image_chunksr   executorfuturesr   xrefsr   r   futures   ``             rL   handle_imageszHybridSearch.handle_images   s    	&  ! 	5XG#, B%(/ BGAtNN8??3FRSUY[^`hjtv  $A  BBB
 'w/ 5##FMMO45	5 	5 s   A6BB c                     g }t        j                  |d      }t        |j                         D cg c]  }t	        |       c}      } fd}t               5 }	g }
|D ]3  }|j                  }|
j                  |	j                  ||||||             5 t        |
      D ]!  }|j                  |j                                # 	 d d d        |S c c}w # 1 sw Y   |S xY w)Nall)pagesc                 >   g }d}t        t        d| |z
        t        | |z   dz   |            D ]!  }t        |      |v s||t        |         z  }# dj	                  |      }|j
                  j                         }t        	j                  |||      | |d      S )Nr   r   r   )r   r   r   )	r   r   r   r1   rB   rt   to_htmlr
   r   )
r   tablerh   r   	last_pager   r   r   r   rH   s
            rL   process_table_chunkz7HybridSearch.handle_tables.<locals>.process_table_chunk!  s    GFc!X%67X=NQR=RT]9^_ 5t9
*z#d)44G5 kk'*G88##%D!77tWU"*D9 rN   )camelotread_pdfr   keysintr   r   r   r   r   r   )rH   document_pathrh   r   table_chunkstablespr   r   r   r   r   r   r   s   `             rL   handle_tableszHybridSearch.handle_tables  s    !!-u=):;AQ;<		  ! 		5XG  ::OO$75-Yceno 'w/ 5##FMMO45		5 5 <		5 s   CA*CCNr   rj   document_uuidr   
upload_dirc                    |t        t        j                               }|i }||||d| j                  |<   | j	                          g }g }g }	|j                  d      r| j                   d| d}
| j                   d| d}t        j                  |
d       t        j                  |d       t        |      }|j                         }i }|D ]|  }d	|j                  d
<   d|j                  d<   |j                  t        |j                  d         g        |t        |j                  d            j                  |j                         ~ | j                  |||      }n_|j                  d      r| j!                  ||      }g }|j#                         D ]-  \  }}|d   }|dd}t%        ||      }|j                  |       / t'               j)                  |      }t+        |      D ]  \  }}||j                  d<    n|j                  d      rWt-        |      }|j                         }t'               j)                  |      }t+        |      D ]  \  }}||j                  d<    nVt/        |      }|j                         }t'               j)                  |      }t+        |      D ]  \  }}||j                  d<    t1        j2                  | j4                        5 }|j7                  d|g       |j7                  d|g       | j8                  j;                  d|i      d   }|r| j8                  j=                  |       g }|	r| j?                  |	||       tA        d       |r| jC                  |||       tA        d       tE               5 }g }t+        |      D ]2  \  }}|j                  |jG                  | jH                  |||             4 tK        |      D ]  }|jM                           	 ddd       tO        jP                  |      }tA        d       |j7                  d       |j7                  d|g       ddd       |S # 1 sw Y   WxY w# 1 sw Y   |S xY w)zL
        Process and upsert a document document into both databases
        N)rh   rj   r   r   z.pdfr"   z/imagesz/tablesT)exist_okr   r   r%   r   z.ziprP   scorm)sourcer   r   z.txt-DELETE FROM documents WHERE document_uuid = ?\DELETE FROM terms WHERE chunk_id IN (SELECT chunk_id FROM documents WHERE document_uuid = ?)r   whereidsr   zImages processedzTables processedzText Chunks processedz-INSERT INTO documents SELECT * FROM chunks_dfa  
                INSERT OR REPLACE INTO doc_stats 
                SELECT 
                    ? as collection_name,
                    COUNT(*) as total_chunks,
                    AVG(LENGTH(content)) as avg_chunk_length
                FROM documents
            ))r1   rl   uuid4r6   rg   endswithr3   r/   r8   r   r_   r   
setdefaultr   r   r   extract_scorm_contentrk   r
   r   split_documentsr   r   r   r?   r@   rC   r{   r>   getdeleteprocess_image_chunksprintprocess_table_chunksr   r   process_chunkr   r   pd	DataFrame)rH   r   rh   rj   r   r   r   chunksr   r   r   
table_pathloaderr   rV   	documents	file_nametext_contentr   irA   existing_ids
all_chunksr   r   	chunk_numr   	chunks_dfs                               rL   upsert_documentzHybridSearch.upsert_document;  sc     

-MH +,* 	.
M* 	!!!&)334Am_GLH 556agNJKK40KK
T2 !/F[[]FJ S'-v&'+v&%%c%..*@&A2F3u~~f567>>u?Q?QR	S  --m]JWL##F+33M*MJI$.$4$4$6 & 	5$Y/&/A L8L  %& 45EEiPF &f- +5)*v&+ ##F+.F[[]F35EEfMF%f- +5)*v&+ 0>F[[]F35EEfMF%f- +5)*v&+ ^^DOO, 1	"LL? LLn
  1155_m<\5]^cdL&&--,-? J)),zR())),zR() $% $(1&(9 j$IuNN8??43E3Eum]g#hij +73 $FMMO$$ Z0I)*LLHI LL   "U1	"f 1$ $71	"f s'   #B2QA#Q9AQQ	QQ%c                    | j                   j                  |j                        }t        |      D ]*  \  }}| d|j                  d    d| d}||j                  d   |dd}| j
                  j                  |g|g|g       |j                  ||||d       |j                         j                         }	t        j                  |	      j                         }
g }|
j                         D ]  \  }}|j                  |||d	        |st        j                  |      }t        j                   | j"                        5 }|j%                  d
       ddd       - y# 1 sw Y   9xY w)|
        Process a single page and add chunks to databases.
        This method will run in parallel for each page.
        -pr   -cz-tr   )r   page_numberchunk_numberr   r   r   	metadataschunk_idr   rP   r   termr
  rs   'INSERT INTO terms SELECT * FROM term_dfN)rG   
split_textr   r   r   r>   addr   lowersplitr   Seriesvalue_countsrk   r   r?   r@   rC   r{   )rH   parent_chunkr   r   r   r   rV   r
  chunk_metadataterms	term_freq	term_datar  freqterm_dfrA   s                   rL   r   zHybridSearch.process_chunk  s   
 ##..|/H/HI )& 1 *	LIu'<+@+@+H*II;VXYH "/+44V< )	N ""&&J ')* '  $!. *	  KKM'')E		%(557II'oo/ 
d    ("  ,,y1^^DOO4 LLL!JKL LS*	LRL Ls   E  E*	c                    t        |      D ]W  \  }}| d|j                  d    d| d}||j                  d   d|j                  d   |d}| j                  j                  |g|j                  g|g       |j                  |||j                  |d	       |j                  j                         j                         }t        j                  |      j                         }	g }
|	j                         D ]  \  }}|
j                  |||d
        |
s
t        j                  |
      }t        j                  | j                        5 }|j!                  d       ddd       Z y# 1 sw Y   fxY w)r  r  r   r  -ir   r%   )r   r  r   r%   r  r  r	  r  r  Nr   r   r>   r  r   r   r  r  r   r  r  rk   r   r?   r@   rC   r{   )rH   r   r   r   r   rV   r
  r  r  r  r  r  r  r  rA   s                  rL   r   z!HybridSearch.process_image_chunks     
 !*, 7 *	LIu'5>>&+A*B"YKrRH "/$~~f5v. )N ""&&J --.)* '  $!. --*	  &&,,.446E		%(557II'oo/ 
d    ("  ,,y1^^DOO4 LLL!JKL LS*	LRL L   
E((E2	c                    t        |      D ]W  \  }}| d|j                  d    d| d}||j                  d   d|j                  d   |d}| j                  j                  |g|j                  g|g       |j                  |||j                  |d	       |j                  j                         j                         }t        j                  |      j                         }	g }
|	j                         D ]  \  }}|
j                  |||d
        |
s
t        j                  |
      }t        j                  | j                        5 }|j!                  d       ddd       Z y# 1 sw Y   fxY w)r  r  r   r  r  r   r   )r   r  r   r   r  r  r	  r  r  Nr  )rH   r   r   r   r   rV   r
  r  r  r  r  r  r  r  rA   s                  rL   r   z!HybridSearch.process_table_chunks%  r  r  c                    t         j                  j                  |t         j                  j                  t         j                  j	                  |            d         }	 t        j                  |d      5 }|j                  |       d d d        t        |      }i }|D ]n  }t        |t              rN|j                  dd      }	|j                  dd      }
t        |
d      }|j                  d	d
      }d|||d||	<   at        d|        p |S # 1 sw Y   xY w# t        $ r}t        d|        Y d }~d }~ww xY w)Nr   r\   z'Error while extracting SCORM document: idunknownr    zhtml.parser T)	separatorstripr   )r   r%   rP   r   zSkipping invalid chunk: )r/   r%   rB   splitextbasenamezipfileZipFile
extractall	Exceptionr   r   
isinstancedictr   r   get_text)rH   r   r   extract_pathzip_refer   r   rV   r   html_contentsoupr   s                rL   r   z"HybridSearch.extract_scorm_contentV  s4   ww||J0@0@AQAQR_A`0abc0de	A4 1""<01
 %\2
 	:E%&!IIdI6	$yy4$\=A#}}s$}G $(+$0	)
9% 089!	:$ 51 1 	A;A3?@@	As0   D 5DD DD 	D>&D99D>c                    t        j                  | j                        5 }|j                  d|g       |j                  d|g       ddd       	 | j                  j                  d|i      d   }|r| j                  j                  |       t        d	| d
       y# 1 sw Y   YxY w# t        $ r}t        d|        Y d}~8d}~ww xY w)z
        Delete all data associated with a given document UUID from DuckDB and ChromaDB.
        
        Args:
            document_uuid (str): The unique identifier of the document to delete.
        r   r   Nr   r   r   r   z$Error while deleting from ChromaDB: z'All data associated with document UUID z has been deleted.)	r?   r@   rC   r{   r>   r   r   r-  r   )rH   r   rA   r   r3  s        rL   delete_collection_by_uuidz&HybridSearch.delete_collection_by_uuidw  s     ^^DOO, 	LL? LLn		>1155&6 6 L &&--,-? 	7FXYZ)	 	"  	>8<==	>s#   'B?B+ B(+	C4CCrT   top_kfinal_nc                    |r$| j                   j                  |gdd|ii|      }n| j                   j                  |g|      }|j                         j                         }dj	                  dj                  |D cg c]  }d c}            }	| j                  j                  |	|| j                  g|z   |||gz         j                         }
|d   d	   }i }|rRt        |      }t        |      }t        |d
   d	   |      D ]&  \  }}|| j                  k\  sd||z
  ||z
  z  z
  ||<   ( ni }d}|
j                         D ci c]  \  }}|d   |d    }}}|rt        |j                               }t        |j                               }||kD  r-|j!                         D ci c]  \  }}|||z
  ||z
  z   }}}n|D ci c]  }|d }}|j!                         D ci c]  \  }}|| j                  k\  s|| }}}t#        |j%                               t#        |j%                               z  }|syg }t'        j(                  | j*                        5 }|D ]  }|s|j-                  |d	      }|j-                  |d	      }||z  d|z
  |z  z   }|j                  d|g      j                         }|j.                  rddd}n|j0                  d	   }|j3                  ||d   r|d   nd|d   r|d   nd|||d        	 ddd       |sy| j5                  ||      }t7        |d d      d| S c c}w c c}}w c c}}w c c}w c c}}w # 1 sw Y   LxY w)zB
        Perform hybrid search within a specific document
        r   z$eq)query_textsr   	n_results)r;  r<  aF  
        WITH document_stats AS (
            -- Calculate statistics specific to the document
            SELECT COUNT(*) AS total_chunks, AVG(LENGTH(content)) AS avg_chunk_length
            FROM documents
            WHERE document_uuid = ?  -- Filter by the specific document
        ),
        doc_lengths AS (
            -- Get the length of each chunk within the specific document
            SELECT d.chunk_id, LENGTH(d.content) AS doc_len
            FROM documents d
            WHERE d.document_uuid = ?  -- Filter by the specific document
        ),
        term_stats AS (
            -- Calculate term statistics specific to the document
            SELECT 
                t.term,
                t.chunk_id,
                t.tf,
                COUNT(*) OVER (PARTITION BY t.term) AS df  -- Document frequency within this document
            FROM terms t
            JOIN documents d ON t.chunk_id = d.chunk_id
            WHERE t.term IN ({}) AND d.document_uuid = ?  -- Filter terms by the specific document
        ),
        scores AS (
            -- Calculate BM25 score for each chunk in the specific document
            SELECT 
                t.chunk_id,
                SUM(bm25_score(
                    t.tf, 
                    t.df, 
                    dl.doc_len, 
                    ps.avg_chunk_length,   -- Use document-level average chunk length
                    ps.total_chunks        -- Use document-level total chunks
                )) AS bm25_score
            FROM term_stats t
            JOIN doc_lengths dl ON t.chunk_id = dl.chunk_id
            CROSS JOIN document_stats ps  -- Join with document-specific statistics
            GROUP BY t.chunk_id
        )
        SELECT 
            d.chunk_id,
            d.content,
            d.metadata,
            COALESCE(s.bm25_score, 0) AS bm25_score  -- Default to 0 if no BM25 score
        FROM documents d
        LEFT JOIN scores s ON d.chunk_id = s.chunk_id
        WHERE d.document_uuid = ?  -- Filter by the specific document
        ORDER BY bm25_score DESC  -- Sort by BM25 score
        LIMIT ?
        ,?	distancesr   r   r   r,   r
  rp   r   Fz*SELECT * FROM documents WHERE chunk_id = ?r$  )rP   r   rP   r   )r
  rP   r   combined_scorevector_scorerp   Nc                     | d   S )NrQ    )xs    rL   rx   z%HybridSearch.search.<locals>.<lambda>3  s    a8H6I rN   T)keyreverse)r>   rT   r  r  formatrB   rA   r{   r   fetchdfr   r   rS   rE   iterrowsvaluesrk   setr   r?   r@   rC   r   emptyilocr   rY   sorted)rH   rT   r   r8  r9  r   vector_resultsquery_termsr   
bm25_querybm25_resultsvector_distancesvector_scoresmin_distancemax_distancer
  distancerE   rowbm25_scoresmin_bm25_scoremax_bm25_scorerX   r   combined_resultsrA   rA  rp   r@  chunk_details_dfchunk_detailss                                  rL   searchzHybridSearch.search  s    
 !3399"G&(>? : N "3399"G : N
 kkm))+2d F388+6QS678e 	h yy((D001K?=R_afBgg
 ') 	 *+6q9/0L/0L
 '*.*?*BDT&U l"(!5!55./8l3J|^jOj2k.kM(+l M '//1
3 
OS..
 
  !3!3!56N !3!3!56N. ,7+<+<+>'% u~5.>:YZZ  >IIx}II 8C7H7H7J$3HeeW[WkWkNk%K  ++-.[5E5E5G1HH
^^DOO, 	& #0#4#4Xq#AL!,1!=J &+\%9QY*<T%TN (,||D!
( gi % (--46B(G(8(=(=a(@$++$,?LY?W=#;]_ANyAYM*$=_a*8(4&0- )	<  ==0@A&,ISWXYaZabbs 72
 J	 	s7   =	L6L;'M
M$M<MM$B"MM)z	search.dbzdocument_uuids.jsoni     r   )g      ?g      ?)NNN)      r   )__name__
__module____qualname__r1   r   r   rM   rY   r   r5   rg   r   rn   rD   rr   gpt_4o_minir   r   r   r   r   r   r   r   r   r7  r   r_  rC  rN   rL   r   r   !   s   /
 /
 /
# /
\_ /
  CF /
  lo /
  GJ /
  Y^ /
bDdO ;s x} 1f(c (s (S (u (Z] (ch (sx (  EJ ( JU &3P DO #3J D>ES E EUX Eiqruiv E  JR  SW  JX E  mu  vy  mz E  FI EN0Ld/Lb/LbB[s [t [<^cC ^c ^cC ^cs ^chlmqhr ^crN   r   ).r?   r9   chromadb.utilsr   r/   rl   r   r^   pandasr   numpyr   r*  bs4r   typingr   r   r   r   openair   sentence_transformersr	   langchain.schema.documentr
   langchain.document_loadersr   r   langchain.text_splitterr   $langchain_community.document_loadersr   concurrent.futuresr   r   dependencies.helperr   r   r   dotenvr   gpt_4or0   rf  r   rC  rN   rL   <module>rv     su      . 	        ' '   . . > B K ? A   BIIn%Rc RcrN   