o
    G+g                     @   sn  d dl mZmZmZmZmZmZmZ d dlm	Z	m
Z
mZ d dlmZ d dlmZmZ d dlmZ d dlmZ d dlmZ d dlmZmZmZmZ d d	lmZ d d
lmZ d dlm Z  d dl!Z!d dl"T d dl#m$Z% d dl&m'Z' e Z(e  ee)*de)*ddZe)*d de!+ j, Z-e. Z/e(0dededfdede1fddZ2e(3dde4de4de4fddZ5dS )     )
UploadFileFileForm	APIRouterDependsHTTPExceptionstatus)OAuth2PasswordRequestForm
HTTPBearerHTTPAuthorizationCredentials)JSONResponse)QdrantClientmodels)load_dotenv)	PdfReader)RecursiveCharacterTextSplitter)VectorParamsDistanceSparseVectorParamsModifier)PointStruct)LateInteractionTextEmbedding)Bm25N)*)	Documents
QDRANT_URLQDRANT_API_KEY)urlapi_keyCOLLECTION_NAME_z
/uploadpdf.filefolderIdc              
      s  d| j  }t|d}|| j  W d    n1 sw   Y  t|}d}|jD ]
}|d|  7 }q-tddt	dd}|
|}d	  fd
d}	g }
|D ]	}|
|	| qQtd}t||}td}t||}tjttdtjdtt	|d d tjjtjtjjddddttjditjddd | j  }g }tt	|D ] }|t||
| ||   || ! d||| |dd qtj"t|d t#d tj$ttjddd |rt%& }t't| j d|d }|(| |)  |*| d!| j td"S )#Nzuploads/zwb+ z 
i  i  F)
chunk_sizechunk_overlaplength_functionis_separator_regexztext-embedding-3-smallc                    s&   |  dd} tjj| g djd jS )N
 )inputmodelr   )replaceopenai_client
embeddingscreatedata	embedding)textopenai_embeddings_model 2/var/www/html/answerit/server/routers/documents.pygenerate_embeddings5   s   z'upload_pdf.<locals>.generate_embeddingszcolbert-ir/colbertv2.0zQdrant/bm25i   )sizedistancer   )
comparator)r8   r9   multivector_config)dense_embeddingslate_interactionsbm25)modifier)indexing_threshold)collection_namevectors_configsparse_vectors_configoptimizers_config)r<   r>   r=   )_idr2   pdf_id)idvectorpayload)rA   pointszUploaded document points.i N  )rA   optimizer_configpdf)rA   document_namedocument_type	folder_idzPDF uploaded and processed)r   filenamerA   )+rP   openwriter!   readr   pagesextract_textr   len
split_textappendr   listpassage_embedr   qdrant_clientcreate_collectionrA   r   r   COSINEr   MultiVectorConfigMultiVectorComparatorMAX_SIMr   r   IDFOptimizersConfigDiffranger   	as_objecttolistupsertprintupdate_collectiondbaseSessionLocalr   addcommitrefresh)r!   r"   file_locationfile_objectreaderpdf_txtpagetext_splitterchunksr7   r<   chunk late_interaction_embedding_modellate_interaction_embeddings
bm25_modelsparse_embeddingsrF   rJ   idbnew_documentr5   r3   r6   
upload_pdf   s   







r}   z/chat_with_filequeryrP   doc_idc                    s@   t  }|ttjt|k }|j}t	| |}d|iS )Nfull_result)
ri   rj   r~   r   filterrG   intfirstrA   answer)r~   rP   r   r{   documentrA   responser5   r5   r6   chat_with_file   s   
r   )6fastapir   r   r   r   r   r   r   fastapi.securityr	   r
   r   fastapi.responsesr   r[   r   r   dotenvr   PyPDF2r   langchain.text_splitterr   qdrant_client.modelsr   r   r   r   r   fastembed.late_interactionr   fastembed.sparse.bm25r   uuiddependencies.utilsdb_config.databasedatabaseri   db_config.modelsr   routerosgetenvuuid4hexrA   OpenAIr-   postr   r}   getstrr   r5   r5   r5   r6   <module>   s6   $ "l