
    Ig                     d    d dl mZmZmZmZ d dlmZ erd dlmZ  G d de      Z G d de	      Z
y)	    )TYPE_CHECKINGDictListUnion)UnstructuredFileLoaderchmc                       e Zd ZdZdefdZy)UnstructuredCHMLoaderar  Load `CHM` files using `Unstructured`.

    CHM means Microsoft Compiled HTML Help.

    Examples
    --------
    from langchain_community.document_loaders import UnstructuredCHMLoader

    loader = UnstructuredCHMLoader("example.chm")
    docs = loader.load()

    References
    ----------
    https://github.com/dottedmag/pychm
    http://www.jedrea.com/chmlib/
    returnc           
          ddl m} t        | j                        5 }|j	                         D cg c]  } |dd|d   i| j
                   c}cd d d        S c c}w # 1 sw Y   y xY w)Nr   )partition_htmltextcontent )unstructured.partition.htmlr   	CHMParser	file_pathload_allunstructured_kwargs)selfr   fitems       e/var/www/html/answerous/venv/lib/python3.12/site-packages/langchain_community/document_loaders/chm.py_get_elementsz#UnstructuredCHMLoader._get_elements   sg    >t~~& 	! JJL PDOPt7O7OP	 		 	s   AAAAA'N)__name__
__module____qualname____doc__r   r   r       r   r   r   	   s    "t r    r   c                       e Zd ZU dZeed<   ded<   defdZd Zd Ze	defd	       Z
deeeef      fd
Zdeeef   defdZdeeeef      fdZy)r   z*Microsoft Compiled HTML Help (CHM) Parser.pathzchm.CHMFilefilec                     ddl m } || _         |j                         | _        | j                  j	                  |       y )Nr   r   )r	   r"   CHMFiler#   LoadCHM)r   r"   r	   s      r   __init__zCHMParser.__init__+   s-    	CKKM			$r    c                     | S Nr   r   s    r   	__enter__zCHMParser.__enter__2   s    r    c                 R    | j                   r| j                   j                          y y r)   )r#   CloseCHM)r   exc_type	exc_value	tracebacks       r   __exit__zCHMParser.__exit__5   s    99II  r    r   c                 T    | j                   j                         j                  d      S )Nutf-8)r#   GetEncodingdecoder*   s    r   encodingzCHMParser.encoding9   s     yy$$&--g66r    c                    ddl m} ddlm} g }| j                  j                         j                  | j                        } ||      }|j                  d      D ]x  }d}d}|j                  d      D ]  }	|	d   dk(  r|	d	   }|	d   d
k(  s|	d	   } |r|s= ||      j                  }|j                  d      sd|z   }|j                  ||d       z |S )Nr   )urlparse)BeautifulSoupobject paramnameNamevalueLocal/)r=   local)urllib.parser8   bs4r9   r#   GetTopicsTreer5   r6   find_allr"   
startswithappend)
r   r8   r9   resindexsoupobjr=   rB   r<   s
             r   rJ   zCHMParser.index=   s    )%		'')00?U#==* 	7C DEg. +=F* >D=G+!'NE	+
 uUO((E##C(eJJu56!	7$ 
r    c                     t        |t              r|j                  d      }| j                  j	                  |      d   }| j                  j                  |      d   j                  | j                        S )Nr3      )
isinstancestrencoder#   ResolveObjectRetrieveObjectr5   r6   )r   r"   rL   s      r   loadzCHMParser.loadZ   s\    dC ;;w'Dii%%d+A.yy'',Q/66t}}EEr    c                     g }| j                         }|D ]1  }| j                  |d         }|j                  |d   |d   |d       3 |S )NrB   r=   )r=   rB   r   )rJ   rT   rH   )r   rI   rJ   r   r   s        r   r   zCHMParser.load_all`   sX    

 	DiiW.GJJfW'R	
 
r    N)r   r   r   r   rP   __annotations__r'   r+   r1   propertyr6   r   r   rJ   r   bytesrT   r   r   r    r   r   r   %   s    4
I
 S  ! 7# 7 7tDcN+ :FsEz* Fs F$tCH~. r    r   N)typingr   r   r   r   1langchain_community.document_loaders.unstructuredr   r	   r   r:   r   r   r    r   <module>r[      s/    3 3 T2 8C Cr    