
    :Qg@              	      X   U d dl mZ d dlZd dlmZ d dlmZmZmZm	Z	 d dl
mZ d dlmZ d dlmZmZmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZmZ d dlmZ dZded<    eej@                        e	 ddddddd	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd              Z!	 	 	 	 	 	 	 	 	 	 ddZ"	 	 	 	 	 	 ddZ#y)    )annotationsN)BytesIO)IOAnyIteratorcast)etree)add_chunking_strategy)ElementElementMetadataText)read_txt_file)FileType)exactly_onespooled_to_bytes_io_if_needed)apply_metadataget_last_modified_date)element_from_textxmlstrDETECTION_ORIGINF)filetextencodingxml_keep_tagsxml_pathc                  t        | ||       g }t        | | rt        |       nd      }t        |_        |rE| rt        | |      d   }	n"|rt        t        |      |      d   }	n|J |}	t        |	|      g}|S t        | |||      }
|
D ];  }|st        |      }t        j                  |      |_        |j                  |       = |S )	ap  Partitions an XML document into its document elements.

    Parameters
    ----------
    filename
        A string defining the target filename path.
    file
        A file-like object using "rb" mode --> open(filename, "rb").
    text
        The text of the XML file.
    encoding
        The encoding method used to decode the text input. If None, utf-8 will be used.
    xml_keep_tags
        If True, will retain the XML tags in the output. Otherwise it will simply extract
        the text from within the tags.
    xml_path
        The xml_path to use for extracting the text. Only used if xml_keep_tags=False.
    filenamer   r   N)r   last_modified)r   r      )r   r   )r   metadata)r   r   r   r   )r   r   r   r   detection_originr   r   r   get_leaf_elementsr   copydeepcopyr"   append)r   r   r   r   r   r   kwargselementsr"   raw_textleaf_elementsleaf_elementelements                W/var/www/html/answerous/venv/lib/python3.12/site-packages/unstructured/partition/xml.pypartition_xmlr/      s    < 48 HX)?)I[_H !1H$hJ1MH$*G*MX`abcdH###Hh:; O *	
 * 	)L+L9#'==#: (		) O    c                    t        | ||       | rt        | |      S |rt        t        |      |      S t        t	        t        t        |      d            }t        ||      S )zGGet leaf elements from the XML tree defined in filename, file, or text.r   )r   )r   r   zutf-8)r   )r   _get_leaf_elementsr   r   bytesr   r   )r   r   r   r   bs        r.   r$   r$   Y   s[     48!(X>>	!'DT'JU]^^E$sD/G<=!!h77r0   c              #    K   g }t        j                  | dd      }|2t        |      \  }}t        j                  |      }d  ||      D        }|D ]  \  }}|dk(  r|j	                  |       |dk(  rD|j
                  (|j
                  j                         r|j
                   |j                          |sh|d   j                         ||j                          |s|d   j                         ' yw)	z<Parse the XML tree in a memory efficient manner if possible.)startendF)eventsresolve_entitiesNc              3  $   K   | ]  }d |f 
 yw)r7   N ).0els     r.   	<genexpr>z%_get_leaf_elements.<locals>.<genexpr>u   s     IBUBKIs   r6   r7   )
r	   	iterparsenextXPathr'   r   stripclear	getparentpop)r   r   element_stackelement_iterator_r-   compiled_pathevents           r.   r2   r2   g   s     
 +-Mt4DW\] *+
7H-I-2HI*  wG  )E>||'GLL,>,>,@ll"MMOb 1 ; ; = E b 1 ; ; = E s   B8C9;C9C9"C96C9)N)r   
str | Noner   IO[bytes] | Noner   rL   r   rL   r   boolr   rL   r(   r   returnzlist[Element])
r   rL   r   rM   r   rL   r   rL   rO   Iterator[str | None])r   zstr | IO[bytes]r   rL   rO   rP   )$
__future__r   r%   ior   typingr   r   r   r   lxmlr	   unstructured.chunkingr
   unstructured.documents.elementsr   r   r    unstructured.file_utils.encodingr   unstructured.file_utils.modelr   $unstructured.partition.common.commonr   r   &unstructured.partition.common.metadatar   r   unstructured.partition.textr   r   __annotations__XMLr/   r$   r2   r;   r0   r.   <module>r^      s   "   * *  7 J J : 2 Z 9 #  = "== = 	=
 = = = = =  =@88 088B8NX88 
    r0   