
    :Qg              	         d dl mZ d dlZd dlZd dlmZmZmZmZ d dl	m
Z
 d dlmZmZ d dlmZ d dlmZmZmZmZmZmZmZmZmZmZ d dlmZ d d	lmZ d d
lm Z m!Z! d dl"m#Z# d dl$m%Z%m&Z& d dl'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z-  e%ej\                        e
	 ddddddd	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd              Z/	 	 d	 	 	 	 	 	 	 ddZ0	 	 	 	 	 	 ddZ1ddZ2	 d	 	 	 	 	 	 	 ddZ3	 d	 	 	 	 	 	 	 ddZ4d dZ5y)!    )annotationsN)IOAnyCallableLiteral)add_chunking_strategy)auto_paragraph_grouperclean_bullets)CoordinateSystem)
AddressElementElementMetadataEmailAddressFooterHeaderListItemNarrativeTextTextTitle)read_txt_file)FileType)PARAGRAPH_PATTERNUNICODE_BULLETS_RE)exactly_one)apply_metadataget_last_modified_date)is_bulleted_textis_email_addressis_possible_narrative_textis_possible_numbered_listis_possible_titleis_us_city_state_ziptext)fileencodingr#   paragraph_grouperdetection_originc               
   ||j                         dk(  r|s| sg S t        | ||       d}| t        | |      \  }}n |t        ||      \  }}n|t        |      }|du rn|	 ||      }nt	        |      }t        |      }g }	t        | rt        |       nd      }
||
_        |D ]W  }|j                         }|st        |      r"t        |      }t        j                  |
      |_        |	j                  |       Y |	S )a  Partition a .txt documents into its constituent paragraph elements.

    If paragraphs are below "min_partition" or above "max_partition" boundaries,
    they are combined or split.
    Parameters
    ----------
    filename
        A string defining the target filename path.
    file
        A file-like object using "rb" mode --> open(filename, "rb").
    encoding
        The encoding method used to decode the input bytes when drawn from `filename` or `file`.
        Defaults to "utf-8".
    text
        The string representation of the .txt document.
    paragrapher_grouper
        A str -> str function for fixing paragraphs that are interrupted by line breaks
        for formatting purposes.
    N )filenamer$   r#   )r*   r%   )r$   r%   F)last_modified)stripr   r   strr	   _split_by_paragraphr   r   r'   _is_empty_bulletelement_from_textcopydeepcopymetadataappend)r*   r$   r%   r#   r&   r'   kwargs	file_textfile_contentelementsr3   ctextelements                X/var/www/html/answerous/venv/lib/python3.12/site-packages/unstructured/partition/text.pypartition_textr<   (   s   > DJJLB.tH	 48I+XQ)		+I)		I	E!		&%i0	*95	&y1L H:B,X6H !1H %)%0'.G#}}X6GOOG$% O    c                   t        ||      rt        | ||      S t        ||      rt        | ||      S t	        |       rt        |       }t        |||      S t        |       rt        |       S t        |       rt        | ||      S t        |       rt        | ||      S t        |       rt        | ||      S t        |       rt        | ||      S t!        | ||      S )N)r#   coordinatescoordinate_systemr#   )_is_in_header_positionr   _is_in_footer_positionr   r   r
   r   r   r   r"   r   r    r   r   r!   r   r   )r#   r?   r@   
clean_texts       r;   r0   r0   o   s   
 k+<=#/
 	

 
 ->	?#/
 	

 
$	"4(
#/
 	

 
$	&&	d	##/
 	

 
#4	(#/
 	

 
$D	)#/
 	

 
4	 #/
 	
 #/
 	
r=   c                \    t        d | D              t        |       z  }||j                  z  S )Nc              3  &   K   | ]	  }|d      yw)   N ).0
coordinates     r;   	<genexpr>z)_get_height_percentage.<locals>.<genexpr>   s     <*
1<s   )sumlenheight)r?   r@   avg_ys      r;   _get_height_percentagerP      s0     <<<s;?OOE$++++r=   c                ^    t        t        j                  |       xr t        |       dk(        S )z(Checks if input text is an empty bullet.rG   )boolr   matchrM   rA   s    r;   r/   r/      s&    "((.A3t9>BBr=   c                .    | |yt        | |      }||kD  S )zZChecks to see if the position of the text indicates that the text belongs
    to a footer.FrP   r?   r@   	thresholdheight_percentages       r;   rC   rC      s,     /7.{<MNy((r=   c                .    | |yt        | |      }||k  S )zVChecks to see if the position of the text indicates that the text belongs to a header.FrU   rV   s       r;   rB   rB      s,     /7.{<MNy((r=   c                R    t        j                  t        | j                               S )zSplit text into paragraphs.)resplitr   r,   )r6   s    r;   r.   r.      s    88%y'899r=   )N)r*   
str | Noner$   zIO[bytes] | Noner%   r]   r#   r]   r&   z,Callable[[str], str] | Literal[False] | Noner'   r]   r5   r   returnzlist[Element])NN)r#   r-   r?   &tuple[tuple[float, float], ...] | Noner@   CoordinateSystem | Noner^   r   )r?   ztuple[tuple[float, float], ...]r@   r   r^   float)r#   r-   r^   rR   )g(\?)r?   r_   r@   r`   rW   ra   r^   rR   )gQ?)r6   r-   r^   z	list[str])6
__future__r   r1   r[   typingr   r   r   r   unstructured.chunkingr   unstructured.cleaners.corer	   r
   "unstructured.documents.coordinatesr   unstructured.documents.elementsr   r   r   r   r   r   r   r   r   r    unstructured.file_utils.encodingr   unstructured.file_utils.modelr   unstructured.nlp.patternsr   r   $unstructured.partition.common.commonr   &unstructured.partition.common.metadatar   r    unstructured.partition.text_typer   r   r   r    r!   r"   TXTr<   r0   rP   r/   rC   rB   r.   rH   r=   r;   <module>ro      s   "  	 - - 7 @   ; 2 K < Y  B "FJ#)BB B 	B
 B DB !B B B  BN ;?157

7
77
 /7
 	7
~,0,', ,C )7).) ) 
	)" 
)7
).
) 
) 
	
):r=   