
    :QgD                         d dl mZmZmZmZ d dlZd dlmZ dZg dZ	de
de
fdZde
defd	Z	 	 dd
e
deeeee   f      dee
e
f   fdZ	 	 	 dd
e
deeeee   f      dee
   dee
e
f   fdZy)    )IOOptionalTupleUnionN)convert_to_bytesg?)utf_8
iso_8859_1
iso_8859_6
iso_8859_8asciibig5utf_16	utf_16_be	utf_16_leutf_32	utf_32_be	utf_32_leeuc_jis_2004euc_jisx0213euc_jpeuc_krgb18030	shift_jisshift_jis_2004shift_jisx0213encodingreturnc                 `    | j                         j                  dd      }g d}||v r|dd }|S )zFormat input encoding string (e.g., `utf-8`, `iso-8859-1`, etc).
    Parameters
    ----------
    encoding
        The encoding string to be formatted (e.g., `UTF-8`, `utf_8`, `ISO-8859-1`, `iso_8859_1`,
        etc).
    _-)ziso-8859-6-iziso-8859-6-eziso-8859-8-iziso-8859-8-eN)lowerreplace)r   formatted_encodingannotated_encodingss      ]/var/www/html/answerous/venv/lib/python3.12/site-packages/unstructured/file_utils/encoding.pyformat_encoding_strr'   "   sC     ")11#s; [00/4    c                 L    t         D ]  }t        |      t        |       k(  s y y)zChecks if an encoding string is valid. Helps to avoid errors in cases where
    invalid encodings are extracted from malformed documents.TF)COMMON_ENCODINGSr'   )r   common_encodings     r&   validate_encodingr,   4   s.     , /3Fx3PP r(   filenamefilec                 4   | r&t        | d      5 }|j                         }d d d        n|rt        |      }nt        d      t	        j
                        }|d   }|d   }|	|t        k  rat        D ]@  }	 | r't        | |      5 }|j                         }d d d        n|j                  |      }|} n* t        d|dt        |      d      |j                  |      }t        |      }	|	fS # 1 sw Y   xY w# 1 sw Y   RxY w# t        t        f$ r Y w xY w)	Nrbz#No filename nor file were specifiedr   
confidencer   z]Unable to determine the encoding of the file or match it with any of the specified encodings.r   zInvalid encoding)openreadr   FileNotFoundErrorchardetdetectENCODE_REC_THRESHOLDr*   decodeUnicodeDecodeErrorUnicodeErrorlenr'   )
r-   r.   f	byte_dataresultr   r1   enc	file_textr$   s
             r&   detect_file_encodingrB   =   sH    (D! 	!QI	! 	!	$T*	 EFF^^I&Fj!H%J:(<<# 	C	h5 -$%FFH	- - !* 0 0 5I	 %.I"  $$X.	,X6y((O	! 	! - - '5 s5   C-6DC9D-C69D	>DDDc                    | rK|r7t        |      }t        | |      5 }	 |j                         }	 ddd       ||fS t        |       \  }}||fS |rk|rVt        |      }	 t        |t              r|n|j                         }t        |t              r|j                  |      }n|}||fS t        |      \  }}||fS t        d      # t        t        f$ r}|d}~ww xY w# 1 sw Y   |fS xY w# t        t        f$ r}|d}~ww xY w)z6Extracts document metadata from a plain text document.r2   N)r.   zNo filename was specified)
r'   r3   r4   r:   r;   rB   
isinstancebytesr9   r5   )r-   r.   r   r$   r=   rA   errorfile_contents           r&   read_txt_filerH   l   s<    !4X!>h);<    !I . y((# -A,J)	" y((! 
!4X!>'1$'>tDIIKlE2 , 3 34F GI ,I y((	 -Ad,K)	 y((   ;<<% +L9  K  . y(( '5 sA   CCAC. CCCCC+.D=C??D) N)rI   NN)typingr   r   r   r   r6   $unstructured.partition.common.commonr   r8   r*   strr'   boolr,   rE   rB   rH    r(   r&   <module>rO      s    - -  A  0# # $   .2,),)
55	)*
+,) 38_,)` .2" ) )
55	)*
+ ) sm ) 38_	 )r(   