
    :Qg8                    V    d Z ddlmZ ddlZddlmZmZ  G d dej                        Zy)zDomain-model for file-types.    )annotationsN)Iterablecastc            
      t   e Zd ZU dZded<   	 ded<   	 ded<   	 ded<   	 ded	<   	 ded
<   	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZddZedd       Zedd       Z	e
dd       Ze
dd       Ze
dd       Ze
dd       Ze
dd       Ze
dd       Ze
dd       Zdddgddgd eee   g       fZdddgddgdg dfZd d d!gd d"gd# eee   g       fZd!d!d!gd!d$gd% eee   g       fZd&d' eee   g       d(d)d*gd+ eee   g       fZd,d,d-gd,d.gd/d0gfZd1ddgdd2gd3 eee   g       fZd4d4 eee   g       d(d5d6gd7 eee   g       fZd8ddgdd9d:gd; eee   g       fZd<d< eee   g       d(d=gd> eee   g       fZd?d?d@gd?dAgdBdCgfZdDdDdEgdDdFgdG eee   g       fZ dHdHd!d-gdHdIgdJ eee   g       fZ!dKdKd-gdKdLgdM eee   g       fZ"dNdNg dOdNdPgdQ eee   g       fZ#dRddgddSgdT eee   g       fZ$dUdUdVgdUdWgdX eee   g       fZ%dVdVdVgdVdYgdZ eee   g       fZ&d[d[d-gd[d\gd] eee   g       fZ'd^d^d-gd^d_gd`dagfZ(dbddgddcgdd eee   g       fZ)dededgdedfdggdh eee   g       fZ*didj eee   g       d(g dkdlg dmfZ+dnd( eee   g       d(dogdpg dqfZ,drdsddtgdsdugdv eee   g       fZ-dsdsddtgdsdwgdx eee   g       fZ.dydy eee   g       d(dzgd{d|gfZ/d}d( eee   g       d(d~gd eee   g       fZ0dd( eee   g       d( eee   g       d eee   g       fZ1dd( eee   g       d( eee   g       d eee   g       fZ2y()FileTypezThe collection of file-types recognized by `unstructured`.

    Note not all of these can be partitioned, e.g. WAV and ZIP have no partitioner.
    
str | None_partitioner_shortnametuple[str, ...] _importable_package_dependencies_extra_name_extensionsstr_canonical_mime_type_alias_mime_typesc                    t         j                  |       }||_        ||_        t	        |      |_        ||_        t	        |      |_        ||_        t	        |      |_	        |S )N)
object__new___value_r	   tupler   r   r   r   r   )	clsvaluepartitioner_shortnameimportable_package_dependencies
extra_name
extensionscanonical_mime_typealias_mime_typesselfs	            Z/var/www/html/answerous/venv/lib/python3.12/site-packages/unstructured/file_utils/model.pyr   zFileType.__new__!   s_     ~~c"&;#056U0V-% ,$7!!&'7!8    c                4    | j                   |j                   k  S )zMakes `FileType` members comparable with relational operators, at least with `<`.

        This makes them sortable, in particular it supports sorting for pandas groupby functions.
        )name)r   others     r   __lt__zFileType.__lt__5   s    
 yy5::%%r    c                p    |dv ry| j                   j                         D ]  }||j                  v s|c S  y)aP  Select a FileType member based on an extension.

        `extension` must include the leading period, like `".pdf"`. Extension is suitable as a
        secondary file-type identification method but is unreliable for primary identification.

        Returns `None` when `extension` is not registered for any supported file-type.
        )N .N)__members__valuesr   )r   	extensionms      r   from_extensionzFileType.from_extension<   sC     ' '') 	AAMM)	 r    c                    |y| j                   j                         D ]#  }||j                  k(  s||j                  v s!|c S  y)zSelect a FileType member based on a MIME-type.

        Returns `None` when `mime_type` is `None` or does not map to the canonical MIME-type of a
        `FileType` member or one of its alias MIME-types.
        N)r(   r)   r   r   )r   	mime_typer+   s      r   from_mime_typezFileType.from_mime_typeN   sP      '') 	AA222i1CVCV6V	 r    c                    | j                   S )a  The `pip` "extra" that must be installed to provide this file-type's dependencies.

        Like "image" for PNG, as in `pip install "unstructured[image]"`.

        `None` when partitioning this file-type requires only the base `unstructured` install.
        )r   r   s    r   r   zFileType.extra_name^   s     r    c                    | j                   S )a  Packages that must be importable for this file-type's partitioner to work.

        In general, these are the packages provided by the `pip install` "extra" for this file-type,
        like `pip install "unstructured[docx]"` loads the `python-docx` package.

        Note that these names are the ones used in an `import` statement, which is not necessarily
        the same as the _distribution_ package name used by `pip`. For example, the DOCX
        distribution package name is `"python-docx"` whereas the _importable_ package name is
        `"docx"`. This latter name as it appears like `import docx` is what is provided by this
        property.

        The return value is an empty tuple for file-types that do not require optional dependencies.

        Note this property does not complain when accessed on a non-partitionable file-type, it
        simply returns an empty tuple because file-types that are not partitionable require no
        optional dependencies.
        )r   r1   s    r   r   z(FileType.importable_package_dependenciesh   s    & 444r    c                ,    t        | j                        S )a~  True when there is a partitioner for this file-type.

        Note this does not check whether the dependencies for this file-type are installed so
        attempting to partition a file of this type may still fail. This is meant for
        distinguishing file-types like WAV, ZIP, EMPTY, and UNK which are legitimate file-types
        but have no associated partitioner.
        )boolr	   r1   s    r   is_partitionablezFileType.is_partitionable}   s     D//00r    c                    | j                   S )aL  The canonical MIME-type for this file-type, suitable for use in metadata.

        This value is used in `.metadata.filetype` for elements partitioned from files of this
        type. In general it is the "offical", "recommended", or "defacto-standard" MIME-type for
        files of this type, in that order, as available.
        )r   r1   s    r   r.   zFileType.mime_type   s     (((r    c                Z    | j                   x}t        d| j                   d      d| S )zName of partitioner function for this file-type. Like "partition_docx".

        Raises when this property is accessed on a file-type that is not partitionable. Use
        `.is_partitionable` to avoid exceptions when partitionability is unknown.
        z;`.partitioner_function_name` is undefined because FileType.b is not partitionable. Use `.is_partitionable` to determine whether a `FileType` is partitionable.
partition_r	   
ValueErrorr"   r   	shortnames     r   partitioner_function_namez"FileType.partitioner_function_name   sG     444I=Mdii[ Y% & 
 I;''r    c                Z    | j                   x}t        d| j                   d      d| S )zFully-qualified name of module providing partitioner for this file-type.

        e.g. "unstructured.partition.docx" for FileType.DOCX.
        z:`.partitioner_module_qname` is undefined because FileType.r8   zunstructured.partition.r:   r<   s     r   partitioner_module_qnamez!FileType.partitioner_module_qname   sG     444I=LTYYK X% & 
 )44r    c                    | j                   S )az  Familiar name of partitioner, like "image" for file-types that use `partition_image()`.

        One use is to determine whether a file-type is one of the five image types, all of which
        are processed by `partition_image()`.

        `None` for file-types that are not partitionable, although `.is_partitionable` is the
        preferred way of discovering that.
        )r	   r1   s    r   r   zFileType.partitioner_shortname   s     ***r    bmpimageunstructured_inferencez.bmpz	image/bmpcsvpandasz.csvztext/csv)zapplication/csvzapplication/x-csvztext/comma-separated-valuesztext/x-comma-separated-valuesz
text/x-csvdocdocxz.doczapplication/mswordz.docxzGapplication/vnd.openxmlformats-officedocument.wordprocessingml.documentemlemailNz.emlz.p7szmessage/rfc822epubpypandocz.epubzapplication/epubzapplication/epub+zipheicz.heicz
image/heichtmlz.htmlz.htmz	text/htmljpgz.jpegz.jpgz
image/jpegjsonz.jsonzapplication/jsonmdmarkdownz.mdztext/markdownztext/x-markdownmsgoxmsgz.msgzapplication/vnd.ms-outlookodtz.odtz'application/vnd.oasis.opendocument.textorgz.orgztext/orgpdf)	pdf2imagepdfminerPILz.pdfzapplication/pdfpngz.pngz	image/pngpptpptxz.pptzapplication/vnd.ms-powerpointz.pptxzIapplication/vnd.openxmlformats-officedocument.presentationml.presentationrstz.rstz
text/x-rstrtfz.rtfztext/rtfzapplication/rtftiffz.tiffz
image/tifftsvz.tabz.tsvztext/tsvtxttext)z.txtz.textz.cz.ccz.cppz.csz.cxxz.goz.javaz.jsz.logz.phpz.pyz.rbz.swiftz.tsz.yamlz.ymlz
text/plain)z	text/yamlzapplication/x-yamlzapplication/yamlztext/x-yamlwavz.wavz	audio/wav)zaudio/vnd.wavzaudio/vnd.wavez
audio/wavezaudio/x-pn-wavzaudio/x-wavxlsxlsxopenpyxlz.xlszapplication/vnd.ms-excelz.xlsxzAapplication/vnd.openxmlformats-officedocument.spreadsheetml.sheetxmlz.xmlzapplication/xmlztext/xmlzipz.zipzapplication/zipunkzapplication/octet-streamemptyzinode/x-empty)r   r   r   r   r   Iterable[str]r   r   r   rl   r   r   r   rl   )r#   r   returnr4   )r*   r   rm   FileType | None)r.   r   rm   rn   )rm   r   )rm   r
   )rm   r4   )rm   r   )3__name__
__module____qualname____doc____annotations__r   r$   classmethodr,   r/   propertyr   r   r5   r.   r>   r@   r   r   listr   BMPCSVDOCDOCXEMLEPUBHEICHTMLJPGJSONMDMSGODTORGPDFPNGPPTPPTXRSTRTFTIFFTSVTXTWAVXLSXLSXXMLZIPUNKEMPTY r    r   r   r   	   s   
 '&c&55^T  bH&&<  * *7	
  " ! ((&  "       5 5( 1 1 ) ) ( (" 5 5  	+ 	+ 		!"	T#YC 		
		
C %&56(4H$tTWyZ\J]
^C			QT#YD 	T#Y	T#YC 					 D 		!"		T#YD 	T#Y	&T#YD 		!"	&T#YC 	T#Y		T#YD zlD5'?EVDW	XB			$T#YC 			1T#YC %*uvh
DcTVDW
XC(	T#YC 		!"	T#YC 			'T#YC 				ST#YD %*uvhd4PS9VXFY
ZC%*uvh
EVDW
XC	!"		T#YD %(UVV,<j$tTWyZ\J]
^CT#Y	
* 		
7#CJ 	T#Y		
C  		:	"T#YC 		:		KT#YD %d3i,dVH>OR\Q]
^C$T#Y+TF8=NPTUYZ]U^`bPc
dC 	T#YT#Y"T#YC 	T#YT#YT#YEr    r   )	rr   
__future__r   enumtypingr   r   Enumr   r   r    r   <module>r      s#    " "  !mtyy mr    