
    :Qg                         d dl mZmZ d dlmZ d dlmZ d dlmZ dde	de
fdZ	 	 	 	 	 dde	de
d	e
d
e
de
deee	      fdZd Z	 	 	 dd	e
d
e
de
fdZy)    )ListOptional)logger)PartitionStrategy)dependency_existsstrategyis_imagec                     t         j                  t         j                  t         j                  t         j                  g}| |vrt        |  d      | t         j                  k(  r|rt        d      yy)z?Determines if the strategy is valid for the specified filetype.z is not a valid strategy.z3The fast strategy is not available for image files.N)r   AUTOFASTOCR_ONLYHI_RES
ValueError)r   r	   valid_strategiess      ^/var/www/html/answerous/venv/lib/python3.12/site-packages/unstructured/partition/strategies.pyvalidate_strategyr      ss     	""  	 ''H:%>?@@$)))hNOO /7)    Npdf_text_extractableinfer_table_structureextract_images_in_pdfextract_image_block_typesc                    t        d      }t        d      }| t        j                  k(  r*|xs t        |      }|rt	               } nt        |||      } t        | | | g      rt        d      | t        j                  k(  rc|sat        j                  d       |r%t        j                  d       t        j                  S t        j                  d       t        j                  S | t        j                  k(  rc|sat        j                  d       |r%t        j                  d       t        j                  S t        j                  d	       t        j                  S | S )
zDetermines what strategy to use for processing PDFs or images, accounting for fallback
    logic if some dependencies are not available.unstructured_pytesseractunstructured_inferencer   r   extract_elementzunstructured_inference is not installed, pytesseract is not installed and the text of the PDF is not extractable. To process this file, install unstructured_inference, install pytesseract, or remove copy protection from the PDF.zunstructured_inference is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with another strategy.z+Falling back to partitioning with ocr_only.z'Falling back to partitioning with fast.zpytesseract is not installed. Cannot use the ocr_only partitioning strategy. Falling back to partitioning with another strategy.z)Falling back to partitioning with hi_res.)r   r   r   bool_determine_image_auto_strategy_determine_pdf_auto_strategyallr   r   r   warningr   r   )	r   r	   r   r   r   r   pytesseract_installed unstructured_inference_installedr   s	            r   determine_pdf_or_image_strategyr$      sI    ..HI'89Q'R$$)))/R48Q3R57H3%9&; /H -	-3H/HNbJbc 6
 	
 $+++4TL	
 !NNHI$---NNDE$)))	&//	/8ML	
  NNDE$)))NNFG$+++Or   c                  "    t         j                  S )zWIf "auto" is passed in as the strategy, determines what strategy to use
    for images.)r   r    r   r   r   r   W   s     ###r   r   c                 n    |s|rt         j                  S | rt         j                  S t         j                  S )zUIf "auto" is passed in as the strategy, determines what strategy to use
    for PDFs.)r   r   r   r   r   s      r   r   r   ^   s0      ''' %%% )))r   )F)FFFFN)FFF)typingr   r   unstructured.loggerr   &unstructured.partition.utils.constantsr   unstructured.utilsr   strr   r   r$   r   r   r&   r   r   <module>r-      s    ! & D 0P Pt P$ !&"'"'59<<< <  	<
  <  (S	2<~$ "'"'!*** *r   