
    :QgD                     0   d dl Z d dlZd dlmZ d dlmZ d dlmZ d dlm	Z	m
Z
mZmZmZmZ d dlmZmZmZmZmZmZ dedee   fd	Zd
efdZdddddddddded
ededee   dee   dededededee   defdZ	 	 	 	 	 ddededee   dededee   defdZy)    N)BytesIO)Path)Optional)AnalysisDrawerFinalLayoutDrawerLayoutDrawerOCRLayoutDrawerODModelLayoutDrawerPdfminerLayoutDrawer)ExtractedLayoutDumperFinalLayoutDumperJsonLayoutDumperLayoutDumperObjectDetectionLayoutDumperOCRLayoutDumperdumperreturnc                 n   t        | t              rt        | j                               S t        | t              rt        | j                               S t        | t              rt        | j                               S t        | t              rt        | j                               S t        d|        )zFor a given layout dumper, return the corresponding layout drawer instance initialized with
    a dumped layout dict.

    Args:
        dumper: The layout dumper instance

    Returns:
        LayoutDrawer: The corresponding layout drawer instance
    layout_dumpzUnknown dumper type: )
isinstancer   r
   dumpr   r   r   r	   r   r   
ValueError)r   s    l/var/www/html/answerous/venv/lib/python3.12/site-packages/unstructured/partition/pdf_image/analysis/tools.py_get_drawer_for_dumperr      s     &56"v{{}==	F1	2#>>	FO	,6;;=99	F-	. V[[];;09::    is_imagec                 `    t        j                         j                  dd }| rd| dS d| dS )ziGenerate a filename for the analysis artifacts based on the file type.
    Adds a random uuid suffix
    N   image_z.pngpdf_z.pdf)uuiduuid4hex)r   suffixs     r   _generate_filenamer&   /   s=     ZZ\bq!Fxt$$&r   FTpng)filenamefileskip_bboxesskip_dump_od	draw_griddraw_captionresizeformatlayout_dumpersanalyzed_image_output_dir_pathr(   r)   r*   r+   r,   r-   r.   r/   c        
   
      d   |st        |       }|s|ryt        |      }|j                  dd       |s5t        ||      }|
D ]  }|j	                  |        |j                          |sGt        ||| |||||	      }|
D ]  }t        |      }|j                  |         |j                          yy)a  Save the analysis artifacts for a given file. Loads some settings from
    the environment configuration.

    Args:
        layout_dumpers: The layout dumpers to save and use for bboxes rendering
        is_image: Flag for the file type (pdf/image)
        analyzed_image_output_dir_path: The directory to save the analysis artifacts
        filename: The filename of the sources analyzed file (pdf/image).
            Only one of filename or file should be provided.
        file: The file object for the analyzed file.
            Only one of filename or file should be provided.
        draw_grid: Flag for drawing the analysis bboxes on a single image (as grid)
        draw_caption: Flag for drawing the caption above the analyzed page (for e.g. layout source)
        resize: Output image resize value. If not provided, the image will not be resized.
        format: The format for analyzed pages with bboxes drawn on them. Default is 'png'.
    NTparentsexist_ok)r(   save_dir)r(   r)   r   r6   r,   r-   r.   r/   )	r&   r   mkdirr   add_layout_dumperprocessr   r   
add_drawer)r   r1   r(   r)   r*   r+   r,   r-   r.   r/   r0   output_pathjson_layout_dumperlayout_dumperanalysis_drawerdrawers                   r   save_analysis_artifiactsr@   9   s    : %h/l56KdT2- 
 , 	@M00?	@""$( %	
 , 	/M+M:F&&v.	/ 	! r   renders_output_dir_pathc           	         t        |       j                  }t        |       j                  j                  d       }t        |      dz  |z  dz  }	|	j	                         syg }
|	j                         D ]  }|j                         st        |      5 }t        j                  |      }ddd       |j                  dk(  r|
j                  t                     |j                  dk(  r|
j                  t                     |j                  dk(  r|
j                  t                     |j                  d	k(  s|
j                  t                      |
rp|st        |      dz  |z  d
z  }nt        |      }|j                  dd       t!        | ||||||      }|
D ]  }|j#                  |        |j%                          yy# 1 sw Y   (xY w)a  Render the bounding boxes for a given layout dimp file.
    To be used for analysis after the partition is performed for
    only dumping the layouts - the bboxes can be rendered later.

    Expects that the analyzed_image_output_dir_path keeps the structure
    that was created by the save_analysis_artifacts function.

    Args:
        filename: The filename of the sources analyzed file (pdf/image)
        analyzed_image_output_dir_path: The directory where the analysis artifacts
          (layout dumps) are saved. It should be the root directory of the structure
          created by the save_analysis_artifacts function.
        renders_output_dir_path: Optional directory to save the rendered bboxes -
          if not provided, it will be saved in the analysis directory.
        draw_grid: Flag for drawing the analysis bboxes on a single image (as grid)
        draw_caption: Flag for drawing the caption above the analyzed page (for e.g. layout source)
        resize: Output image resize value. If not provided, the image will not be resized.
        format: The format for analyzed pages with bboxes drawn on them. Default is 'png'.
    pdfanalysisr   Nfinalr   object_detectionocrpdfminerbboxesTr3   )r(   r6   r   r,   r-   r.   r/   )r   stemr%   endswithexistsiterdiris_fileopenjsonloadappendr   r
   r	   r   r7   r   r:   r9   )r(   r1   rA   r,   r-   r.   r/   filename_stemr   analysis_dumps_dirlayout_drawersanalysis_dump_filenamefr   r;   r>   r?   s                    r   render_bboxes_for_filerX   x   s   8 N''M>((11%88H+,z9MIMY  $$&N"4"<"<"> Q%--/() 	'Q))A,K	'!&&'1!!"3"LM!&&*<<!!"5+"NO!&&%/!!/k"JK!&&*4!!"6;"OPQ &34zAMQT\\  67K$6( %
 % 	/F&&v.	/!) 	' 	's   GG	)NFTNr'   )rP   r"   ior   pathlibr   typingr   <unstructured.partition.pdf_image.analysis.bbox_visualisationr   r   r   r	   r
   r   5unstructured.partition.pdf_image.analysis.layout_dumpr   r   r   r   r   r   r   boolr&   strfloatr@   rX    r   r   <module>rb      sK         ;< ;H\4J ;,  #""<"!<"<" %(<" sm	<"
 7
<" <" <" <" <" UO<" <"D .2"F"F"$'F" &c]F" 	F"
 F" UOF" F"r   