
    :QgU5              
          d dl Z d dlmZ d dlZd dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d d	lmZmZmZmZmZmZmZmZmZmZ d dlmZ d d
lmZ  G d de	j@                        Z! G d de	jD                        Z#d Z$d Z%ejL                  jO                  ddedfdedfdedfdedfg      d        Z(d Z)d Z*d Z+d Z,ejL                  jO                  d ejZ                               de.de/e   fd        Z0ejL                  jO                  d!ejb                  d"fejd                  d#fejf                  d"fejh                  d#fejj                  d#fejl                  d"fg      de.d$e7fd%       Z8d& Z9d' Z: G d( d)      Z;d* Z<d+ Z=d, Z>ejL                  jO                  d-d.d/g      d0        Z?d1 Z@ ed2      fd3ZAy)4    N)Pool)Image)layout)
TextRegionLayoutElement)example_doc_path)
PixelSpace)
TYPE_TO_TEXT_ELEMENT_MAPCheckBoxCoordinatesMetadataElementTypeFigureCaptionHeaderListItemNarrativeTextTextTitle)commonc                   @    e Zd Zdedej                  fdZed        Zy)MockPageLayoutnumberimagec                      || _         || _        y Nr   r   )selfr   r   s      k/var/www/html/answerous/venv/lib/python3.12/site-packages/test_unstructured/partition/common/test_common.py__init__zMockPageLayout.__init__    s    
    c                 n    t        ddd       t        ddd       t        ddd       t        dd	d       gS )
NHeadlinez#Charlie Brown and the Great Pumpkin)typetextbboxSubheadlinezThe Beginningr   z/This time Charlie Brown had it really tricky...r   z#Another book title in the same pager   r   s    r   elementszMockPageLayout.elements$   sX     :
 "$
 F
 :!
 	
r    N)__name__
__module____qualname__intr   r   propertyr(    r    r   r   r      s-    s 5;;  
 
r    r   c                       e Zd Zed        Zy)MockDocumentLayoutc                 F    t        dt        j                  dd            gS )N   1)r2   r2   r   )r   r   newr'   s    r   pageszMockDocumentLayout.pages?   s#     !599S&+AB
 	
r    N)r)   r*   r+   r-   r5   r.   r    r   r0   r0   >   s    
 
r    r0   c            	          dddgddgddgdd	ggd d
d} t        dd      }t        j                  | |      }|t        d
ddgddgddgdd	gg|      k(  sJ y )Nr   r2                        Some lovely text)r#   coordinatescoordinate_systemr$   
      widthheightr@   r$   r?   r@   )r
   r   normalize_layout_elementr   layout_elementr@   elements      r   "test_normalize_layout_element_dictrL   F   s    AAAA7!"	N #B7--+G eVaVaVaV4+   r    c                  ~    dddd} t        dd      }t        j                  | |      }|t        dd|	      k(  sJ y )
NFigure)r2   r7   r8   r9   )r:   r;   )r<   r=   r>   r#   r?   r$   rA   rB   rC   rF   rG   )r
   r   rH   ImageElementrI   s      r   *test_normalize_layout_element_dict_captionrS   Y   s[    7"N
 #B7--+G l4+   r    )element_typeexpected_typeexpected_depthr   r"   r2   r&   r7   r   c                     | ddgddgddgddggd	d
}t        dd      }t        j                  ||      }|j                  j                  |k(  sJ t        ||      sJ y )Nr2   r7   r8   r9   r:   r;   r<   r=   r>   rQ   rA   rB   rC   rF   )r
   r   rH   metadatacategory_depth
isinstance)rT   rU   rV   rJ   r@   rK   s         r   &test_normalize_layout_element_headliner[   k   sy     AAAA7"N
 #B7--nPabG**n<<<g}---r    c            	          dddgddgddgdd	ggd
d} t        dd      }t        j                  | |      }|t        d
ddgddgddgdd	gg|      k(  sJ y )Nr   r2   r7   r8   r9   r:   r;   r<   r=   r>   rQ   rA   rB   rC   rF   rG   )r
   r   rH   r   rI   s      r   1test_normalize_layout_element_dict_figure_captionr]      s    AAAA7"N
 #B7--+G mVaVaVaV4+   r    c            	          dddgddgddgdd	ggd
d} t        dd      }t        j                  | |      }|t        d
ddgddgddgdd	gg|      k(  sJ y )NMiscr2   r7   r8   r9   r:   r;   r<   r=   r>   rQ   rA   rB   rC   rF   rG   )r
   r   rH   r   rI   s      r   'test_normalize_layout_element_dict_miscr`      s    AAAA7"N
 #B7--+G dVaVaVaV4+   r    c                      t        j                  dddddd      } t        dd	
      }t        j                  | |      }|t        dd|      k(  sJ y )Nr   r2   r7   r8   r9   r>   r#   x1y1x2y2r$   rA   rB   rC   rF   rO   )r2   r9   rP   )r8   r7   rG   r   from_coordsr
   r   rH   r   rI   s      r   ,test_normalize_layout_element_layout_elementrj      sm    "..N #B7--+G m4+   r    c                      t        j                  dddddd      } t        dd	
      }t        j                  | |      }|t        dd|      k(  sJ y )Nr   r2   r7   r8   r9   r>   rb   rA   rB   rC   rF   rg   rG   rh   rI   s      r   ;test_normalize_layout_element_layout_element_narrative_textrl      sm    "..N #B7--+G m4+   r    )rT   expected_element_classrT   rm   c                     t        j                  | ddddd      }t        dd	      }t        j                  ||
      }| |dd|      k(  sJ y )Nr2   r7   r8   r9   r>   rb   rA   rB   rC   rF   rg   rG   )r   ri   r
   r   rH   )rT   rm   rJ   r@   rK   s        r   Mtest_normalize_layout_element_layout_element_maps_to_appropriate_text_elementro      sp     #..N #B7--+G ,4+   r    )rT   expected_checkedFTrp   c                     t        j                  | ddddd      }t        dd	      }t        j                  ||
      }t        |t              sJ |t        |d|      k(  sJ y )Nr2   r7   r8   r9    rb   rA   rB   rC   rF   rg   )checkedr?   r@   )r   ri   r
   r   rH   rZ   r   )rT   rp   rJ   r@   rK   s        r   'test_normalize_layout_element_checkablert      s     #..N #B7--+G gx(((h 4+   r    c                      t        j                  dddddd      } t        dd	
      }t        j                  | |      }|t        dd|      t        dd|      t        dd|      gk(  sJ y )NListr2   r7   r8   r9   z61. I'm so cool! 2. You're cool too. 3. We're all cool!rb   rA   rB   rC   rF   I'm so cool!rg   rG   You're cool too.We're all cool!r   ri   r
   r   rH   r   rJ   r@   r(   s      r   -test_normalize_layout_element_enumerated_listr|     s    "..EN #B7..+H 8/	

 	#8/	

 	"8/	
   r    c                      t        j                  dddddd      } t        dd	
      }t        j                  | |      }|t        dd|      t        dd|      t        dd|      gk(  sJ y )Nrv   r2   r7   r8   r9   z3* I'm so cool! * You're cool too. * We're all cool!rb   rA   rB   rC   rF   rw   rg   rG   rx   ry   rz   r{   s      r   +test_normalize_layout_element_bulleted_listr~   ,  s    "..BN #B7..+H 8/	

 	#8/	

 	"8/	
   r    c                       e Zd Zd Zy)MockRunOutputc                 .    || _         || _        || _        y r   )
returncodestdoutstderr)r   r   r   r   s       r   r   zMockRunOutput.__init__O  s    $r    N)r)   r*   r+   r   r.   r    r   r   r   M  s    r    r   c                     ddl m} d }| j                  |d|       t        j                  ddd       d	|j
                  v sJ y )
Nr   )
subprocessc                  T    t        ddj                         dj                               S )Nr2   zan error occurredzerror details)r   encode)argskwargss     r   mock_runz9test_convert_office_doc_captures_errors.<locals>.mock_runX  s$    Q 3 : : <o>T>T>VWWr    runzno-real.docxzfake-directorydocx)target_formatz4soffice failed to convert to format docx with code 1)$unstructured.partition.common.commonr   setattrr   convert_office_docr$   )monkeypatchcaplogr   r   s       r   'test_convert_office_doc_captures_errorsr   U  sD    ?X 
E84
n.>fUAV[[PPPr    c            	         dD  cg c]  } t        j                  |        }} |D ])  } | j                  d       | dz  j                  d       + t	        d      }t        d      5 }|j                  t        j                  |D  cg c]  } || f c}        d d d        t        j                  |D  cg c]  } | dz  j                          c}       dk(  sJ y c c} w c c} w # 1 sw Y   MxY wc c} w )N)z
/tmp/proc1z
/tmp/proc2z
/tmp/proc3T)exist_oksimple.docx
missing_ok
simple.docr8   pathlibPathmkdirunlinkr	   r   starmapr   r   npsumis_filepathpaths_to_savefile_to_convertpools       r   :test_convert_office_docs_avoids_concurrent_call_to_sofficer   `  s    4^_DW\\$'_M_ 7

D
!		%%%67 '|4O	a eDV..Ub0cT/41H0cde 66ND=(113NOSTTTT ` 1de e Os)   C'C$CC$3C0C$$C-c                     dD  cg c]  } t        j                  |        }} |D ]*  } | j                  dd       | dz  j                  d       , t	        d      }t        d      5 }|j                  t        j                  |D  cg c]	  } || dd d	f c}        d d d        t        j                  |D  cg c]  } | dz  j                          c}       dk  sJ y c c} w c c} w # 1 sw Y   MxY wc c} w )
N)z/tmp/wait/proc1z/tmp/wait/proc2z/tmp/wait/proc3T)parentsr   r   r   r   r8   r   r   r   r   s       r   .test_convert_office_docs_respects_wait_timeoutr   m  s    '`#TM   7

4$
/		%%%67 '|4O	a 
D%% CPP$otVT15P		

 66ND=(113NORSSSS% Q
 
 Os)   C(C(C#C(7C4#C((C1r$   expected)u@   <table><tbody><tr><td>👨\U+1F3FB🔧</td></tr></tbody></table>T)z6<table><tbody><tr><td>Hello!</td></tr></tbody></table>Fc                 6    t        j                  |       |u sJ y r   )r   contains_emojir   s     r   test_contains_emojir     s       &(222r    c                      t               } t        j                  | j                  d         }t	        |t
              sJ y )Nr   )r0   r   get_page_image_metadatar5   rZ   dict)docrX   s     r   2test_get_page_image_metadata_and_coordinate_systemr     s2    

C--ciil;Hh%%%r    z img/layout-parser-paper-fast.jpgc           	         t        j                  ddddd      t        j                  ddd	d
d      g}|D cg c]=  }t        |j                  |j                  |j
                  t        j                        ? }}t        j                  |       }t        j                  ||j                        }t        |      t        |      k(  sJ |D ch c]  }|j                   c}t        j                  hk(  sJ |j                  \  }}t        ||      }	t!        ||      D ]?  \  }}
|j"                  j$                  t'        |
j                  j$                  |	      k(  r?J  y c c}w c c}w )Ng     `d@g     \@g     @|@g      `@z(LayoutParser: A Unified Toolkit for Deep)r$   g     c@g     `@g     |@g     `b@z&Learning Based Document Image Analysis)r%   r$   sourcer#   )ocr_data
image_sizerC   )pointssystem)r   ri   r   r%   r$   r   r   UNCATEGORIZED_TEXTr   openr   ocr_data_to_elementssizelencategoryr
   ziprX   r?   r   )filenametext_regionsrr   r   r(   elimage_widthimage_heightr@   	layout_els              r   test_ocr_data_to_elementsr     sh    	;	
 	9	
L.   	88//		
H  JJx E**::H
 x=CM)))"*+BBKK+0N0N/OOOO !&

K"\JXx0 
I{{&&*=>>--$+
 
 	
 

-" ,s   AE%E*)Br   multiprocessingr   numpyr   pytestPILr    unstructured_inference.inferencer   )unstructured_inference.inference.elementsr   .unstructured_inference.inference.layoutelementr   test_unstructured.unit_utilsr	   "unstructured.documents.coordinatesr
   unstructured.documents.elementsr   r   r   r   r   r   r   r   r   r   rR   unstructured.partition.commonr   
PageLayoutr   DocumentLayoutr0   rL   rS   markparametrizer[   r]   r`   rj   rl   itemsstrr#   ro   CHECK_BOX_UNCHECKEDCHECK_BOX_CHECKEDRADIO_BUTTON_UNCHECKEDRADIO_BUTTON_CHECKEDCHECKED	UNCHECKEDboolrt   r|   r~   r   r   r   r   r   r   r   r.   r    r   <module>r      s         3 @ H 9 9   1
V&& 
>
.. 
&$ 7	%	UA	q!	64 		.	.$$** ."""$ J	0 (		(	(%0		&	&-		+	+U3		)	)40			d#			&
# QU 
,BB Q
UT, SI33& @A-
r    