
    :Qg*                    :   d dl mZ d dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
mZmZ  ej                         d        Z ej                         d        Z ej                         d        Zd	 Zd
 Zd Zd Zd Zd Zd Zej0                  j3                  dd dgd ed      d gd ed      g      d        Zej0                  j3                  dg dg      d        Zej0                  j3                  dd gd ed      g      d        Zej0                  j3                  dd dgd ed      g      d        Zej0                  j3                  dg dg      d        Zej0                  j3                  dddddd d!d"gd!d#d$d d%d&d&d'd(d)fdddddd*d+gdd,d-d.d%d&d&d'd(d)fdddddd*d+gdd/d-d d%d&d&d'd(d)fdddd0dd+d*gdd1d-d d%d&d&d'd(d)fddd0ddd*d+gdd2d-d d%d&d&d'd(d)fdddd3dd*d+gdd4d-d d%d&d&d'd(d)fd5d6dddd*d+gdd7d8d d9d:d;d<d(d)fg      d=        Z d> Z!d? Z"d@ Z#dA Z$y)B    )annotationsN)utils)
PixelSpace)ElementMetadataNarrativeTextTitlec                     ddidddidgS )NtextzThis is a sentence.zThis is another sentence.scoreg?)r
   meta r       Y/var/www/html/answerous/venv/lib/python3.12/site-packages/test_unstructured/test_utils.py
input_datar      s#     
&',wnE r   c                B    t         j                  j                  | d      S )Nzoutput.jsonl)ospathjoin)tmp_paths    r   output_jsonl_filer      s    77<<.11r   c           	         t         j                  j                  | d      }t        |d      5 }|j	                  |D cg c]  }t        j                  |      dz    c}       d d d        |S c c}w # 1 sw Y   |S xY w)Nzinput.jsonlzw+
)r   r   r   open
writelinesjsondumps)r   r   	file_path
input_fileobjs        r   input_jsonl_filer       sm    X}5I	i	 N*L#tzz#5LMN MNs   A2A-
A2-A22A<c                    t        j                  | |       t        |      5 }|D cg c]  }t        j                  |       }}d d d        | k(  sJ y c c}w # 1 sw Y   xY wN)r   save_as_jsonlr   r   loads)r   r   output_fileline	file_datas        r   test_save_as_jsonlr(   "   sb    	
$56		  ?K2=>$TZZ%>	>?
""" ?? ?s   AAAAA#c                <    t        j                  |       }||k(  sJ y r"   )r   read_from_jsonl)r    r   r'   s      r   test_read_as_jsonlr+   )   s!    %%&67I
"""r   c                 J    t        j                  d      d        }  |         y )Nnumpydependenciesc                     dd l } y Nr   r-   r2   s    r   	test_funcz7test_requires_dependencies_decorator.<locals>.test_func/   s    r   r   requires_dependenciesr3   s    r   $test_requires_dependencies_decoratorr7   .   s&    
  g6 7 Kr   c                 N    t        j                  ddg      d        }  |         y )Nr-   pandasr.   c                     dd l } dd l}y r1   r-   r9   r;   s     r   r3   z@test_requires_dependencies_decorator_multiple.<locals>.test_func7   s    r   r4   r6   s    r   -test_requires_dependencies_decorator_multipler<   6   s,    
  w.AB C Kr   c                     t        j                  d      d        } t        j                  t              5   |         d d d        y # 1 sw Y   y xY w)Nnot_a_packager.   c                     dd l } y r1   r>   r@   s    r   r3   zDtest_requires_dependencies_decorator_import_error.<locals>.test_func@   s    r   r   r5   pytestraisesImportErrorr6   s    r   1test_requires_dependencies_decorator_import_errorrE   ?   sG    
  o> ? 
{	#   s   AAc                     t        j                  ddg      d        } t        j                  t              5   |         d d d        y # 1 sw Y   y xY w)Nr>   r-   r.   c                     dd l } dd l}y r1   r>   r-   rH   s     r   r3   zMtest_requires_dependencies_decorator_import_error_multiple.<locals>.test_funcI   s    r   rA   r6   s    r   :test_requires_dependencies_decorator_import_error_multiplerI   H   sM    
  .HI J 
{	#   s   A		Ac                 X    t        j                  d       G d d             }  |         y )Nr-   r.   c                      e Zd Zd Zy)@test_requires_dependencies_decorator_in_class.<locals>.TestClassc                    dd l }y r1   r2   )selfr-   s     r   __init__zItest_requires_dependencies_decorator_in_class.<locals>.TestClass.__init__U   s    r   N)__name__
__module____qualname__rO   r   r   r   	TestClassrL   S   s    	r   rS   r4   )rS   s    r   -test_requires_dependencies_decorator_in_classrT   R   s+    
  g6  7 Kr   iterator   )r   rV   
   )r   c                8    t        j                  |       dk(  sJ y r1   r   firstrU   s    r   test_first_gives_firstr\   [       ;;x A%%%r   r   c                    t        j                  t              5  t        j                  |        d d d        y # 1 sw Y   y xY wr"   )rB   rC   
ValueErrorr   rZ   r[   s    r   test_first_raises_if_emptyr`   `   s/    	z	" H  	   9Ac                8    t        j                  |       dk(  sJ y r1   rY   r[   s    r   test_only_gives_onlyrc   f   r]   r   c                    t        j                  t              5  t        j                  |        d d d        y # 1 sw Y   y xY wr"   rB   rC   r_   r   onlyr[   s    r   %test_only_raises_when_len_more_than_1rg   k   /    	z	" 

8  ra   c                    t        j                  t              5  t        j                  |        d d d        y # 1 sw Y   y xY wr"   re   r[   s    r   test_only_raises_if_emptyrj   q   rh   ra   )coords1coords2text1text2nested_error_tolerance_pxexpectation))      )rq      )   rs   )rt   rr   ))      )ru      rr   rw   rr   rv   Some lovely titleSome lovely textrr   zTitle(ix=0)zNarrativeText(ix=1)znested NarrativeText in Titlez100%z5.88%u   9pxˆ2u   18pxˆ2)largest_ngram_percentageoverlap_percentage_totalmax_areamin_area
total_area)overlapping_elementsparent_elementoverlapping_caseoverlap_percentagemetadataz0. Title(ix=0)z1. NarrativeText(ix=1)zFpartial overlap sharing 50.0% of the text from1. NarrativeText(2-gram)z11.11%g      I@z#partial overlap with duplicate text z6partial overlap with empty content in 1. NarrativeTextz.partial overlap with empty content in 0. Titlez Something totally different herez$partial overlap without sharing text)rx   )rr   rW   )rs   rW   )rs   rw   ))rV   rv   )ru   rt   )rw   rt   ry   zSmall partial overlapz8.33%z3.23%u   20pxˆ2u   12pxˆ2u   32pxˆ2c           	         t        || t        dd      t        d            t        ||t        dd      t        d            g}t	        j
                  ||d      \  }}|du sJ |d	   |k(  sJ y )
N   widthheightrV   page_numberr
   coordinatescoordinate_systemr         $@sm_overlap_thresholdTr   r   r   r   r   r   #catch_overlapping_and_nested_bboxes)	rk   rl   rm   rn   ro   rp   elementsoverlapping_flagoverlapping_casess	            r   (test_catch_overlapping_and_nested_bboxesr   w   s    l 	(r"=$3		
 	(r"=$3		
H +0*S*S!!+''
 t###Q;...r   c            	         t        ddt        dd      t        d            t        dd	t        dd      t        d            g} t	        j
                  | dd
      \  }}|du sJ |g k(  sJ y )Nrz   ))rq   rw   )rq   rt   )rt   rt   )rt   rw   r   r   rV   r   r   r{   ))rw   rs   )rw   	   )r   r   )r   rs   r   r   Fr   )r   r   r   s      r   =test_catch_overlapping_and_nested_bboxes_non_overlapping_caser   #  s    $8(r"=$3		
 	#8(r"=$3		
H +0*S*S	!+''
 u$$$"""r   c                 B    dg} t        j                  |       }|dk(  sJ y )N*   )r   rf   )singleton_iterableresults     r   $test_only_returns_singleton_iterabler   ;  s%    ZZ*+FR<<r   c                     ddg} t        j                  t              5  t        j                  |        d d d        y # 1 sw Y   y xY w)Nr   r   re   )r   s    r   *test_only_raises_on_non_singleton_iterabler   A  s:    a	z	" '

%&' ' 's	   =Ac                 l    d} d}d}t        j                  | ||      \  }}|dk(  sJ t        |      rJ y )Nr   zbanana orange pineappleru   r   )r   !calculate_shared_ngram_percentagebool)str1str2npercentcommon_ngramss        r   Ftest_calculate_shared_ngram_percentage_returns_null_vals_for_empty_strr   G  sH    D$D	A"DDT4QRSG]a<<M""""r   )%
__future__r   r   r   rB   unstructuredr   "unstructured.documents.coordinatesr   unstructured.documents.elementsr   r   r   fixturer   r   r    r(   r+   r7   r<   rE   rI   rT   markparametrizeranger\   r`   rc   rg   rj   r   r   r   r   r   r   r   r   <module>r      s   "  	   9 Q Q   2 2  ##
 q!ffeBi!dERSH%UV& W& b"X. /
 qc4q%:;& <& q!ffeBi%@A B
 b"X. /
 X -,)68M(N"/$C&,0107 ( ("+	
* -,)9;S(T"&%(&.0407 ( ("+	
, -,)9;S(T"&$I&.0107 ( ("+	
* -,)ACS(T"&%]&.0107 ( ("+	
* -,)9;S(T"&$T&.0107 ( ("+	
* -,.)9;S(T"&$J&.0107 ( ("+	
* /,)9;S(T"&$;&-0107 ) )"+	
uNQd/eQd/4#0'#r   