
    :Qgo              
         d Z ddlmZ ddlZddlZddlZddlZddlmZ ddl	Z	ddl
mZmZ ddlmZmZ ddlmZmZmZ ddlmZmZmZmZmZmZmZmZmZmZ dd	lm Z  e	jB                  jE                  d
 e        ed       e       g      	 	 d)d       Z#d Z$e	jB                  jE                  d
 e        ed       edd       e       g      d)d       Z%d Z&d Z'd Z(d Z)e	jB                  jE                  ddejT                  ejT                  dfdejT                  ejV                  dfdejV                  ejT                  dfdejV                  ejV                  dfg      	 	 	 	 	 	 	 	 d*d       Z,d Z-d Z.d Z/d Z0d Z1d Z2 G d  d!      Z3d" Z4d# Z5d$ Z6e	jB                  jE                  d%g d&      	 	 	 	 	 	 	 	 	 	 d+d'       Z7d( Z8y),z8Test-suite for `unstructured.documents.elements` module.    )annotationsN)partial)assign_hash_idsexample_doc_path)clean_bulletsclean_prefix)CoordinateSystemOrientationRelativeCoordinateSystem)
CheckBoxConsolidationStrategyCoordinatesMetadataDataSourceMetadataElementElementMetadataPointsTextTitleassign_and_map_hash_ids)partition_jsonelement textc                P   t        | j                  t              sJ t        | j                        dk(  sJ | j                  j	                  d      dk(  sJ d}| j                  d      |k(  sJ | j                  |k(  sJ | j                  d      |k(  sJ | j                  |k(  sJ y )N$   -    5336294a19f32ff03ef80066fbc3e0f7r   )
isinstanceidstrlencount
id_to_hash)r   expected_hashs     f/var/www/html/answerous/venv/lib/python3.12/site-packages/test_unstructured/documents/test_elements.pyQtest_Element_autoassigns_a_UUID_then_becomes_an_idempotent_and_deterministic_hashr(   %   s    
 gjj#&&&wzz?b   ::C A%%%6Ma M111::&&& a M111::&&&    c                 `    t        j                  t        dd       j                                y )Nzhello there!r   
element_id)jsondumpsr   to_dict r)   r'   test_Text_is_JSON_serializabler1   8   s    JJtD9AACDr)   r+   c                    t        | j                  t              sJ t        | j                        dk(  sJ | j                  j	                  d      dk(  sJ y )Nr   r   r   )r    r!   r"   r#   r$   r   s    r'   *test_Element_self_assigns_itself_a_UUID_idr4   =   sK     gjj#&&&wzz?b   ::C A%%%r)   c                 |    t        d      } | j                  t        t        d             t	        |       dk(  sJ y )N$[1] A Textbook on Crocodile Habitatsr   \[\d{1,2}\]pattern A Textbook on Crocodile Habitats)r   applyr   r   r"   )text_elements    r'    test_text_element_apply_cleanersr=   L   s6    CDLw|^DE| BBBBr)   c                     t        t        d      t        t              g} t        d      } |j                  |   t        |      dk(  sJ y )Nr7   r8   u(   [1] • A Textbook on Crocodile Habitatsr   r:   )r   r   r   r   r;   r"   )cleanersr<   s     r'   )test_text_element_apply_multiple_cleanersr@   S   sG    n=w}?UVHJKLL!| BBBBr)   c                     t               } t        | d      sJ | j                  J | j                  dk(  sJ t        |       dk(  sJ y )Nr   r   )r   hasattrr   r"   r3   s    r'   /test_non_text_elements_are_serializable_to_textrC   Z   sM    jG7F###<<###<<2w<2r)   c                     dd} t        d      }t        j                  t        d      5  |j	                  |        d d d        y # 1 sw Y   y xY w)Nc                     y)N   r0   )ss    r'   bad_cleanerzFtest_apply_raises_if_func_does_not_produce_string.<locals>.bad_cleanerc   s    r)   r6   r   z%Cleaner produced a non-string output.match)rG   r"   )r   pytestraises
ValueErrorr;   )rH   r<   s     r'   1test_apply_raises_if_func_does_not_produce_stringrN   b   sF     CDL	z)P	Q (;'( ( (s   AA)coordinatesorientation1orientation2expected_coords)rF      rF   r      r   rW   rT   ))
      )rY   (   )   r[   )r\   rZ   ))rY     )rY     )r\   r^   )r\   r]   c                l   t        dd      }||_        t        dd      }||_        t        | |      }|j                  |      }|J t	        ||      D ]  \  }}	|t        j                  |	      k(  rJ  |j                  |d       |j                  j                  J |j                  j                  j                  J t	        |j                  j                  j                  |      D ]  \  }}	|t        j                  |	      k(  rJ  |j                  j                  j                  |k(  sJ y )Nd      i  i  rO   coordinate_systemT)in_place)r	   orientationr   !convert_coordinates_to_new_systemziprK   approxmetadatarO   pointssystem)
rO   rP   rQ   rR   coord1coord2r   
new_coords	new_coordexpecteds
             r'   &test_convert_coordinates_to_new_systemrq   l   s4   F c3'F%FdD)F%F+HG::6BJ!!!":? 4	8FMM(33334--ft-D''333''..:::"7#3#3#?#?#F#FX 4	8FMM(33334''..&888r)   c                     t        d d       } t        dd      }t        j                  |_        | j                  |      J y )Nrb   r`   ra   )r   r	   r
   SCREENre   rf   )r   coords     r'   *test_convert_coordinate_to_new_system_noneru      s?    $$?GS#&E#**E44U;CCCr)   c                     d} t               }t        | |      }t        | |      }|j                  j                  |k(  sJ y )NrS   rb   rj   rk   )r   r   r   ri   rO   )rO   rc   r   expected_coordinates_metadatas       r'   0test_element_constructor_coordinates_all_presentry      sK    2K02+ARSG$7 %! ''+HHHHr)   c                     t        j                  t              5 } t        t	                      d d d        t         j                        dk(  sJ y # 1 sw Y   $xY w)N)rc   NCoordinates points should not exist without coordinates system and vice versa.)rK   rL   rM   r   r   r"   valueexc_infos    r'   2test_element_constructor_coordinates_points_absentr      sS    	z	" >h":"<=> 	HNN[	\	\> >s   AAc                     t        j                  t              5 } t        d       d d d        t	         j
                        dk(  sJ y # 1 sw Y   $xY w)NrS   rO   r{   )rK   rL   rM   r   r"   r|   r}   s    r'   2test_element_constructor_coordinates_system_absentr      sP    	z	" >h<=> 	HNN[	\	\> >s   A

Ac                     d} t               }t        | |      }ddddd}|j                         }||k(  sJ t        j                  |      |k(  sJ y )NrS   rw   rF   r   layout_heightlayout_widthrj   rk   )r   r   r/   	from_dict)rO   rc   coordinates_metadataexpected_schemacoordinates_metadata_dicts        r'   test_coordinate_metadata_serdesr      sk    2K02.kJ[\2,	O !5 < < >$777(()BCG[[[[r)   c                 z    d} t               }t        d| |      }|j                         ddddddid ddd	k(  sJ y )
NrS   awt32t1)r,   rO   rc   rO   rF   r   r   r   )ri   typer   r,   )r   r   r/   )rO   rc   r   s      r'   test_element_to_dictr      sg    2K02+G ??!" !:4	
 !   r)   c                     e Zd ZdZd Zej                  j                  d ej                  d      dz  dz  dg      	 	 d(d       Z
d	 Zej                  j                  d ej                  d
      d
g      	 	 d(d       Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Z d  Z!d! Z"d" Z#d# Z$d$ Z%d% Z&d& Z'y'))DescribeElementMetadatazFUnit-test suite for `unstructured.documents.elements.ElementMetadata`.c                |    t        j                  t        d      5  t        d       d d d        y # 1 sw Y   y xY w)Nz.got an unexpected keyword argument 'file_name'rI   	memo.docx)	file_name)rK   rL   	TypeErrorr   selfs    r'   Hit_detects_unknown_constructor_args_at_both_development_time_and_runtimez`DescribeElementMetadata.it_detects_unknown_constructor_args_at_both_development_time_and_runtime   s.    ]]9,\] 	3k2	3 	3 	3s   2;	file_pathdocuments/docxmemosmemo-2023-11-10.docxz)documents/docx/memos/memo-2023-11-10.docxc                `    t        |      }|j                  dk(  sJ |j                  dk(  sJ y )Nfilenamezdocuments/docx/memosr   r   file_directoryr   r   r   metas      r'   Ait_accommodates_either_a_pathlib_Path_or_str_for_its_filename_argzYDescribeElementMetadata.it_accommodates_either_a_pathlib_Path_or_str_for_its_filename_arg   s6     	2""&<<<<}} 6666r)   c                P    t               }|j                  J |j                  J y )Nr   r   r   s     r'   Iit_leaves_both_filename_and_file_directory_None_when_neither_is_specifiedzaDescribeElementMetadata.it_leaves_both_filename_and_file_directory_None_when_neither_is_specified  s,     ""***}}$$$r)   r   c                Z    t        |      }|j                  J |j                  dk(  sJ y )Nr   r   r   r   s      r'   Oand_it_leaves_file_directory_None_when_not_specified_and_filename_is_not_a_pathzgDescribeElementMetadata.and_it_leaves_file_directory_None_when_not_specified_and_filename_is_not_a_path  s2     	2""***}}+++r)   c                `    t        d      }|j                  dk(  sJ |j                  dk(  sJ y )Nz#documents/docx/memo-2023-11-11.docxr   r   zmemo-2023-11-11.docxr   r   s     r'   Mand_it_splits_off_directory_path_from_its_filename_arg_when_it_is_a_file_pathzeDescribeElementMetadata.and_it_splits_off_directory_path_from_its_filename_arg_when_it_is_a_file_path  s5    (MN""&6666}} 6666r)   c                b    t        dd      }|j                  dk(  sJ |j                  dk(  sJ y )Nztmp/staging/memo.docxr   )r   r   r   r   r   s     r'   Lbut_it_prefers_a_specified_file_directory_when_filename_also_contains_a_pathzdDescribeElementMetadata.but_it_prefers_a_specified_file_directory_when_filename_also_contains_a_path  s7    (?P`a""&6666}}+++r)   c                     t        ddd       y )N2T*   )category_depthr   text_as_htmlr   r   s    r'   Mit_knows_the_types_of_its_known_members_so_type_checking_support_is_availablezeDescribeElementMetadata.it_knows_the_types_of_its_known_members_so_type_checking_support_is_available$  s    	
r)   c                ^    t        d      }d|j                  v sJ |j                  dk(  sJ y )Nhttps://google.com)urlr   r   __dict__r   r   s     r'   +it_returns_the_value_of_an_attribute_it_haszCDescribeElementMetadata.it_returns_the_value_of_an_attribute_it_has.  s2    #78%%%xx////r)   c                T    t               }d|j                  vsJ |j                  J y )Nr   r   r   s     r'   :and_it_returns_None_for_a_known_attribute_it_does_not_havezRDescribeElementMetadata.and_it_returns_None_for_a_known_attribute_it_does_not_have3  s,     DMM)))xxr)   c                    t               }d|j                  vsJ t        j                  t        d      5  |j
                   d d d        y # 1 sw Y   y xY w)Ncoefficientz%object has no attribute 'coefficient'rI   )r   r   rK   rL   AttributeErrorr   r   s     r'   Fbut_it_raises_AttributeError_for_an_unknown_attribute_it_does_not_havez^DescribeElementMetadata.but_it_raises_AttributeError_for_an_unknown_attribute_it_does_not_have8  sI     DMM111]]>1XY 		 	 	s   AAc                    t               }d|j                  vsJ d|_        d|j                  v sJ |j                  dk(  sJ y )Nr   tmp/r   r   r   r   s     r'   .it_stores_a_non_None_field_value_when_assignedzFDescribeElementMetadata.it_stores_a_non_None_field_value_when_assigned>  sJ     t}}444$4==000""f,,,r)   c                    t        d      }d|j                  v sJ |j                  dk(  sJ d |_        d|j                  vsJ |j                  J y )Nr   )r   r   r   r   s     r'   .it_removes_a_field_when_None_is_assigned_to_itzFDescribeElementMetadata.it_removes_a_field_when_None_is_assigned_to_itE  s^    f54==000""f,,,"t}}444""***r)   c                Z    t        ddddd      }|j                         ddddddk(  sJ y )NrF   r   rT   <table></table>r   )r   r   page_numberr   r   )r   r/   r   s     r'   !it_can_serialize_itself_to_a_dictz9DescribeElementMetadata.it_can_serialize_itself_to_a_dictP  sI    !*$
 ||~$-'"
 
 	
 
r)   c                    t        dt        dt                     d      }|j                         ddddddddk(  sJ y )NrF   )rT   rT   rU   rV   rX   rw   rT   )r   rO   r   r   r   )r   r   r   r/   r   s     r'   Gand_it_serializes_a_coordinates_sub_object_to_a_dict_when_it_is_presentz_DescribeElementMetadata.and_it_serializes_a_coordinates_sub_object_to_a_dict_when_it_is_present`  s]    +7/1 
 ||~!" !:4	 	"
 	
 		
 	
r)   c                n    t        dt        dd      d      }|j                         ddddddk(  sJ y )NrF   5https://www.nih.gov/about-nih/who-we-are/nih-director
2023-11-09r   date_createdrT   )r   data_sourcer   )r   r   r/   r   s     r'   Gand_it_serializes_a_data_source_sub_object_to_a_dict_when_it_is_presentz_DescribeElementMetadata.and_it_serializes_a_data_source_sub_object_to_a_dict_when_it_is_presentt  sT    *K) 
 ||~N , "
 
 	
 
r)   c                    t        t        d      t        d      g      }t        d|d      }|j	                         ddddk(  sJ y )NLoremzLorem IpsumrF   rT   )r   orig_elementsr   zeJyFzcsKwjAQheFXKVm7MGkzbXwDocu6EpFcTqTQG3UEtfTdbZa6cTnDd/jPi0CHHgNf2yAOmXCljjqXoErKoIw3hqJRXlPuyphrErtM9GAbLNvNL+t2M56ctvU4o0+AXxPSo2m5g9jIb6VwBE0VBSujp1LJ6EiRLpwiSBf3fyvZcbo/vlqnwVvGbZzbN0KT7Hr5AG/eQyM=)r   r   r   r   r/   )r   elementsr   s      r'   Jand_it_serializes_an_orig_elements_sub_object_to_base64_when_it_is_presentzbDescribeElementMetadata.and_it_serializes_an_orig_elements_sub_object_to_base64_when_it_is_present  sZ    "E'ND4G#HI"
 ||~F
 	"
 	
 		
 	
r)   c                    dddiddid}t        j                  |      }|j                         }d|v sJ d|d   vsJ d|d   vsJ y)	zMetadata sub-objects ignore fields they do not explicitly define.

        This is _not_ the case for ElementMetadata itself where an non-known field is welcomed as a
        user-defined ad-hoc metadata field.
        hello	new_fieldworldfoo)r   r   rO   rO   r   N)r   r   r/   )r   element_metadatari   metadata_dicts       r'   Gbut_unlike_in_ElementMetadata_unknown_fields_in_sub_objects_are_ignoredz_DescribeElementMetadata.but_unlike_in_ElementMetadata_unknown_fields_in_sub_objects_are_ignored  sz     !W U
 #,,-=> ((*m+++-">>>>-">>>>r)   c                   ddddddddd	d
dgd}t        j                  |      }|j                  dk(  sJ |j                  t	        dt                     k(  sJ |j                  t        dd	
      k(  sJ |j                  J d|j                  vsJ |j                  dk(  sJ t        j                  t        d      5  |j                   d d d        t        |d      r|j                  nd J t!        |j"                  t$              sJ |j"                  dgk(  sJ |j"                  j'                  d       |j"                  ddgk(  sJ |d   dgk(  sJ y # 1 sw Y   xY w)NrF   (\?r   rT   rS   r   r   r   r   r   eng)r   r   rO   r   	languagesrw   r   z.ntMetadata' object has no attribute 'quotient'rI   quotientspar   )r   r   r   rO   r   r   r   r   r   r   r   rK   rL   r   r   rB   r    r   listappend)r   	meta_dictr   s      r'   %it_can_deserialize_itself_from_a_dictz=DescribeElementMetadata.it_can_deserialize_itself_from_a_dict  s   !" !:4	 O ,  
	  ((3 ""a''' #63+-$
 
 	
 
 #5G%$
 
 	
 
 ""***t}}444 4''' ]]>1ab 	MM	 ")z!:MMM $..$///~~%(((e$~~%///%%000	 	s   EE$c                h    t               }d|_        d|j                  v sJ |j                  dk(  sJ y N   foobar)r   r   r   r   s     r'   /it_allows_an_end_user_to_add_an_arbitrary_fieldzGDescribeElementMetadata.it_allows_an_end_user_to_add_an_arbitrary_field  s5     4==((({{ar)   c                T    t               }d|_        |j                         ddik(  sJ y r   )r   r   r/   r   s     r'   /and_fields_so_added_appear_in_the_metadata_JSONzGDescribeElementMetadata.and_fields_so_added_appear_in_the_metadata_JSON  s)     ||~(A...r)   c                    t               }d|_        d|j                  v sJ d |_        d|j                  vsJ t        j                  t
        d      5  |j                   d d d        y # 1 sw Y   y xY w)Nr   r   z2'ElementMetadata' object has no attribute 'foobar'rI   )r   r   r   rK   rL   r   r   s     r'   9and_it_removes_an_end_user_field_when_it_is_assigned_NonezQDescribeElementMetadata.and_it_removes_an_end_user_field_when_it_is_assigned_None  sl     4==(((t}},,,]]"V
 	 KK	 	 	s   A**A3c                   t        dd      }d|_        d|_        t        dd      }d|_        d	|_        |j	                  |       |j
                  dk(  sJ |j                  dk(  sJ |j                  dk(  sJ |j                  dk(  sJ |j                  dk(  sJ |j                  d	k(  sJ |j
                  J |j                  dk(  sJ |j                  dk(  sJ |j                  J |j                  J |j                  dk(  sJ |j                  d	k(  sJ t        j                  t        d
      5  |j                   d d d        y # 1 sw Y   y xY w)NrF   )r   r   r      r   rT   )r   r   gffffff?rZ   z.etadata' object has no attribute 'coefficient'rI   )r   r   stem_lengthr   updater   r   r   r   r   rK   rL   r   r   r   others      r'   *it_can_update_itself_from_another_instancezBDescribeElementMetadata.it_can_update_itself_from_another_instance  sl   aQ?v1EE ""a'''""f,,,1$$$4'''}}###2%%%##+++##v---  A%%%!!)))yy   ~~$$$  B&&&]]>1ab 		 	 	s   7EEc                    t               }t        j                  t        d      5  |j	                  ddi       d d d        y # 1 sw Y   y xY w)Nz1ate\(\)' must be an instance of 'ElementMetadata'rI   r   z0.56)r   rK   rL   rM   r   r   s     r'   Dbut_it_raises_on_attempt_to_update_from_a_non_ElementMetadata_objectz\DescribeElementMetadata.but_it_raises_on_attempt_to_update_from_a_non_ElementMetadata_object#  s?     ]]:-ab 	1KK/0	1 	1 	1s   AAc                    t        dt        dt                     t        dd      ddgd	d
d      }|t        dt        dt                     t        dd      ddgd	d
d      k(  sJ y )NrF   rS   rw   r   
2023-11-08r   r   r   rT   r   r   )r   rO   r   r   r   r   r   r   )r   r   r   r   r   s     r'   @it_is_equal_to_another_instance_with_the_same_known_field_valueszXDescribeElementMetadata.it_is_equal_to_another_instance_with_the_same_known_field_values*  s    +7/1 +K) "g*$
  +7/1 +K) "g*$
 
 	
 
r)   c                ~     G d d      }t               } |       }|j                  |j                  k(  sJ ||k7  sJ y )Nc                      e Zd Zy)iDescribeElementMetadata.but_it_is_never_equal_to_a_non_ElementMetadata_object.<locals>.NotElementMetadataN)__name__
__module____qualname__r0   r)   r'   NotElementMetadatar  M  s    r)   r  )r   r   )r   r  r   r   s       r'   5but_it_is_never_equal_to_a_non_ElementMetadata_objectzMDescribeElementMetadata.but_it_is_never_equal_to_a_non_ElementMetadata_objectL  sB    	 	  "$ }}...u}}r)   c                ^    t        d      }d|_        t        d      }d|_        ||k(  sJ y )NrF   r   r   r   r   r   s      r'   Ait_is_equal_to_another_instance_with_the_same_ad_hoc_field_valueszYDescribeElementMetadata.it_is_equal_to_another_instance_with_the_same_ad_hoc_field_valuesX  2    a0q1 u}}r)   c                ^    t        d      }d|_        t        d      }d|_        ||k7  sJ y )NrF   r
  r   g
ףp=
?r  r   s      r'   Abut_it_is_not_equal_to_an_instance_with_ad_hoc_fields_that_differzYDescribeElementMetadata.but_it_is_not_equal_to_an_instance_with_ad_hoc_fields_that_differ`  r  r)   c                D    t        dg      }|t        ddg      k7  sJ y )Nr   )r   r   r   r   s     r'   :it_is_not_equal_when_a_list_field_contains_different_itemszRDescribeElementMetadata.it_is_not_equal_when_a_list_field_contains_different_itemsh  s%    %1%@@@@r)   c                    t        t        dt                           }|t        t        dt                           k7  sJ y )NrS   rw   r   )r   )rT   r   rV   )r   rT   )r   r   r   r   s     r'   Aand_it_is_not_equal_when_the_coordinates_sub_object_field_differszYDescribeElementMetadata.and_it_is_not_equal_when_the_coordinates_sub_object_field_differsl  sI    +7/1
 +7/1
 
 	
 
r)   c                j    t        t        dd            }|t        t        dd            k7  sJ y )Nr   r   r   )r   r   )r   r   r   s     r'   Aand_it_is_not_equal_when_the_data_source_sub_object_field_differszYDescribeElementMetadata.and_it_is_not_equal_when_the_data_source_sub_object_field_differsz  sC    *K)
 *K)
 
 	
 
r)   c                    t               }t        |j                        }t        j                         }|D ]  }||v rJ d| d        y )NzElementMetadata field `.zm` does not have a consolidation strategy. Add one in `ConsolidationStrategy.field_consolidation_strategies().)r   sorted_known_field_namesr   field_consolidation_strategies)r   ri   metadata_field_namesconsolidation_strategies
field_names        r'   Cit_can_find_the_consolidation_strategy_for_each_of_its_known_fieldsz[DescribeElementMetadata.it_can_find_the_consolidation_strategy_for_each_of_its_known_fields  sb    "$%h&A&AB#8#W#W#Y . 	J!99 *:, 7W X9	r)   N)r   zpathlib.Path | str)(r  r  r  __doc__r   rK   markparametrizepathlibPathr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r  r0   r)   r'   r   r      s(   P3 [[GLL)*W47MM7	
7+77% [[[<7<<+Dk*RS,+, T,7,
0
 
-+
 
(
$
&?241p /
	B1 
D
A

 	r)   r   c            	     2   t        dt        d            } | t        dt        d| j                              t        dt        d| j                              g}t        t	        j
                  |            }|D cg c]  }|j                   }}t        |      t        t        |            k(  sJ d       |d   j                  j                  |d   j                  j                  k(  sJ t        |      D ]  \  }}|j                  ||   j                  k7  sJ d	       |j                  j                  @|j                  j                  |v sJ d
       |j                  j                  ||   j                  j                  k7  rJ d        y c c}w )NParentrF   r   r   ri   r   r   	parent_idz Recalculated IDs must be unique.rT   z'IDs haven't changed after recalculationz Parent ID not in the list of IDsz,Parent ID hasn't changed after recalculation)r   r   r!   r   copydeepcopyr#   setri   r(  	enumerate)parentr   updated_elementsr   idsidxupdated_elements          r'   /test_hash_ids_are_unique_for_duplicate_elementsr2    sm   x/a*HIF)o!vyy&YZ)o!vyy&YZH /t}}X/FG%5
6'7::
6C
6 s8s3s8}$H&HH$A;))Xa[-A-A-K-KKKK )*: ; >_!!Xc]%5%55`7``5##--9"++55<`>``<((22hsm6L6L6V6VV>=>V> 7s   ?Fc                    t        dt        d            } t        dt        d| j                              }| ||g}t        t	        j
                  |            }|D cg c]  }|j                   }}t        |      t        t        |            dz   k(  sJ d       |d   j                  j                  |d   j                  j                  k(  sJ y c c}w )	Nr$  rF   r%  r&  r   r'  z8One element is duplicated so uniques should be one less.rT   )
r   r   r!   r   r)  r*  r#   r+  ri   r(  )r-  r   r   r.  r/  s        r'   5test_hash_ids_can_handle_duplicated_element_instancesr4    s    x/a*HIF	OU[U^U^,_`G%w8H /t}}X/FG%5
6'7::
6C
6 s8s3s8}q((d*dd(A;))Xa[-A-A-K-KKKK	 7s   !Cc            	        t        dt        d            } | t        dt        d| j                              t        dt        d| j                              g}t        |      }|D cg c]  }|j                   }}|D cg c]  }|j                  j
                   }}|g dk(  sJ |g dk(  sJ y c c}w c c}w )	Nr$  rF   r%  r&  r   r'  ) ea9eb7e80383c190f8cafce1ad666624 4112a8d24886276e18e759d06956021b eba84bbe7f03e8b91a1527323040ee3d)Nr6  r6  )r   r   r!   r   ri   r(  )r-  r   r.  r   r/  
parent_idss         r'   test_hash_ids_are_deterministicr:    s    x/a*HIF)o!vyy&YZ)o!vyy&YZH /x8%5
6'7::
6C
6<LM'"",,MJM    
      7Ms   ,B7B<)r   sequence_numberr   r   r&   ))r   rF   foo.pdfrF    4bb264eb23ceb44cd8fcc5af44f8dc71)r   rT   r<  rF    75fc1de48cf724ec00aa8d1c5a0d3758)	some textr   some.txtN 1a2627b5760c06b1440102f11a1edb0f)r?  rF   r@  N e3fd10d867c4a1c0264dde40e3d7e45ac                    t        | t        ||            }|j                  |      |k(  sJ d       |j                  |k(  sJ d       y )N)r   r   r&  zReturned ID does not matchzID should be set)r   r   r%   r!   )r   r;  r   r   r&   r   s         r'   test_id_to_hash_calculatesrD    sT      (LG o.-?]A]]?::&:(::&r)   c                    t        d      } t        |       }t        j                         }t	        j
                  |D cg c]  }|j                          c}|       |j                  d       t        |      }||k(  sJ y c c}w )Nz1test_evaluate_files/unstructured_output/form.jsonr   r   )file)r   r   ioStringIOr-   dumpr/   seek)r   as_readtmp_filer   	as_read_2s        r'    test_formskeysvalues_reads_savesrN    sk     STHh/G{{}HII8Ww 8(CMM!H-Ii 9s   B)r   r   )rO   r   rP   r
   rQ   r
   rR   r   )
r   r"   r;  intr   r"   r   z
int | Noner&   r"   )9r  
__future__r   r)  rG  r-   r!  	functoolsr   rK   test_unstructured.unit_utilsr   r   unstructured.cleaners.corer   r   "unstructured.documents.coordinatesr	   r
   r   unstructured.documents.elementsr   r   r   r   r   r   r   r   r   r   unstructured.partition.jsonr   r  r   r(   r1   r4   r=   r@   rC   rN   	CARTESIANrs   rq   ru   ry   r   r   r   r   r   r2  r4  r:  rD  rN  r0   r)   r'   <module>rX     sC   ? "  	     J B 
   7 WY"xz$JK'' L'$E
 	""&
	&&CC( F -!!!!4		
 -!!<		
 -!!<		
 -4		
':999 9 	9;:92DI\0f fR>4L0 K
;
; #;/2;AK;\_;
; r)   