
    :QgT                       d Z ddlmZ ddlmZmZ ddlZddlmZ ddl	m
Z
mZmZmZmZ ddlmZ ddlmZmZmZmZmZmZ dd	lmZ dd
lmZ ddlmZ d Zd Zd Z d Z!d Z"d Z#d Z$ejJ                  jM                  di ddig      	 	 d#d       Z'd Z(d$dZ)d$dZ*d Z+d Z,ejJ                  jM                  di dfddidfddidfg      	 	 	 	 	 	 d%d        Z-d! Z.d" Z/y)&z3Test suite for `unstructured.partition.odt` module.    )annotations)AnyIteratorN)MockFixture)ANYFixtureRequestassert_round_trips_through_JSONexample_doc_pathmethod_mock)chunk_elements)CompositeElementElementTable
TableChunkTextTitle)partition_docx)partition_odt)#UNSTRUCTURED_INCLUDE_DEBUG_METADATAc                 b    t        d      } t        d      }t        |       t        |      k(  sJ y )N
simple.odtzsimple.docx)r
   r   r   )odt_file_pathdocx_file_paths     a/var/www/html/answerous/venv/lib/python3.12/site-packages/test_unstructured/partition/test_odt.py)test_partition_odt_matches_partition_docxr   !   s.    $\2M%m4N'>.+IIII    c                     t        t        d            } | t        d      t        d      gk(  sJ t	        d | D              sJ t
        r+| D ch c]  }|j                  j                   c}dhk(  sJ y y c c}w )Nfake.odtLorem ipsum dolor sit amet.SHeader row Mon Wed Fri Color Blue Red Green Time 1pm 2pm 3pm Leader Sarah Mark Ryanc              3  N   K   | ]  }|j                   j                  d k(    yw)r   Nmetadatafilename.0es     r   	<genexpr>z3test_partition_odt_from_filename.<locals>.<genexpr>7   s     CQqzz""j0C   #%docx)r   r
   r   r   allr   r#   detection_originelementsr'   s     r    test_partition_odt_from_filenamer/   +   s    -j9:H+,&	
    C(CCCC*5=>

++>6(JJJ +>s   A6c                     t        t        d      d      5 } t        |       }d d d        t        d      t	        d      gk(  sJ y # 1 sw Y   %xY w)Nr   rb)filer   r    )openr
   r   r   r   fr.   s     r   test_partition_odt_from_filer6   =   sb    	z*D	1 )Q a() +,&	
   ) )s   AAc                     t        t        d            } t        d | D              s*J dt        | d   j                  j
                                y )Nr   c              3  N   K   | ]  }|j                   j                  d k(    yw)r   Nr"   r%   s     r   r(   zktest_partition_odt_from_filename_gets_the_ODT_filename_in_metadata_not_the_DOCX_filename.<locals>.<genexpr>Q   s     Eqqzz""l2Er)   zGExpected all elements to have 'simple.odt' as their filename, but got: r   )r   r
   r+   reprr#   r$   r.   s    r   Xtest_partition_odt_from_filename_gets_the_ODT_filename_in_metadata_not_the_DOCX_filenamer;   O   sR    -l;<HEHEE !%%../0	2Er   c                 X    t        t        d      d      } t        d | D              sJ y )Nr   test)metadata_filenamec              3  N   K   | ]  }|j                   j                  d k(    ywr=   Nr"   r%   s     r   r(   zJtest_partition_odt_from_filename_with_metadata_filename.<locals>.<genexpr>Y        ?qzz""f,?r)   r   r
   r+   r:   s    r   7test_partition_odt_from_filename_with_metadata_filenamerC   W   s(    -j9VTH?h????r   c                     t        t        d      d      5 } t        | d      }d d d        t        d D              sJ y # 1 sw Y   xY w)Nr   r1   r=   )r2   r>   c              3  N   K   | ]  }|j                   j                  d k(    ywr@   r"   r%   s     r   r(   zFtest_partition_odt_from_file_with_metadata_filename.<locals>.<genexpr>_   rA   r)   )r3   r
   r   r+   r4   s     r   3test_partition_odt_from_file_with_metadata_filenamerF   \   sM    	z*D	1 CQ a6BC?h????C Cs   AA
c                     dt        t        d            } t        fd| D              s-J d dt        | d   j                  j
                                y )Nz'application/vnd.oasis.opendocument.textr   c              3  P   K   | ]  }|j                   j                  k(    y wN)r#   filetype)r&   r'   ODT_MIME_TYPEs     r   r(   zQtest_partition_odt_gets_the_ODT_MIME_type_in_metadata_filetype.<locals>.<genexpr>h   s      Fqzz""m3F   #&zExpected all elements to have 'z' as their filetype, but got: r   )r   r
   r+   r9   r#   rJ   )r.   rK   s    @r   >test_partition_odt_gets_the_ODT_MIME_type_in_metadata_filetyperM   e   s_    =M-l;<HFXFF 
)- 9!%%../0	2Fr   kwargsinfer_table_structureTc                   t        t        d      d      5 }t        dd|i| }d d d        d   }t        |t              sJ |j
                  j                  J |j
                  j                  j                  d      sJ y # 1 sw Y   `xY w)Nr   r1   r2      z<table> )r3   r
   r   
isinstancer   r#   text_as_html
startswith)rN   r5   r.   tables       r   Rtest_partition_odt_adds_text_as_html_when_infer_table_structure_is_omitted_or_TruerW   q   s     
z*D	1 3Q 2a2623 QKEeU###>>&&222>>&&11)<<<3 3s   BBc                     t        t        d      d      5 } t        | d      }d d d        d   }t        |t              sJ |j
                  j                  J y # 1 sw Y   9xY w)Nr   r1   F)r2   rO   rQ   )r3   r
   r   rS   r   r#   rT   )r5   r.   rV   s      r   Ntest_partition_odt_suppresses_text_as_html_when_infer_table_structure_is_FalserY   ~   sj    	z*D	1 FQ auEF QKEeU###>>&&...F Fs   AA%c                    d| j                  d       t        t        d            }t        fd|D              sJ y )N2029-07-05T09:24:281unstructured.partition.odt.get_last_modified_datereturn_valuer   c              3  P   K   | ]  }|j                   j                  k(    y wrI   r#   last_modified)r&   r'   filesystem_last_modifieds     r   r(   zItest_partition_odt_pulls_last_modified_from_filesystem.<locals>.<genexpr>   s!     Vqzz''+CCVrL   patchr   r
   r+   )mockerr.   rb   s     @r   6test_partition_odt_pulls_last_modified_from_filesystemrf      sF    4
LL;Jb   -j9:HVXVVVVr   c                    d}d| j                  d|       t        t        d            }t        fd|D              sJ y )Nr[   z2020-07-05T09:24:28r\   r]   r   )metadata_last_modifiedc              3  P   K   | ]  }|j                   j                  k(    y wrI   r`   )r&   r'   rh   s     r   r(   zRtest_partition_odt_prefers_metadata_last_modified_when_provided.<locals>.<genexpr>   s!     Taqzz''+AATrL   rc   )re   rb   r.   rh   s      @r   ?test_partition_odt_prefers_metadata_last_modified_when_providedrj      sS    42
LL;Jb   &?UH T8TTTTr   c                 T    t        t        d            } t        d | D              sJ y )Nr   c              3  P   K   | ]  }|j                   j                  d gk(     yw)engN)r#   	languagesr%   s     r   r(   z=test_partition_odt_adds_languages_metadata.<locals>.<genexpr>   s!     A1qzz##w.As   $&rB   r:   s    r   *test_partition_odt_adds_languages_metadataro      s&    -l;<HAAAAAr   c                     t        t        d      d      } | D cg c]  }|j                  j                   c}dgddgdgdgdggk(  sJ y c c}w )Nzlanguage-docs/eng_spa_mult.odtT)detect_language_per_elementrm   spa)r   r
   r#   rn   r-   s     r   ;test_partition_odt_respects_detect_language_per_element_argrs      sd    9:X\H +33QAJJ  3					8   3s   A)rN   expected_valuehi_resstrategyautoc                    ddl m} dd}t        | |d|      }t        t	        d      fi |\  }|j                  t               |j                  d| k(  sJ y )	Nr   )_DocxPartitionerc              3  T   K   t        d| j                  j                          y w)Nstrategy == )r   _optsrv   )selfs    r   fake_iter_document_elementsz_test_partition_odt_forwards_strategy_arg_to_partition_docx.<locals>.fake_iter_document_elements   s#     \$**"5"5!6788s   &(_iter_document_elements)side_effectr   r{   )r}   ry   returnzIterator[Element])unstructured.partition.docxry   r   r   r
   assert_called_once_withr   text)requestrN   rt   ry   r~   _iter_elements_elements          r   :test_partition_odt_forwards_strategy_arg_to_partition_docxr      se     =9 "!/	O /=HHJW++C0<<\.)9::::r   c                 >    t        t        t        d                   y)zCElements produced can be serialized then deserialized without loss.r   N)r	   r   r
   rR   r   r   +test_partition_odt_round_trips_through_jsonr      s    #M2B<2P$QRr   c                     t        d      } t        |       }t        | d      }t        d |D              sJ |t        |      k(  sJ y )Nr   basic)chunking_strategyc              3  R   K   | ]  }t        |t        t        t        f       ! y wrI   )rS   r   r   r   )r&   cs     r   r(   zYtest_partition_odt_chunks_elements_when_chunking_strategy_is_specified.<locals>.<genexpr>   s     Tz!.zBCTs   %')r
   r   r+   r   )document_pathr.   chunkss      r   Ftest_partition_odt_chunks_elements_when_chunking_strategy_is_specifiedr      sK    $\2M]+H=GDF TVTTTT^H----r   )rN   dict[str, Any])re   r   )r   r   rN   r   rt   z
str | None)0__doc__
__future__r   typingr   r   pytestpytest_mockr   test_unstructured.unit_utilsr   r   r	   r
   r   unstructured.chunking.basicr   unstructured.documents.elementsr   r   r   r   r   r   r   r   unstructured.partition.odtr   &unstructured.partition.utils.constantsr   r   r/   r6   r;   rC   rF   rM   markparametrizerW   rY   rf   rj   ro   rs   r   r   r   rR   r   r   <module>r      s!   : "    #  7  7 4 VJK$$@
@ B)@$(G#HI	=	= J	=/WU"B

   (^z4((3z66JF5ST;;%3;EO;	;*S
.r   