
    :QggB                       d Z ddlmZ ddlZddlmZ ddlZddlmZ ddl	m
Z
mZmZmZmZmZmZ ddlmZ ddlmZmZmZmZmZ dd	lmZ dd
lmZmZ  ed       ed       ed       ed      gZd Zd Z d Z!d Z"d Z#d Z$d Z%d Z&d Z'd Z(d+dZ)d Z*d Z+d Z,d Z-d Z.d  Z/d! Z0d" Z1d# Z2d,d$Z3d% Z4d& Z5d' Z6d( Z7 G d) d*      Z8y)-z3Test suite for `unstructured.partition.msg` module.    )annotationsN)Any)Message)FixtureRequestLogCaptureFixtureMockassert_round_trips_through_JSONexample_doc_pathfunction_mockproperty_mock)chunk_by_title)ElementMetadataListItemNarrativeTextTextTitle)UnsupportedFileFormatError)MsgPartitionerOptionspartition_msg+This is a test email to use for unit tests.)textzImportant points:zRoses are redzViolets are bluec                    t        d      } t        |       }|d   j                  j                  }|t        k(  sJ |d   j                  j                         t        d | dd d dgdgdd|d	g
      j                         k(  sJ y )Nfake-email.msgfilenamer   2023-03-28T17:00:31+00:00z."Matthew Robinson" <mrobinson@unstructured.io>zmrobinson@unstructured.ioz
Test Emailapplication/vnd.ms-outlookeng)coordinatesr   last_modifiedpage_numberurl	sent_fromsent_tosubjectfiletype	parent_id	languages)r
   r   metadatar'   EXPECTED_MSG_OUTPUTto_dictr   )r   elementsr'   s      a/var/www/html/answerous/venv/lib/python3.12/site-packages/test_unstructured/partition/test_msg.py test_partition_msg_from_filenamer.   '   s     01Hh/H$$..I****$$&5GH01 1g
 ')		    c                 \    t        d      } t        |       }t        |d   t              sJ y )Nr   r   r   )r
   r   
isinstancer   r   r,   s     r-   5test_partition_msg_from_filename_returns_uns_elementsr3   ?   s+     01Hh/Hhqk=111r/   c                 \    t        d      } t        | d      }t        d |D              sJ y )Nr   test)r   metadata_filenamec              3  N   K   | ]  }|j                   j                  d k(    yw)r5   Nr)   r   .0elements     r-   	<genexpr>zJtest_partition_msg_from_filename_with_metadata_filename.<locals>.<genexpr>H   s!     Kww((F2K   #%)r
   r   allr2   s     r-   7test_partition_msg_from_filename_with_metadata_filenamer?   E   s-     01Hh&IHK(KKKKr/   c                     t        d      } t        |       }t        |d         dk(  sJ |d   j                  j                  dk(  sJ |d   j                  j
                  t        d      k(  sJ y )Nr   r   r   r    )r
   r   strr)   r   file_directoryr2   s     r-   2test_partition_msg_from_filename_with_text_contentrD   K   sr     01Hh/Hx{LLLLA;((,<<<<A;..2B22FFFFr/   c                     t        d      } t        j                  t              5  t	        |        d d d        y # 1 sw Y   y xY w)Nzdoesnt-exist.msgr   )r
   pytestraisesFileNotFoundErrorr   r   s    r-   +test_partition_msg_raises_with_missing_filerI   U   s7     23H	(	) )x() ) )s	   ;Ac                     t        d      } t        | d      5 }t        |      }d d d        t        k(  sJ |D ]  }|j                  j
                  J  y # 1 sw Y   4xY w)Nr   rbfiler
   openr   r*   r)   r   r   fr,   r;   s       r-   test_partition_msg_from_filerR   [   sn     01H	h	 ) a()**** 1((0001) )s   AA!c                     t        d      } t        | d      5 }t        |d      }d d d        t        k(  sJ |D ]  }|j                  j
                  dk(  rJ  y # 1 sw Y   7xY w)Nr   rK   r5   rM   r6   rN   rP   s       r-   3test_partition_msg_from_file_with_metadata_filenamerU   d   sv     01H	h	 C a6BC**** 3((F2223C Cs   AA%c                 l    t        t        d      t        j                  d            } | t        k(  sJ y )Nr   s   abcderL   )r   r
   ioBytesIOr*   r,   s    r-   9test_partition_msg_uses_file_path_when_both_are_specifiedrZ   m   s.    -.>?bjjQYFZ[H****r/   c                 t    t        j                  t              5  t                d d d        y # 1 sw Y   y xY wN)rF   rG   
ValueErrorr    r/   r-   &test_partition_msg_raises_with_neitherr_   r   s(    	z	"   s   .7c                    t        t        d      d      } t        d | d d D              sJ t        d | dd D              sJ t        d | dd	 D              sJ t        d
 | d	d  D              sJ | d d D cg c]  }|j                   c}g dk(  sJ | D cg c]  }t	        |      j
                   c}d d g dk(  sJ | D cg c]  }t	        |      j
                   c}dd  g dk(  sJ y c c}w c c}w c c}w )N#fake-email-multiple-attachments.msgTprocess_attachmentsc              3  N   K   | ]  }|j                   j                  d k(    yw)ra   Nr8   r:   es     r-   r<   z=test_partition_msg_can_process_attachments.<locals>.<genexpr>   s!     bPQqzz""&KKbr=      c              3  N   K   | ]  }|j                   j                  d k(    yw)zunstructured_logo.pngNr8   re   s     r-   r<   z=test_partition_msg_can_process_attachments.<locals>.<genexpr>   s      U!qzz""&==Ur=      c              3  N   K   | ]  }|j                   j                  d k(    yw)zdense_doc.pdfNr8   re   s     r-   r<   z=test_partition_msg_can_process_attachments.<locals>.<genexpr>   s     O!qzz""o5Or=   iW  c              3  N   K   | ]  }|j                   j                  d k(    yw)zEngineering Onboarding.pptxNr8   re   s     r-   r<   z=test_partition_msg_can_process_attachments.<locals>.<genexpr>   s      \qzz""&CC\r=   )Here are those documents.--Mallori HarrellUnstructured TechnologiesData Scientist
   )
r   r   r   r   r   Imager   r   r   r   i)
r   r   r   r   r   r   r   r   r   r   )r   r
   r>   r   type__name__)r,   rf   s     r-   *test_partition_msg_can_process_attachmentsru   z   s/   >?UYH bU]^`_`UabbbbUxPQRS}UUUUOx#OOOO\XVYVZ^\\\\$RaL)qAFF) .    '//DG/4 9    '//DG/5 :   ' * 0 0s   :C2C7C<c                    t        | dt                      t        t        d      d      }|t	        d      t        d      t        d      t        d	      t        d
      gk(  sJ y )Nz%unstructured.partition.auto.partition)side_effectra   Trb   rl   rm   rn   ro   rp   )r   r   r   r
   r   r   r   )requestr,   s     r-   Atest_partition_msg_silently_skips_attachments_it_cannot_partitionry      sr    8F`Fb >?UYH
 12T
 )*   r/   c                 |    t        t        d            } t        d | D              sJ t        d | D              sJ y )Nr   c              3  N   K   | ]  }|j                   j                  d k(    yw)r   Nr8   re   s     r-   r<   zYtest_partition_msg_from_filename_gets_filename_metadata_from_file_path.<locals>.<genexpr>   s      I1qzz""&66Ir=   c              3  `   K   | ]&  }|j                   j                  t        d       k(   ( yw)rA   N)r)   rC   r
   re   s     r-   r<   zYtest_partition_msg_from_filename_gets_filename_metadata_from_file_path.<locals>.<genexpr>   s%     SQqzz((,<R,@@Ss   ,.r   r
   r>   rY   s    r-   Ftest_partition_msg_from_filename_gets_filename_metadata_from_file_pathr~      s;    -.>?@HIIIIIS(SSSSr/   c                     t        t        d      d      5 } t        |       }d d d        t        d D              sJ t        d |D              sJ y # 1 sw Y   2xY w)Nr   rK   rL   c              3  L   K   | ]  }|j                   j                  d u   y wr\   r8   re   s     r-   r<   zKtest_partition_msg_from_file_gets_filename_metadata_None.<locals>.<genexpr>   s     =qqzz""d*=   "$c              3  L   K   | ]  }|j                   j                  d u   y wr\   r)   rC   re   s     r-   r<   zKtest_partition_msg_from_file_gets_filename_metadata_None.<locals>.<genexpr>   s     CQqzz((D0Cr   rO   r
   r   r>   rQ   r,   s     r-   8test_partition_msg_from_file_gets_filename_metadata_Noner      s^    	/0$	7 )1 a() =H====C(CCCC	) )s   AAc                     t        t        d      d      } t        d | D              sJ t        d | D              sJ y )Nr   	a/b/c.msg)r6   c              3  N   K   | ]  }|j                   j                  d k(    yw)zc.msgNr8   re   s     r-   r<   zMtest_partition_msg_from_filename_prefers_metadata_filename.<locals>.<genexpr>        @!qzz""g-@r=   c              3  N   K   | ]  }|j                   j                  d k(    yw)za/bNr   re   s     r-   r<   zMtest_partition_msg_from_filename_prefers_metadata_filename.<locals>.<genexpr>        Daqzz((E1Dr=   r}   rY   s    r-   :test_partition_msg_from_filename_prefers_metadata_filenamer      s>    -.>?S^_H@x@@@@D8DDDDr/   c                     t        t        d      d      5 } t        | d      }d d d        t        d D              sJ t        d |D              sJ y # 1 sw Y   2xY w)Nr   rK   z	d/e/f.msgrT   c              3  N   K   | ]  }|j                   j                  d k(    yw)zf.msgNr8   re   s     r-   r<   zItest_partition_msg_from_file_prefers_metadata_filename.<locals>.<genexpr>   r   r=   c              3  N   K   | ]  }|j                   j                  d k(    yw)zd/eNr   re   s     r-   r<   zItest_partition_msg_from_file_prefers_metadata_filename.<locals>.<genexpr>   r   r=   r   r   s     r-   6test_partition_msg_from_file_prefers_metadata_filenamer      sd    	/0$	7 H1 a;GH @x@@@@D8DDDD	H Hs   AAc                     dt        t        d            } t        fd| D              s-J d dt        | d   j                  j
                                y )Nr   r   c              3  P   K   | ]  }|j                   j                  k(    y wr\   )r)   r&   )r:   rf   MSG_MIME_TYPEs     r-   r<   zQtest_partition_msg_gets_the_MSG_mime_type_in_metadata_filetype.<locals>.<genexpr>   s      Fqzz""m3F   #&zExpected all elements to have 'z' as their filetype, but got: r   )r   r
   r>   reprr)   r&   )r,   r   s    @r-   >test_partition_msg_gets_the_MSG_mime_type_in_metadata_filetyper      s`    0M-.>?@HFXFF 
)- 9!%%../0	2Fr/   c                 T    t        t        d            } t        d | D              sJ y )Nr   c              3  N   K   | ]  }|j                   j                  d k(    yw)r   Nr)   r    re   s     r-   r<   zPtest_partition_msg_pulls_last_modified_from_message_sent_date.<locals>.<genexpr>   s      Y1qzz''+FFYr=   r}   rY   s    r-   =test_partition_msg_pulls_last_modified_from_message_sent_dater      s(    -.>?@HYPXYYYYr/   c                 p    d} t        t        d      |       }|d   j                  j                  | k(  sJ y )N2020-07-05T09:24:28r   )metadata_last_modifiedr   )r   r
   r)   r    )r   r,   s     r-   @test_partition_msg_from_file_path_prefers_metadata_last_modifiedr      s?    2)*CYH A;--1GGGGr/   c                     dt        t        d      d      5 } t        |       }d d d        t        fdD              sJ y # 1 sw Y    xY w)Nr   r   rK   )rM   r   c              3  P   K   | ]  }|j                   j                  k(    y wr\   r   )r:   rf   r   s     r-   r<   zNtest_partition_msg_from_file_prefers_metadata_last_modified.<locals>.<genexpr>   s!     Taqzz''+AATr   r   )rQ   r,   r   s     @r-   ;test_partition_msg_from_file_prefers_metadata_last_modifiedr      sX    2	/0$	7 X1 a@VWX T8TTTTX Xs   AAc                 B    t        t        d            } t        |        y )Nr   )r   r
   r	   rY   s    r-   test_partition_msg_with_jsonr     s    -.>?@H#H-r/   c                z    t        t        d            }|g k(  sJ d| j                  v sJ d| j                  v sJ y )Nfake-encrypted.msgWARNINGzEncrypted email detected)r   r
   r   )caplogr,   s     r-   -test_partition_msg_with_pgp_encrypted_messager   	  sC    -.BCDHr>>###%444r/   c                 ~    t        d      } t        |       }t        | d      }t        |      }||k7  sJ ||k(  sJ y )Nr   r   by_title)chunking_strategy)r
   r   r   )r   r,   chunk_elementschunkss       r-   4test_add_chunking_strategy_by_title_on_partition_msgr     sI     01Hh/H"8zJNH%FX%%%V###r/   c                 ^    d} t        |       }|d   j                  j                  dgk(  sJ y )Nexample-docs/fake-email.msgr   r   r   )r   r)   r(   r2   s     r-   1test_partition_msg_element_metadata_has_languagesr     s1    ,Hh/HA;))eW444r/   c                 L    d} t        | dg      }t        d |D              sJ y )Nr   deur   r(   c              3  P   K   | ]  }|j                   j                  d gk(     yw)r   N)r)   r(   r9   s     r-   r<   z<test_partition_msg_respects_languages_arg.<locals>.<genexpr>(  s#     Mw))eW4Ms   $&)r   r>   r2   s     r-   )test_partition_msg_respects_languages_argr   %  s)    ,Hh5'BHMHMMMMr/   c                 ~    t        j                  t              5  d} t        | d       d d d        y # 1 sw Y   y xY w)Nr   r   r   )rF   rG   	TypeErrorr   r   s    r-   9test_partition_msg_raises_TypeError_for_invalid_languagesr   +  s2    	y	! :0x59: : :s   3<c                     e Zd ZdZddZej                  j                  dddg      	 	 	 	 	 	 dd       ZddZ	ddZ
dd	Zdd
Z	 	 ddZej                  j                  dddg      	 	 	 	 	 	 	 	 dd       ZddZ	 	 ddZddZej                  j                  dddg      	 	 	 	 dd       Z	 	 ddZej(                  dd       Zej(                  dd       Zej(                  d d       Zy)!DescribeMsgPartitionerOptionszOUnit-test suite for `unstructured.partition.msg.MsgPartitionerOptions` objects.c                $   t        d      |d<   t        d	i |}|j                  }|j                  dgk(  sJ |j                  dgk(  sJ |j
                  dk(  sJ |j                  dgk(  sJ |j                  g dk(  sJ |j                  dk(  sJ y )
Nzfake-email-with-cc-and-bcc.msg	file_pathhello@unstructured.iosteve@unstructured.ioz.14DDEF33-2BA7-4CDD-A4D8-E7C5873B37F2@gmail.comz""John" <johnjennings702@gmail.com>)zjohn-ctr@unstructured.ior   r   z%Fake email with cc and bcc recipientsr^   )	r
   r   extra_msg_metadatabcc_recipientcc_recipientemail_message_idr#   r$   r%   )self	opts_argsoptsms       r-   :it_provides_email_specific_metadata_to_add_to_each_elementzXDescribeMsgPartitionerOptions.it_provides_email_specific_metadata_to_add_to_each_element>  s    !12R!S	+$1y1###:";;;;~~"9!::::!!%UUUU{{CDDDDyy 
 
 	
 

 yyCCCCr/   )	file_nameexpected_value)r   T)r   Fc                V    t        |      |d<   t        di |}|j                  |u sJ y )Nr   r^   )r
   r   is_encrypted)r   r   r   r   r   s        r-   "it_knows_when_the_msg_is_encryptedz@DescribeMsgPartitionerOptions.it_knows_when_the_msg_is_encryptedP  s4     "2)!<	+$1y1  N222r/   c                P    d|d<   d|d<   t        di |}|j                  dk(  sJ y )Nz	x/y/z.msgr   r   metadata_file_pathr^   r   r   r   r   r   s      r-   0it_uses_the_metadata_file_path_arg_when_providedzNDescribeMsgPartitionerOptions.it_uses_the_metadata_file_path_arg_when_provided]  s8    !,	+*5	&'$1y1&&+555r/   c                \    t        d      }||d<   t        di |}|j                  |k(  sJ y Nr   r   r^   )r
   r   r   )r   r   r   r   s       r-   8and_it_falls_back_to_the_MSG_file_path_arg_when_providedzVDescribeMsgPartitionerOptions.and_it_falls_back_to_the_MSG_file_path_arg_when_providedd  s8    $%56	!*	+$1y1&&)333r/   c                6    t        di |}|j                  J y )Nr^   r   r   s      r-   2but_it_returns_None_when_neither_path_is_availablezPDescribeMsgPartitionerOptions.but_it_returns_None_when_neither_path_is_availablek  s!    $1y1&&...r/   c                f    d}||d<   t        d      |d<   t        di |}|j                  |k(  sJ y )Nz2024-03-05T17:02:53r   r   r   r^   r
   r   r   )r   r   r   r   s       r-   :it_uses_metadata_last_modified_when_provided_by_the_callerzXDescribeMsgPartitionerOptions.it_uses_metadata_last_modified_when_provided_by_the_callerr  sE    !6.D	*+!12B!C	+$1y1**.DDDDr/   c                X    t        d      |d<   t        di |}|j                  dk(  sJ y )Nr   r   r   r^   r   r   s      r-   Oand_it_uses_the_message_Date_header_when_metadata_last_modified_is_not_providedzmDescribeMsgPartitionerOptions.and_it_uses_the_message_Date_header_when_metadata_last_modified_is_not_providedz  s6     "22B!C	+$1y1**.IIIIr/   filesystem_last_modifiedz2024-06-03T20:12:53Nc                t    d |_         ||_         t        d      |d<   t        di |}|j                  |k(  sJ y r   )return_valuer
   r   r   )r   r   r   Message_sent_date__last_modified_prop_r   s         r-   Yand_it_uses_the_last_modified_date_from_the_source_file_when_the_message_has_no_sent_datezwDescribeMsgPartitionerOptions.and_it_uses_the_last_modified_date_from_the_source_file_when_the_message_has_no_sent_date  sH     +/',D)!12B!C	+$1y1**.FFFFr/   c                n    t        d      |d<   t        di |}t        |j                  t              sJ y r   )r
   r   r1   msgr   r   s      r-   8it_loads_the_msg_document_from_a_file_path_when_providedzVDescribeMsgPartitionerOptions.it_loads_the_msg_document_from_a_file_path_when_provided  s3    !12B!C	+$1y1$((G,,,r/   c                    t        t        d      d      5 }t        j                  |j	                               |d<   d d d        t        di |}t        |j                  t              sJ y # 1 sw Y   1xY w)Nr   rK   rM   r^   )	rO   r
   rW   rX   readr   r1   r   r   )r   r   rQ   r   s       r-   Cand_it_loads_the_msg_document_from_a_file_like_object_when_providedzaDescribeMsgPartitionerOptions.and_it_loads_the_msg_document_from_a_file_like_object_when_provided  sf     "#34d; 	5q "

1668 4If	5$1y1$((G,,,		5 	5s   'A--A6c                    t        j                  t        d      5  t        di |j                   d d d        y # 1 sw Y   y xY w)Nz2one of `file` or `filename` arguments must be prov)matchr^   )rF   rG   r]   r   r   )r   r   s     r-   &but_it_raises_when_neither_is_providedzDDescribeMsgPartitionerOptions.but_it_raises_when_neither_is_provided  s7    ]]:-ab 	3!.I.22	3 	3 	3s	   ;Apartition_attachmentsTFc                `    t        d      |d<   ||d<   t        di |}|j                  |u sJ y )Nr   r   r   r^   )r
   r   r   )r   r   r   r   s       r-   7it_knows_whether_attachments_should_also_be_partitionedzUDescribeMsgPartitionerOptions.it_knows_whether_attachments_should_also_be_partitioned  sA     "22B!C	+-B	)*$1y1))-BBBBr/   c                R    ddd|d<   t        di |}|j                  dddk(  sJ y )N*   baz)foobarkwargsr^   )r   partitioning_kwargsr   s      r-   Oit_provides_access_to_pass_through_kwargs_collected_by_the_partitioner_functionzmDescribeMsgPartitionerOptions.it_provides_access_to_pass_through_kwargs_collected_by_the_partitioner_function  s9     ')7	($1y1''2e+DDDDr/   c                $    t        |t        d      S )N_last_modified)r   r   r   rx   s     r-   r   z2DescribeMsgPartitionerOptions._last_modified_prop_  s    W&;=MNNr/   c                $    t        |t        d      S )N	sent_date)r   r   r   s     r-   r   z0DescribeMsgPartitionerOptions.Message_sent_date_  s    Wg{;;r/   c                    dddddi dS )zAll default arguments for `MsgPartitionerOptions`.

        Individual argument values can be changed to suit each test. Makes construction of opts more
        compact for testing purposes.
        NF)rM   r   r   r   r   r   r^   )r   s    r-   r   z'DescribeMsgPartitionerOptions.opts_args  s      "&&*%*
 	
r/   )r   dict[str, Any])r   rB   r   boolr   r   )r   r   r   z
str | Noner   r   r   r   )r   r   r   r   rx   r   )returnr   )rt   
__module____qualname____doc__r   rF   markparametrizer   r   r   r   r   r   r   r   r   r   r   r   fixturer   r   r   r^   r/   r-   r   r   9  s   YD$ [['*FHa)b33.23?M3364/EJ'J [[7:OQU9VWG!G #-G !	G
 #G XG --'-3 [[4tUmDC%)C6DC ECE'E ^^O O ^^< < ^^
 
r/   r   r   )r   r   )9r  
__future__r   rW   typingr   rF   oxmsgr   test_unstructured.unit_utilsr   r   r   r	   r
   r   r   unstructured.chunking.titler   unstructured.documents.elementsr   r   r   r   r   unstructured.partition.commonr   unstructured.partition.msgr   r   r*   r.   r3   r?   rD   rI   rR   rU   rZ   r_   ru   ry   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r^   r/   r-   <module>r     s    9 " 	      7  E K DE	"#/"$%	 02LG)13+
'T0TDEEZ
HU.
5$5N:[
 [
r/   