
    :Qg4!                    0   d dl mZ d dlmZ d dlmZ d dlZd dlZd dlm	Z	 d dl
mZmZ d dlmZ d dlmZmZ d d	lmZ d d
lmZ d Zd Zd Z G d d      Zd Zd Zd Zd Zd Zd Zd Z d Z!d Z"d Z#d%dZ$d Z%d Z&d%dZ'd Z(d Z)d  Z*d! Z+d" Z,d# Z-d$ Z.y)&    )annotations)Any)patchN)MockFixture)assert_round_trips_through_JSONexample_doc_path)chunk_by_title)ElementTypeTitle)partition_md)#UNSTRUCTURED_INCLUDE_DEBUG_METADATAc                 .   t        d      } t        |       }t        |      dkD  sJ d|D cg c]  }|j                   c}vsJ t	        |d   t
              sJ t        r+|D ch c]  }|j                  j                   c}dhk(  sJ y y c c}w c c}w )N	README.mdfilenamer   	PageBreakmd)	r   r   lencategory
isinstancer   r   metadatadetection_origin)r   elementselemelements       `/var/www/html/answerous/venv/lib/python3.12/site-packages/test_unstructured/partition/test_md.pytest_partition_md_from_filenamer      s    ,HX.Hx=1Bt}}BBBBhqk5)))*AIJg  11JtfTTT + C Ks   B%Bc                     t        d      } t        | d      5 }t        |      }d d d        t              dkD  sJ y # 1 sw Y   xY w)Nr   rbfiler   )r   openr   r   )r   fr   s      r   test_partition_md_from_filer$      sI    ,H	h	 (Q'(x=1( (s	   >Ac                     t        t        d            5 } | j                         }d d d        t              }t	        |      dkD  sJ t        d |D              sJ y # 1 sw Y   :xY w)Nr   textr   c              3  L   K   | ]  }|j                   j                  d u   y wNr   r   .0es     r   	<genexpr>z.test_partition_md_from_text.<locals>.<genexpr>*        =qqzz""d*=   "$)r"   r   readr   r   allr#   r'   r   s      r   test_partition_md_from_textr4   #   sd    	{+	, vvx &Hx=1=H==== s   AA(c                      e Zd Zi fddZy)MockResponsec                B    || _         || _        |dk  | _        || _        y )Ni,  )r'   status_codeokheaders)selfr'   r8   r:   s       r   __init__zMockResponse.__init__.   s$    	&#    N)r'   strr8   intr:   zdict[str, Any])__name__
__module____qualname__r<    r=   r   r6   r6   -   s
    NP r=   r6   c                 \   t        d      } t        |       5 }|j                         }d d d        t        dddi      }t	        j
                  t        d|      5 }t        d	      }d d d        t              d
kD  sJ t        d |D              sJ y # 1 sw Y   oxY w# 1 sw Y   :xY w)Nr      Content-Typetext/markdownr'   r8   r:   getreturn_valuehttps://fake.urlurlr   c              3  L   K   | ]  }|j                   j                  d u   y wr)   r*   r+   s     r   r.   z-test_partition_md_from_url.<locals>.<genexpr>C   r/   r0   )
r   r"   r1   r6   r   objectrequestsr   r   r2   )r   r#   r'   response_r   s         r   test_partition_md_from_urlrT   5   s    ,H	h 1vvx 1H
 
hH	= 8$678 x=1=H==== 8 8s   BB"B"B+c                 p   t        d      } t        |       5 }|j                         }d d d        t        dddi      }t	        j
                  t        d|      5 }t        j                  t              5  t        d	       d d d        d d d        y # 1 sw Y   mxY w# 1 sw Y   xY w# 1 sw Y   y xY w)
Nr   i  rF   z	text/htmlrH   rI   rJ   rL   rM   r   r"   r1   r6   r   rP   rQ   pytestraises
ValueErrorr   r   r#   r'   rR   rS   s        r   6test_partition_md_from_url_raises_with_bad_status_coder[   F   s    ,H	h 1vvx -H
 
hH	= -FMMR\D] -+,- - - - - - -/   BB,6B B,B B)	%B,,B5c                 p   t        d      } t        |       5 }|j                         }d d d        t        dddi      }t	        j
                  t        d|      5 }t        j                  t              5  t        d	       d d d        d d d        y # 1 sw Y   mxY w# 1 sw Y   xY w# 1 sw Y   y xY w)
Nr   rE   rF   zapplication/jsonrH   rI   rJ   rL   rM   rV   rZ   s        r   7test_partition_md_from_url_raises_with_bad_content_typer^   T   s    ,H	h 1vvx !34H
 
hH	= -FMMR\D] -+,- - - - - - -r\   c                 t    t        j                  t              5  t                d d d        y # 1 sw Y   y xY wr)   )rW   rX   rY   r   rC   r=   r   ,test_partition_md_raises_with_none_specifiedr`   b   s(    	z	"   s   .7c                     t        d      } t        |       5 }|j                         }d d d        t        j                  t
              5  t        |        d d d        y # 1 sw Y   9xY w# 1 sw Y   y xY w)Nr   )r   r'   )r   r"   r1   rW   rX   rY   r   )r   r#   r'   s      r   0test_partition_md_raises_with_too_many_specifiedrb   g   sg    ,H	h 1vvx 
z	" 3hT23 3 3 3s   A 	A, A),A5c                 t    t        t        d            } t        |       dkD  sJ t        d | D              sJ y )Nr   r   c              3  N   K   | ]  }|j                   j                  d k(    yw)r   Nr*   r+   s     r   r.   zRtest_partition_md_from_filename_gets_filename_from_filename_arg.<locals>.<genexpr>w   s     Daqzz""k1D   #%r   r   r   r2   r   s    r   ?test_partition_md_from_filename_gets_filename_from_filename_argrh   s   s8    ,[9:Hx=1D8DDDDr=   c                     t        t        d      d      5 } t        |       }d d d        t              dkD  sJ t	        d |D              sJ y # 1 sw Y   .xY w)Nr   r   r    r   c              3  L   K   | ]  }|j                   j                  d u   y wr)   r*   r+   s     r   r.   zAtest_partition_md_from_file_gets_filename_None.<locals>.<genexpr>   r/   r0   )r"   r   r   r   r2   r#   r   s     r   .test_partition_md_from_file_gets_filename_Nonerl   z   s[    	{+T	2 (aQ'( x=1=H====	( (s   AAc                 x    t        t        d      d      } t        |       dkD  sJ t        d | D              sJ y )Nr   orig-name.md)metadata_filenamer   c              3  N   K   | ]  }|j                   j                  d k(    ywrn   Nr*   )r,   r   s     r   r.   zLtest_partition_md_from_filename_prefers_metadata_filename.<locals>.<genexpr>   s!     Sww((N:Sre   rf   rg   s    r   9test_partition_md_from_filename_prefers_metadata_filenamerr      s:    ,[9^\Hx=1S(SSSSr=   c                     t        t        d      d      5 } t        | d      }d d d        t        d D              sJ y # 1 sw Y   xY w)Nr   r   rn   )r!   ro   c              3  N   K   | ]  }|j                   j                  d k(    ywrq   r*   r+   s     r   r.   zHtest_partition_md_from_file_prefers_metadata_filename.<locals>.<genexpr>   s     Gqzz""n4Gre   r"   r   r   r2   rk   s     r   5test_partition_md_from_file_prefers_metadata_filenamerv      sO    	{+T	2 JaQ.IJ GhGGGGJ Js   AA
c                     dt        t        d            } t        fd| D              s-J d dt        | d   j                  j
                                y )NrG   r   c              3  P   K   | ]  }|j                   j                  k(    y wr)   )r   filetype)r,   r-   MD_MIME_TYPEs     r   r.   zOtest_partition_md_gets_the_MD_MIME_type_in_metadata_filetype.<locals>.<genexpr>   s      Eqqzz""l2E   #&zExpected all elements to have 'z' as their filetype, but got: r   )r   r   r2   reprr   ry   )r   rz   s    @r   <test_partition_md_gets_the_MD_MIME_type_in_metadata_filetyper}      s_    "L,[9:HEHEE 
), 8!%%../0	2Er=   c                    d| j                  d       t        t        d            }t        fd|D              sJ y )N2029-07-05T09:24:280unstructured.partition.md.get_last_modified_daterJ   r   c              3  P   K   | ]  }|j                   j                  k(    y wr)   r   last_modified)r,   r-   filesystem_last_modifieds     r   r.   zVtest_partition_md_from_file_path_gets_last_modified_from_filesystem.<locals>.<genexpr>   s!     Vqzz''+CCVr{   r   r   r   r2   )mockerr   r   s     @r   Ctest_partition_md_from_file_path_gets_last_modified_from_filesystemr      sF    4
LL:Ia   ,[9:HVXVVVVr=   c                     t        t        d      d      5 } t        |       }d d d        t        d D              sJ y # 1 sw Y   xY w)Nr   r   r    c              3  L   K   | ]  }|j                   j                  d u   y wr)   r   r+   s     r   r.   zFtest_partition_md_from_file_gets_last_modified_None.<locals>.<genexpr>        BAqzz''4/Br0   ru   rk   s     r   3test_partition_md_from_file_gets_last_modified_Noner      sI    	{+T	2 (aQ'( BBBBB( (s   A  A	c                     t        t        d            5 } | j                         }d d d        t              }t	        d |D              sJ y # 1 sw Y   *xY w)Nr   r&   c              3  L   K   | ]  }|j                   j                  d u   y wr)   r   r+   s     r   r.   zFtest_partition_md_from_text_gets_last_modified_None.<locals>.<genexpr>   r   r0   r"   r   r1   r   r2   r3   s      r   3test_partition_md_from_text_gets_last_modified_Noner      sR    	{+	, vvx &HBBBBB s   AAc                    d}d| j                  d|       t        t        d            }t        fd|D              sJ y )Nr   2020-07-05T09:24:28r   rJ   r   )metadata_last_modifiedc              3  P   K   | ]  }|j                   j                  k(    y wr)   r   r,   r-   r   s     r   r.   zRtest_partition_md_from_file_path_prefers_metadata_last_modified.<locals>.<genexpr>   !     Taqzz''+AATr{   r   )r   r   r   r   s      @r   ?test_partition_md_from_file_path_prefers_metadata_last_modifiedr      sS    42
LL:Ia   %>TH T8TTTTr=   c                     dt        t        d      d      5 } t        |       }d d d        t        fdD              sJ y # 1 sw Y    xY w)Nr   r   r   )r!   r   c              3  P   K   | ]  }|j                   j                  k(    y wr)   r   r   s     r   r.   zMtest_partition_md_from_file_prefers_metadata_last_modified.<locals>.<genexpr>   r   r{   ru   )r#   r   r   s     @r   :test_partition_md_from_file_prefers_metadata_last_modifiedr      sW    2	{+T	2 WaQ?UVW T8TTTTW Ws   AAc                     dt        t        d            5 } | j                         }d d d        t              }t	        fd|D              sJ y # 1 sw Y   -xY w)Nr   r   )r'   r   c              3  P   K   | ]  }|j                   j                  k(    y wr)   r   r   s     r   r.   zMtest_partition_md_from_text_prefers_metadata_last_modified.<locals>.<genexpr>   r   r{   r   )r#   r'   r   r   s      @r   :test_partition_md_from_text_prefers_metadata_last_modifiedr      s\    2	{+	, vvx >TUHT8TTTT s   AAc                     t        t        d            5 } | j                         }d d d        t              }t	        |       y # 1 sw Y   !xY w)Nr   r&   )r"   r   r1   r   r   r3   s      r   test_partition_md_with_jsonr      sD    	{+	, vvx&H#H- s   AAc                 |    t        d      } t        |       }t        | d      }t        |      }||k7  sJ ||k(  sJ y )Nr   by_title)chunking_strategy)r   r   r	   )r   r   chunk_elementschunkss       r   3test_add_chunking_strategy_by_title_on_partition_mdr      sH    ,HH%H!(jINH%FX%%%V###r=   c                 ^    d} t        |       }|d   j                  j                  dgk(  sJ y )Nzexample-docs/README.mdr   r   engr   r   	languagesr   r   s     r   0test_partition_md_element_metadata_has_languagesr      s1    'HX.HA;))eW444r=   c                     d} t        | d      }|D cg c]  }|j                  j                   }}|dgddgdgdgdggk(  sJ y c c}w )Nz*example-docs/language-docs/eng_spa_mult.mdT)r   detect_language_per_elementr   spar   )r   r   r   langss       r   6test_partition_md_respects_detect_language_per_elementr      s]    ;HX4PH7?@GW''@E@eWuenugwHHHH As   Ac                     t        d      } t        |       }t        |      dkD  sJ |d   j                  t        j
                  k(  sJ y )Nzsimple-table.mdr   r   )r   r   r   r   r
   TABLEr   s     r   test_partition_md_parse_tabler      sG     12HX.Hx=1A;;#4#4444r=   )r   r   )/
__future__r   typingr   unittest.mockr   rW   rQ   pytest_mockr   test_unstructured.unit_utilsr   r   unstructured.chunking.titler	   unstructured.documents.elementsr
   r   unstructured.partition.mdr   &unstructured.partition.utils.constantsr   r   r$   r4   r6   rT   r[   r^   r`   rb   rh   rl   rr   rv   r}   r   r   r   r   r   r   r   r   r   r   r   rC   r=   r   <module>r      s    "     # Z 6 > 2 VU> >"--
3E>THWCCUUU.$5I5r=   