
    :Qg*                    $   d Z ddlmZ ddlZddlZddlZddlZddlmZ ddl	m
Z
 ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ  ej6                  e      j:                  j=                         Zej@                  jC                  d      Z"g dZ#ej@                  jC                  d      Z"d Z$ejJ                  jM                  de#      d#d       Z'ejJ                  jM                  de#      d#d       Z(ejJ                  jM                  de#      d#d       Z)ejJ                  jM                  de#      d#d       Z*ejJ                  jM                  de#      d#d       Z+d Z,d Z-d Z.d Z/d$dZ0d Z1d Z2d$dZ3d Z4d  Z5d! Z6d" Z7y)%z4Test-suite for `unstructured.partition.json` module.    )annotationsN)MockFixture)example_doc_path)CompositeElement)FileType)partition_email)partition_htmlpartition_json)partition_text)partition_xml)elements_to_jsonz/.dockerenv)fake-text.txtzfake-html.htmlzeml/fake-email.emlc                 h    t        ddd      } t        |       dk(  sJ t        d | D              sJ y )N%example-docs/spring-weather.html.jsonbasici  )chunking_strategymax_characters
   c              3  <   K   | ]  }t        |t                y wN)
isinstancer   ).0chs     b/var/www/html/answerous/venv/lib/python3.12/site-packages/test_unstructured/partition/test_json.py	<genexpr>zPtest_it_chunks_elements_when_a_chunking_strategy_is_specified.<locals>.<genexpr>)   s     ABz"./As   )r   lenall)chunkss    r   =test_it_chunks_elements_when_a_chunking_strategy_is_specifiedr    #   s<    /7[_F v;"A&AAAA    filenamec                   t        |       }g }t        j                  t        j                  j                  |      d         }|t        j                  k(  rt        |      }|t        j                  k(  rt        |      }|t        j                  k(  rt        |      }|t        j                  k(  rt        |      }t        j                         5 }t        j                  j!                  |       }t        j                  j#                  ||dz         }t%        ||d       t'        |      }d d d        t)        |      dkD  sJ t)        t+        |d               dkD  sJ t)        |      t)              k(  sJ t-        t)        |            D ]?  }||   ||   k(  sJ ||   j.                  j0                  | j3                  d      d   k(  r?J  y # 1 sw Y   xY w)	N   r"   .json   r"   indentr   /)r   r   from_extensionospathsplitextTXTr   HTMLr	   XMLr   EMLr   tempfileTemporaryDirectorybasenamejoinr   r   r   strrangemetadatar"   split)	r"   r.   elementsfiletypetmpdir	_filename	test_pathtest_elementsis	            r   !test_partition_json_from_filenamerC   ,   s   H%DH&&rww'7'7'=a'@AH8<<!408== !408<< $/8<<"D1		$	$	& ;&GG$$X.	GGLLW)<=	Ia@&	:	; x=1s8A; 1$$$x=C....3x=! H{mA....{##,,s0CB0GGGGH; ;s   AGGc                   t        |       }g }t        j                  t        j                  j                  |      d         }|t        j                  k(  rt        |      }|t        j                  k(  rt        |      }|t        j                  k(  rt        |      }|t        j                  k(  rt        |      }t        j                         5 }t        j                  j!                  |       }t        j                  j#                  ||dz         }t%        ||d       t'        |d      }d d d        t)              dkD  sJ t)        t+        |d               dkD  sJ t-        d	 |D              sJ y # 1 sw Y   JxY w)
Nr$   r%   r&   r'   r(   test)r"   metadata_filenamer   c              3  N   K   | ]  }|j                   j                  d k(    yw)rE   N)r:   r"   )r   elements     r   r   zKtest_partition_json_from_filename_with_metadata_filename.<locals>.<genexpr>_   s!     Pww((F2Ps   #%)r   r   r,   r-   r.   r/   r0   r   r1   r	   r2   r   r3   r   r4   r5   r6   r7   r   r   r   r8   r   )r"   r.   r<   r=   r>   r?   r@   rA   s           r   8test_partition_json_from_filename_with_metadata_filenamerI   I   s?   H%DH&&rww'7'7'=a'@AH8<<!408== !408<< $/8<<"D1		$	$	& U&GG$$X.	GGLLW)<=	Ia@&	VT	U }!!!s=#$%)))P-PPPPU Us   AE::Fc                   t        |       }g }t        j                  t        j                  j                  |      d         }|t        j                  k(  rt        |      }|t        j                  k(  rt        |      }|t        j                  k(  rt        |      }|t        j                  k(  rt        |      }t        j                         5 }t        j                  j!                  |       }t        j                  j#                  ||dz         }t%        ||d       t'        |d      5 }t)        |      }d d d        d d d        t+        |      dkD  sJ t+        t-        |d               dkD  sJ t+        |      t+              k(  sJ t/        t+        |            D ]?  }	||	   ||	   k(  sJ ||	   j0                  j2                  | j5                  d	      d
   k(  r?J  y # 1 sw Y   xY w# 1 sw Y   xY w)Nr$   r%   r&   r'   r(   rbfiler   r*   r+   )r   r   r,   r-   r.   r/   r0   r   r1   r	   r2   r   r3   r   r4   r5   r6   r7   r   openr   r   r8   r9   r:   r"   r;   
r"   r.   r<   r=   r>   r?   r@   frA   rB   s
             r   test_partition_json_from_filerQ   b   s   H%DH&&rww'7'7'=a'@AH8<<!408== !408<< $/8<<"D1		$	$	& 3&GG$$X.	GGLLW)<=	Ia@)T" 	3a*2M	3	3 x=1s8A; 1$$$x=C....3x=! H{mA....{##,,s0CB0GGGGH	3 	3	3 3s%   AG50G)=G5)G2	.G55G>c                <   t        |       }g }t        j                  t        j                  j                  |      d         }|t        j                  k(  rt        |      }|t        j                  k(  rt        |      }|t        j                  k(  rt        |      }|t        j                  k(  rt        |      }t        j                         5 }t        j                  j!                  |       }t        j                  j#                  ||dz         }t%        ||d       t'        |d      5 }t)        |d      }d d d        d d d        t+        t-                    D ]   }	||	   j.                  j0                  dk(  r J  y # 1 sw Y   IxY w# 1 sw Y   MxY w)	Nr$   r%   r&   r'   r(   rK   rE   )rM   rF   )r   r   r,   r-   r.   r/   r0   r   r1   r	   r2   r   r3   r   r4   r5   r6   r7   r   rN   r   r9   r   r:   r"   rO   s
             r   4test_partition_json_from_file_with_metadata_filenamerS      sX   H%DH&&rww'7'7'=a'@AH8<<!408== !408<< $/8<<"D1		$	$	& M&GG$$X.	GGLLW)<=	Ia@)T" 	Ma*VLM	M	M 3}%& <Q((11V;;;<	M 	M	M Ms%   AF0F>FF	FFc                    t        |       }g }t        j                  t        j                  j                  |      d         }|t        j                  k(  rt        |      }|t        j                  k(  rt        |      }|t        j                  k(  rt        |      }|t        j                  k(  rt        |      }t        j                         5 }t        j                  j!                  |       }t        j                  j#                  ||dz         }t%        ||d       t'        |      5 }|j)                         }d d d        t+              }	d d d        t-        |      dkD  sJ t-        t/        |d               dkD  sJ t-        |      t-        	      k(  sJ t1        t-        |            D ]?  }
||
   |	|
   k(  sJ ||
   j2                  j4                  | j7                  d      d	   k(  r?J  y # 1 sw Y   xY w# 1 sw Y   xY w)
Nr$   r%   r&   r'   r(   textr   r*   r+   )r   r   r,   r-   r.   r/   r0   r   r1   r	   r2   r   r3   r   r4   r5   r6   r7   r   rN   readr   r   r8   r9   r:   r"   r;   )r"   r.   r<   r=   r>   r?   r@   rP   rV   rA   rB   s              r   test_partition_json_from_textrX      s   H%DH&&rww'7'7'=a'@AH8<<!408== !408<< $/8<<"D1		$	$	& 2&GG$$X.	GGLLW)<=	Ia@)_ 	668D	&D12 x=1s8A; 1$$$x=C....3x=! H{mA....{##,,s0CB0GGGGH	 		2 2s%   AH/G8 H8H	=HHc                 t    t        j                  t              5  t                d d d        y # 1 sw Y   y xY wr   pytestraises
ValueErrorr    r!   r   .test_partition_json_raises_with_none_specifiedr_      s)    	z	"   s   .7c                 &    t        d      g k(  sJ y )N rU   r
   r^   r!   r   +test_partition_json_works_with_empty_stringrb      s    r"b(((r!   c                 &    t        d      g k(  sJ y )Nz[]rU   r
   r^   r!   r   )test_partition_json_works_with_empty_listrd      s    t$***r!   c                    t        d      } g }t        j                  t        j                  j                  |       d         }|t        j                  k(  rt        |       }|t        j                  k(  rt        |       }|t        j                  k(  rt        |       }|t        j                  k(  rt        |       }t        j                         5 }t        j                  j!                  |d      }t#        ||d       t%        |d      5 }|j'                         j)                  d      }d d d        d d d        t+        j,                  t.              5  t1        	       d d d        t+        j,                  t.              5  t1        
       d d d        t+        j,                  t.              5  t1               d d d        t+        j,                  t.              5  t1               d d d        y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   jxY w# 1 sw Y   y xY w)Nr   r$   r%   zfake-text.txt.jsonr'   r(   rK   zutf-8)r"   rM   )r"   rV   )rM   rV   )r"   rM   rV   )r   r   r,   r-   r.   r/   r0   r   r1   r	   r2   r   r3   r   r4   r5   r7   r   rN   rW   decoder[   r\   r]   r   )r.   r<   r=   r>   r@   rP   rV   s          r   2test_partition_json_raises_with_too_many_specifiedrg      s   O,DH&&rww'7'7'=a'@AH8<<!408== !408<< $/8<<"D1		$	$	& ,&GGLL)=>	Ia@)T" 	,a668??7+D	,, 
z	" 3	23 
z	" 6	56 
z	" *AD)* 
z	" >	=> >	, 	,, ,3 36 6* *> >sT   ;H G<.HHH 5H,$H8<H	HHH H),H58Ic                    d| j                  d       t        t        d            }t        fd|D              sJ y )N2029-07-05T09:24:282unstructured.partition.json.get_last_modified_datereturn_valuespring-weather.html.jsonc              3  P   K   | ]  }|j                   j                  k(    y wr   r:   last_modified)r   efilesystem_last_modifieds     r   r   zXtest_partition_json_from_file_path_gets_last_modified_from_filesystem.<locals>.<genexpr>   s!     Vqzz''+CCV   #&)patchr   r   r   )mockerr<   rr   s     @r   Etest_partition_json_from_file_path_gets_last_modified_from_filesystemrv      sG    4
LL<Kc   ./IJKHVXVVVVr!   c                     t        dd      5 } t        |       }d d d        t        d D              sJ y # 1 sw Y   xY w)Nr   rK   rL   c              3  L   K   | ]  }|j                   j                  d u   y wr   ro   r   rq   s     r   r   zHtest_partition_json_from_file_gets_last_modified_None.<locals>.<genexpr>        BAqzz''4/B   "$)rN   r   r   )rP   r<   s     r   5test_partition_json_from_file_gets_last_modified_Noner|      sD    	5t	< *!q)* BBBBB* *s	   7A c                     t        d      5 } | j                         }d d d        t              }t        d |D              sJ y # 1 sw Y   *xY w)Nr   rU   c              3  L   K   | ]  }|j                   j                  d u   y wr   ro   ry   s     r   r   zHtest_partition_json_from_text_gets_last_modified_None.<locals>.<genexpr>   rz   r{   rN   rW   r   r   )rP   rV   r<   s      r   5test_partition_json_from_text_gets_last_modified_Noner      sM    	5	6 !vvx 4(HBBBBB s   AAc                z    d}d| j                  d|       t        d      }t        fd|D              sJ y )Nri   2020-07-05T09:24:28rj   rk   r   )metadata_last_modifiedc              3  P   K   | ]  }|j                   j                  k(    y wr   ro   r   rq   r   s     r   r   zTtest_partition_json_from_file_path_prefers_metadata_last_modified.<locals>.<genexpr>  !     Taqzz''+AATrs   )rt   r   r   )ru   rr   r<   r   s      @r   Atest_partition_json_from_file_path_prefers_metadata_last_modifiedr      sN    42
LL<Kc   /H^H T8TTTTr!   c                     dt        t        d      d      5 } t        |       }d d d        t        fdD              sJ y # 1 sw Y    xY w)Nr   rm   rK   )rM   r   c              3  P   K   | ]  }|j                   j                  k(    y wr   ro   r   s     r   r   zOtest_partition_json_from_file_prefers_metadata_last_modified.<locals>.<genexpr>  r   rs   )rN   r   r   r   )rP   r<   r   s     @r   <test_partition_json_from_file_prefers_metadata_last_modifiedr     sX    2	9:D	A YQ!qAWXY T8TTTTY Ys   AAc                     dt        d      5 } | j                         }d d d        t              }t        fd|D              sJ y # 1 sw Y   -xY w)Nr   r   )rV   r   c              3  P   K   | ]  }|j                   j                  k(    y wr   ro   r   s     r   r   zOtest_partition_json_from_text_prefers_metadata_last_modified.<locals>.<genexpr>  r   rs   r   )rP   rV   r<   r   s      @r   <test_partition_json_from_text_prefers_metadata_last_modifiedr     sW    2	5	6 !vvx 4@VWHT8TTTT s   AAc                 |    d} t        j                  t              5  t        |        d d d        y # 1 sw Y   y xY w)Nz{"hi": "there"}rU   rZ   rU   s    r   2test_partition_json_raises_with_unprocessable_jsonr   #  s2     D	z	" "D!" " "   2;c                 |    d} t        j                  t              5  t        |        d d d        y # 1 sw Y   y xY w)Nz[{"hi": "there"}]]rU   rZ   rU   s    r   ,test_partition_json_raises_with_invalid_jsonr   +  s0    D	z	" "D!" " "r   )r"   r8   )ru   r   )8__doc__
__future__r   r-   pathlibr4   r[   pytest_mockr   test_unstructured.unit_utilsr   unstructured.documents.elementsr   unstructured.file_utils.modelr   unstructured.partition.emailr   unstructured.partition.htmlr	   unstructured.partition.jsonr   unstructured.partition.textr   unstructured.partition.xmlr   unstructured.staging.baser   Path__file__parentresolve	DIRECTORYr.   existsis_in_docker
test_filesr    markparametrizerC   rI   rQ   rS   rX   r_   rb   rd   rg   rv   r|   r   r   r   r   r   r   r^   r!   r   <module>r      s   : " 	    # 9 < 2 8 6 6 6 4 6GLL"))113	ww~~m,
 ww~~m,B Z0H 1H8 Z0Q 1Q0 Z0H 1H8 Z0< 1<. Z0H 1H:
)+>FWCCUUU""r!   