
    :Qg0                    R   d dl mZ d dlZd dlZd dlmZ d dlmZmZm	Z	m
Z
mZmZmZ d dlmZmZmZmZmZ d dlmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZ dZej@                  jC                  dde
efdee	fdeefg      d%d       Z"ej@                  jC                  dddg      d&d       Z#d Z$d Z%ej@                  jC                  dde
efdee	fg      d%d       Z&d Z'd'dZ(d'dZ)d Z*d Z+ej@                  jC                  dddg      d(d       Z,d Z-d  Z.d! Z/d" Z0 G d# d$      Z1y))    )annotationsN)MockFixture)EXPECTED_TABLE"EXPECTED_TABLE_SEMICOLON_DELIMITEREXPECTED_TABLE_WITH_EMOJIEXPECTED_TEXT!EXPECTED_TEXT_SEMICOLON_DELIMITEREXPECTED_TEXT_WITH_EMOJIEXPECTED_TEXT_XLSX)FixtureRequestMockassert_round_trips_through_JSONexample_doc_pathfunction_mock)chunk_by_title)clean_extra_whitespace)Table)_CsvPartitioningContextpartition_csv)#UNSTRUCTURED_INCLUDE_DEBUG_METADATAztext/csv)filenameexpected_textexpected_tablestanley-cups.csvzstanley-cups-with-emoji.csvztable-semicolon-delimiter.csvr   c                   d|  }t        |      }t        |d   j                        |k(  sJ |d   j                  j                  |k(  sJ |d   j                  j
                  t        k(  sJ |d   j                  j                  | k(  sJ y )Nexample-docs/r   r   )r   r   textmetadatatext_as_htmlfiletypeEXPECTED_FILETYPEr   )r   r   r   f_pathelementss        a/var/www/html/answerous/venv/lib/python3.12/site-packages/test_unstructured/partition/test_csv.py test_partition_csv_from_filenamer&   #   s     XJ'Ff-H!(1+"2"23}DDDA;,,>>>A;((,====A;((H444    infer_table_structureTFc                    d}t        ||       }t        |d   j                  d      xr |d   j                  j                  d u}|| k(  sJ y )Nexample-docs/stanley-cups.csv)r   r(   r   r    )r   hasattrr   r    )r(   r#   r$   $table_element_has_text_as_html_fields       r%   6test_partition_csv_from_filename_infer_table_structurer-   9   s]    ,FfDYZH 	$$n5 	:QK  --T9 ) 03HHHHr'   c                     t        t        d      d      } t        | d   j                        t        k(  sJ | d   j
                  j                  dk(  sJ y )Nr   test)metadata_filenamer   )r   r   r   r   r   r   r   r$   s    r%   7test_partition_csv_from_filename_with_metadata_filenamer2   E   sQ    -.@AU[\H!(1+"2"23}DDDA;((F222r'   c                 r    t        t        d      d      } t        | d   j                        t        k(  sJ y )Nzstanley-cups-utf-16.csvzutf-16)encodingr   )r   r   r   r   r   r1   s    r%    test_partition_csv_with_encodingr5   L   s3    -.GHS[\H!(1+"2"23}DDDr'   c                   d|  }t        |d      5 }t        |      }d d d        t        d   j                        |k(  sJ t	        |d   t
              sJ |d   j                  j                  |k(  sJ |d   j                  j                  t        k(  sJ |d   j                  j                  J t        r+|D ch c]  }|j                  j                   c}dhk(  sJ y y # 1 sw Y   xY wc c}w )Nr   rbfiler   csv)openr   r   r   
isinstancer   r   r    r!   r"   r   r   detection_origin)r   r   r   r#   fr$   elements          r%   test_partition_csv_from_filer@   R   s     XJ'F	fd	 )q a()!(1+"2"23}DDDhqk5)))A;,,>>>A;((,====A;((000*AIJg  11JugUUU +) ) Ks   C&>C2&C/c                     t        t        d      d      5 } t        | d      }d d d        t        d   j                        t
        k(  sJ |d   j                  j                  dk(  sJ y # 1 sw Y   IxY w)Nr   r7   r/   )r9   r0   r   )r;   r   r   r   r   r   r   r   r>   r$   s     r%   3test_partition_csv_from_file_with_metadata_filenamerC   f   sw    	12D	9 CQ a6BC "(1+"2"23}DDDA;((F222	C Cs   A,,A5c                    d}| j                  d|       t        t        d            }|d   j                  j                  |k(  sJ y )N2029-07-05T09:24:281unstructured.partition.csv.get_last_modified_datereturn_valuer   r   patchr   r   r   last_modified)mockerfilesystem_last_modifiedr$   s      r%   Dtest_partition_csv_from_file_path_gets_last_modified_from_filesystemrN   q   sQ    4
LL;-  
 -.@ABHA;--1IIIIr'   c                    d}d}| j                  d|       t        t        d      |      }|d   j                  j                  |k(  sJ y )NrE   2020-07-05T09:24:28rF   rG   r   )metadata_last_modifiedr   rI   )rL   rM   rQ   r$   s       r%   @test_partition_csv_from_file_path_prefers_metadata_last_modifiedrR   }   s^    42
LL;Jb   +,E[H A;--1GGGGr'   c                     t        t        d      d      5 } t        |       }d d d        d   j                  j                  J y # 1 sw Y   %xY w)Nr   r7   r8   r   r;   r   r   r   rK   rB   s     r%   4test_partition_csv_from_file_gets_last_modified_NonerU      sR    	12D	9 )Q a() A;--555) )s   AAc                     d} t        t        d      d      5 }t        ||       }d d d        d   j                  j                  | k(  sJ y # 1 sw Y   (xY w)NrP   r   r7   )r9   rQ   r   rT   )rQ   r>   r$   s      r%   ;test_partition_csv_from_file_prefers_metadata_last_modifiedrW      sb    2	12D	9 XQ a@VWX A;--1GGGGX Xs   AAc                D    t        t        |             }t        |       y )Nr   )r   r   r   r   r$   s     r%   test_partition_csv_with_jsonrZ      s    &6x&@AH#H-r'   c                 x    d} t        |       }t        | dddd      }t        |dd      }||k7  sJ ||k(  sJ y )	Nr*   r   by_title	   r   F)chunking_strategymax_characterscombine_text_under_n_charsinclude_header)r_   r`   )r   r   )r   r$   chunk_elementschunkss       r%   7test_add_chunking_strategy_to_partition_csv_non_defaultrd      sW    .Hh/H"$#$N HQSTUFX%%%V###r'   c                 b    d} t        | dd      }|d   j                  j                  dgk(  sJ y )Nr*   fastF)r   strategyra   r   engr   r   	languagesrY   s     r%   1test_partition_csv_element_metadata_has_languagesrk      s6    .HhPUVHA;))eW444r'   c                 f    d} t        | ddgd      }|d   j                  j                  dgk(  sJ y )Nr*   rf   deuF)r   rg   rj   ra   r   ri   rY   s     r%   )test_partition_csv_respects_languages_argrn      s>    .HFugeH A;))eW444r'   c                     t        t        d      dd      } | d   }|j                  dt        z   k(  sJ |j                  j
                  J y )Nr   rf   T)rg   ra   r   z#Stanley Cups Unnamed: 1 Unnamed: 2 )r   r   r   r   r   r    )r$   tables     r%   test_partition_csv_headerrq      sS    +,vdH QKE::>ASSSSS>>&&222r'   c                     e Zd ZdZd Zd Zej                  j                  dddg      dd       Z	d Z
d	 Zej                  j                  d
ddg      	 	 	 	 dd       Z	 	 ddZd Zd Zd Zd Z ej$                         dd       Zy)Describe_CsvPartitioningContextzIUnit-test suite for `unstructured.partition.csv._CsvPartitioningContext`.c                n    t        j                  t        d      d d dd      }t        |t               sJ y )Nr   T	file_pathr9   r4   ra   r(   )r   loadr   r<   selfctxs     r%   .it_provides_a_validating_alternate_constructorzNDescribe_CsvPartitioningContext.it_provides_a_validating_alternate_constructor   s:    %**&'9:"&
 #6777r'   c                    t        j                  t        d      5  t        j                  d d d dd       d d d        y # 1 sw Y   y xY w)N1either file-path or file-like object must be provmatchTru   )pytestraises
ValueErrorr   rw   ry   s    r%   ;and_the_validating_constructor_raises_on_an_invalid_contextz[Describe_CsvPartitioningContext.and_the_validating_constructor_raises_on_an_invalid_context   sC    ]]:-`a 	#((#&*	 	 	s   A  A		file_namer   zcsv-with-long-lines.csvc                N    t        t        |            }|j                  dk(  sJ y )N,r   r   	delimiter)ry   r   rz   s      r%   <it_auto_detects_the_delimiter_for_a_comma_delimited_CSV_filez\Describe_CsvPartitioningContext.it_auto_detects_the_delimiter_for_a_comma_delimited_CSV_file   s&     &&6y&AB}}###r'   c                N    t        t        d            }|j                  dk(  sJ y )Nzsemicolon-delimited.csv;r   rx   s     r%   Dand_it_auto_detects_the_delimiter_for_a_semicolon_delimited_CSV_filezdDescribe_CsvPartitioningContext.and_it_auto_detects_the_delimiter_for_a_semicolon_delimited_CSV_file   s%    %&67P&QR}}###r'   c                H    t        t        d            }|j                  J y )Nzsingle-column.csvr   rx   s     r%   Abut_it_returns_None_as_the_delimiter_for_a_single_column_CSV_filezaDescribe_CsvPartitioningContext.but_it_returns_None_as_the_delimiter_for_a_single_column_CSV_file   s#    %&67J&KL}}$$$r'   )ra   expected_value)FN)Tr   c                :    t        |      j                  |k(  sJ y )N)ra   )r   header)ry   ra   r   s      r%   8it_identifies_the_header_row_based_on_include_header_argzXDescribe_CsvPartitioningContext.it_identifies_the_header_row_based_on_include_header_arg  s     'nELLP^^^^r'   c                v    d}||_         t        d      }|j                  }|j                  d       ||k(  sJ y )Nz2024-08-04T02:23:53za/b/document.csv)rv   )rH   r   rK   assert_called_once_with)ry   get_last_modified_date_rM   rz   rK   s        r%   Ait_gets_last_modified_from_the_filesystem_when_a_path_is_providedzaDescribe_CsvPartitioningContext.it_gets_last_modified_from_the_filesystem_when_a_path_is_provided  sG     $9 /G,%0BC))778JK 8888r'   c                f    t        j                  d      }t        |      }|j                  }|J y )Ns   abcdefgr8   )ioBytesIOr   rK   )ry   r9   rz   rK   s       r%   Sand_it_falls_back_to_None_for_the_last_modified_date_when_file_path_is_not_providedzsDescribe_CsvPartitioningContext.and_it_falls_back_to_None_for_the_last_modified_date_when_file_path_is_not_provided  s1    zz*%%40))$$$r'   c                   t        t        d      d      5 }|j                          t        |      }|j                         5 }||u sJ |j	                         dk(  sJ |j                  d      dk(  sJ |j	                         dk(  sJ 	 d d d        |j	                         dk(  sJ 	 d d d        y # 1 sw Y   (xY w# 1 sw Y   y xY w)Nr   r7   r8   r         Stanley Cups,,)r;   r   readr   tell)ry   r>   rz   r9   s       r%   Oit_provides_transparent_access_to_the_source_file_when_it_is_a_file_like_objectzoDescribe_CsvPartitioningContext.it_provides_transparent_access_to_the_source_file_when_it_is_a_file_like_object$  s    "#56= 	!FFH)q1C &tqy yvvx1}$}yy}(9999vvx2~%~& 668q= =	! 	!& &		! 	!s$   -B>AB2B>2B;	7B>>Cc                    t        t        d            }|j                         5 }|j                  d      dk(  sJ 	 d d d        y # 1 sw Y   y xY w)Nr   r   r   )r   r   r;   r   )ry   rz   r9   s      r%   Hit_provides_transparent_access_to_the_source_file_when_it_is_a_file_pathzhDescribe_CsvPartitioningContext.it_provides_transparent_access_to_the_source_file_when_it_is_a_file_path3  sJ    %&67I&JKXXZ 	6499R=$5555	6 	6 	6s   AAc                    t        j                  t        d      5  t               j	                          d d d        y # 1 sw Y   y xY w)Nr}   r~   )r   r   r   r   	_validater   s    r%   5it_raises_when_neither_file_path_nor_file_is_providedzUDescribe_CsvPartitioningContext.it_raises_when_neither_file_path_nor_file_is_provided:  s5    ]]:-`a 	2#%//1	2 	2 	2s	   >Ac                    t        |d      S )NrF   )r   )ry   requests     r%   r   z7Describe_CsvPartitioningContext.get_last_modified_date_@  s    W&YZZr'   N)r   str)ra   boolr   z
int | None)r   r   )r   r   returnr   )__name__
__module____qualname____doc__r{   r   r   markparametrizer   r   r   r   r   r   r   r   r   fixturer    r'   r%   rs   rs      s    S8 [[ &		
$$$% [[AMS\C]^_"_4>_ __
9'+
9%!62 V^^[ [r'   rs   )r   r   r   r   r   r   )r(   r   )rL   r   )r   r   )2
__future__r   r   r   pytest_mockr   *test_unstructured.partition.test_constantsr   r   r   r   r	   r
   r   test_unstructured.unit_utilsr   r   r   r   r   unstructured.chunking.titler   unstructured.cleaners.corer   unstructured.documents.elementsr   unstructured.partition.csvr   r   &unstructured.partition.utils.constantsr   r"   r   r   r&   r-   r2   r5   r@   rC   rN   rR   rU   rW   rZ   rd   rk   rn   rq   rs   r   r'   r%   <module>r      sx   # 	  #    7 = 1 M V  3	]N;	&(@B[\+-.	
55 04-@I AI3E 3	]N;	&(@B[\
V
V3	JH6H &8:W%XY. Z.
$$553n[ n[r'   