
    :QgE                     p   d dl Z d dlZd dlZd dlmZ d dlZd dlZd dlZd dl	m
Z
mZmZmZmZ e j                  j!                  d      Ze j                  j%                   ej                  e      j(                  j+                         ddd      Ze j                  j%                  ed      ZdZd	Zd
ZdZdZdZ ej<                  g dg dg dg dg dd      Z ej<                  g dg dg dg dd      Z  ejB                         d        Z"ejF                  jI                  ed      ejF                  jK                  d      d               Z&ejF                  jO                  deee ed      dddifeee ed      d i fe
ee ed!      d"i fg      d#        Z(ejF                  jI                  ed      ejF                  jK                  d      d$               Z)ejF                  jI                  ed      ejF                  jK                  d      d%               Z*ejF                  jI                  ed      ejF                  jK                  d      d&               Z+ejF                  jI                  ed      ejF                  jK                  d      d'               Z,ejF                  jI                  ed      ejF                  jK                  d      d(               Z-ejF                  jI                  ed      ejF                  jK                  d      d)               Z.ejF                  jI                  ed      ejF                  jK                  d      ejF                  jO                  d*d+d,g      d-e/d.e0fd/                     Z1ejF                  jI                  ed      ejF                  jK                  d      d0               Z2ejF                  jI                  ed      ejF                  jK                  d      d1               Z3ejF                  jI                  ed      ejF                  jK                  d      d2               Z4ejF                  jI                  ed      ejF                  jK                  d      d3               Z5ejF                  jI                  ed      ejF                  jK                  d      d4               Z6ejF                  jI                  ed      ejF                  jK                  d      d5               Z7ejF                  jI                  ed      ejF                  jK                  d      ejF                  jO                  d6d+d,g      d7e/d.e0fd8                     Z8ejF                  jI                  ed      ejF                  jK                  d      d9               Z9ejF                  jI                  ed      ejF                  jK                  d      d:               Z:ejF                  jI                  ed      ejF                  jK                  d      d;               Z;y)<    N)Path)ElementTypeMetricsCalculatorTableStructureMetricsCalculatorTextExtractionMetricsCalculatorfilter_metricsget_mean_groupingz/.dockerenvz..zexample-docstest_evaluate_filesunstructured_outputgold_standard_cctgold_standard_element_typegold_standard_table_structureunstructured_output_cct#unstructured_output_table_structure)Bank Good Credit Loan.pptx Performance-Audit-Discussion.pdfzcurrency.csv)pptxpdfcsv)
connector1r   
connector2)gCl?g+?gMb?)gMbP?gMb`?gˡE?)filenamedoctype	connectorzcct-accuracyzcct-%missing)r   r   r   zelement-type-accuracyc               #   (   K   d } d  |         yw)z@Fixture for removing side-effects of running tests in this file.c                      g d} d t        j                  t              D        }|D ]0  }|j                  | v st	        j
                  |j                         2 y)z.Remove directories created from running tests.)test_evaluate_results_ccttest_evaluate_results_cct_txt"test_evaluate_results_element_type$test_evaluate_result_table_structurec              3   B   K   | ]  }|j                         s|  y w)N)is_dir).0ds     d/var/www/html/answerous/venv/lib/python3.12/site-packages/test_unstructured/metrics/test_evaluate.py	<genexpr>zL_cleanup_after_test.<locals>.remove_generated_directories.<locals>.<genexpr>J   s     Iahhj1Is   N)osscandirTESTING_FILE_DIRnameshutilrmtreepath)target_dir_namessubdirsr#   s      r$   remove_generated_directoriesz9_cleanup_after_test.<locals>.remove_generated_directories@   sM    
 Jbjj)9:I 	&Avv))aff%	&    N )r/   s    r$   _cleanup_after_testr2   <   s     &  
 "s   z&Skipping this test in Docker container)reasonr2   c                     t         j                  j                  t        t              } t         j                  j                  t        t
              }t         j                  j                  t        d      }t        | |      j                  |dd       t         j                  j                  t         j                  j                  |d            sJ t        j                  t         j                  j                  |d      d      }t        |      dk(  sJ t        |j                        d	k(  sJ |j                  d
   j                  dk(  sJ y )Nr   documents_dirground_truths_dirF)
export_dirvisualize_progressdisplay_agg_dfall-docs-cct.tsv	sep      r   r   )r&   r,   joinr(   UNSTRUCTURED_OUTPUT_DIRNAMEGOLD_CCT_DIRNAMEr   	calculateisfilepdread_csvlencolumnsilocr   
output_dir
source_dirr8   dfs       r$   test_text_extraction_evaluationrO   T   s     .0KLJ.0@AJ.0KLJ# Ji:%PUiV77>>"'',,z3EFGGG	RWW\\*.@At	LBr7a<<rzz?a771:">>>>r0   )calculator_classoutput_dirnamesource_dirnamer,   expected_lengthkwargszBank Good Credit Loan.pptx.txtr@   document_typetxtzIRS-2023-Form-1095-A.pdf.json   zIRS-form-1987.pdf.json   c                     t        t              |z  }t        t              |z  } | d||d|}|j                  |      }	t        |	      |k(  sJ y )Nr5   r1   )r   r(   _process_documentrH   )
rP   rQ   rR   r,   rS   rT   rL   rM   
calculatoroutput_lists
             r$   :test_process_document_returns_the_correct_amount_of_valuesr]   f   s]    @ &'.8J&'.8J!c
jc\bcJ..t4K{...r0   c                  "   t         j                  j                  t        t              } t         j                  j                  t        t
              }t         j                  j                  t        d      }t        | |d      j                  |       t        j                  t         j                  j                  |d      d      }t        |      dk(  sJ t        |j                        d	k(  sJ |j                  d
   j                  dk(  sJ y )Nr   rV   r6   r7   rU   r8   r;   r<   r=   r?   r@   r   r   )r&   r,   rA   r(   UNSTRUCTURED_CCT_DIRNAMErC   r   rD   rF   rG   rH   rI   rJ   r   rK   s       r$   (test_text_extraction_evaluation_type_txtrb      s     .0HIJ.0@AJ.0KLJ# Jei:i&	RWW\\*.@At	LBr7a<<rzz?a771:">>>>r0   c                     t         j                  j                  t        t              } t         j                  j                  t        t
              }t         j                  j                  t        d      }t        | |      j                  |d       t         j                  j                  t         j                  j                  |d            sJ t        j                  t         j                  j                  |d      d      }t        |      dk(  sJ t        |j                        d	k(  sJ |j                  d
   j                  dk(  sJ y )Nr   r5   Fr8   r9   z#all-docs-element-type-frequency.tsvr<   r=      rX   r   zIRS-form-1987.pdf)r&   r,   rA   r(   rB   GOLD_ELEMENT_TYPE_DIRNAMEr   rD   rE   rF   rG   rH   rI   rJ   r   rK   s       r$   test_element_type_evaluationrg      s     .0KLJ.0IJJ.0KLJ  $ i:%i@77>>"'',,z3XYZZZ	RWW\\*.STZ^	_Br7a<<rzz?a771:"5555r0   c                  ,   t         j                  j                  t        t              } t         j                  j                  t        t
              }t         j                  j                  t        d      }t        | |      j                  |d       t         j                  j                  t         j                  j                  |d            sJ t         j                  j                  t         j                  j                  |d            sJ t        j                  t         j                  j                  |d      d      }t        j                  t         j                  j                  |d      d      j                  d	      }t        |      d
k(  sJ t        |j                        dk(  sJ |j                  d   j                  dk(  sJ t!        j"                  t!        j$                  |d   |d         d      |j&                  d   k(  sJ y )Nr   r5   Frd   z%all-docs-table-structure-accuracy.tsvz&aggregate-table-structure-accuracy.tsvr<   r=   metric      re   zIRS-2023-Form-1095-A.pdftable_level_acctotal_tables)weightsr?   )rl   average)r&   r,   rA   r(   $UNSTRUCTURED_TABLE_STRUCTURE_DIRNAMEGOLD_TABLE_STRUCTURE_DIRNAMEr   rD   rE   rF   rG   	set_indexrH   rI   rJ   r   nproundro   loc)rL   rM   r8   rN   agg_dfs        r$   test_table_structure_evaluationrw      s    .0TUJ.0LMJ.0VWJ# $ i:%i@77>>"'',,z3Z[\\\77>>"'',,z3[\]]]	RWW\\*.UV\`	aB[[
Z!IJPTi  r7a<<rzz?b   771:"<<<<
B012n;MNPQR::23	4	4r0   c                  f   t         j                  j                  t        t              } dg}t         j                  j                  t        t
              }t         j                  j                  t        d      }t        | |      j                  |      j                  |       t         j                  j                  t         j                  j                  |d            sJ t        j                  t         j                  j                  |d      d      }t        |      t        |      k(  sJ y )	Nzcurrency.csv.jsonr   r5   )document_pathsr`   r;   r<   r=   )r&   r,   rA   r(   rB   rC   r   on_filesrD   rE   rF   rG   rH   )rL   r\   rM   r8   rN   s        r$   test_text_extraction_takes_listr{      s     .0KLJ&'K.0@AJ.0KLJ# $ hkh*99
9+K 77>>"'',,z3EFGGG	RWW\\*.@At	LBr7c+&&&&r0   c                     t         j                  j                  t        t              } t         j                  j                  t        t
              }t         j                  j                  t        d      }t        | |d      j                  |       t        j                  t         j                  j                  |d      d      }t        |      dk(  sJ y )	Nr   r   )r6   r7   group_byr`   all-doctype-agg-cct.tsvr<   r=   rX   )r&   r,   rA   r(   rB   rC   r   rD   rF   rG   rH   rK   s       r$   "test_text_extraction_with_groupingr      s     .0KLJ.0@AJ.0KLJ# $ i:i&	RWW\\*.GHd	SBr7a<<r0   c                     t         j                  j                  t        t              } t         j                  j                  t        t
              }t         j                  j                  t        d      }t        j                  t              5  t        | |d      j                  |       d d d        y # 1 sw Y   y xY w)Nr   zinvalid typer_   r`   )r&   r,   rA   r(   rB   rC   pytestraises
ValueErrorr   rD   )rL   rM   r8   s      r$   test_text_extraction_wrong_typer      s     .0KLJ.0@AJ.0KLJ	z	" +'$
R`	

)z)
*+ + +s   B66B?)grouping	count_row)r   r?   )r   rj   r   r   c                 0   t         j                  j                  t        d      }t	        | t
        |d       t        j                  t         j                  j                  |d|  d      d      }||    j                         j                         |k(  sJ y )Nr   text_extractionr}   
data_inputr8   	eval_nameall-z-agg-cct.tsvr<   r=   )
r&   r,   rA   r(   r   DUMMY_DF_CCTrF   rG   dropnanunique)r   r   r8   
grouped_dfs       r$   test_get_mean_grouping_df_inputr      s~     .0KLJ#	 RWW\\*XJl6STZ^_Jh&&(002i???r0   c                  :   t         j                  j                  t        t              } t         j                  j                  t        t
              }t         j                  j                  t        d      }t        | |      j                  |       t         j                  j                  |d      }t        d||d       t        j                  t         j                  j                  |d      d	
      }|d   j                         j                         dk(  sJ y )Nr   r5   r`   r;   r   r   r   r~   r<   r=   r?   )r&   r,   rA   r(   rB   rC   r   rD   r   rF   rG   r   r   )rL   rM   r8   r   r   s        r$    test_get_mean_grouping_tsv_inputr     s     .0KLJ.0@AJ.0KLJ# $ i:i&ww||J(:;H#	 RWW\\*6OPVZ[Ji '')113q888r0   c                  
   t         j                  j                  t        t              } t         j                  j                  t        t
              }t         j                  j                  t        d      }t        | |      j                  |       t        j                  t         j                  j                  |d      d      }t        j                  t              5  t        d||d	       d d d        y # 1 sw Y   y xY w)
Nr   r5   r`   r;   r<   r=   invalidr   r   )r&   r,   rA   r(   rB   rC   r   rD   rF   rG   r   r   r   r   rK   s       r$   $test_get_mean_grouping_invalid_groupr   #  s     .0KLJ.0@AJ.0KLJ# $ i:i&	RWW\\*.@At	LB	z	" 
!'		

 
 
s    C99Dc                      t        j                         } t        j                  t              5  t        d| dd       d d d        y # 1 sw Y   y xY w)Nr   some_dirr   r   rF   	DataFramer   r   
SystemExitr   )empty_dfs    r$   &test_text_extraction_grouping_empty_dfr   9  sC     ||~H	z	" X)XzEVWX X Xs   AAc                      t        j                  dg di      } t        j                  t              5  t        d| dd       d d d        y # 1 sw Y   y xY w)Nsome_column)re   rj   r?   r   r   r   r   )df_with_no_groupings    r$   .test_get_mean_grouping_missing_grouping_columnr   A  sM     ,,y'AB	z	" Y)%8*FWXY Y Ys   AAc                      t        j                  dg di      } t        j                  t              5  t        d| dd       d d d        y # 1 sw Y   y xY w)Nr   )NNNr   r   r   r   )df_with_null_groupings    r$   /test_get_mean_grouping_all_null_grouping_columnr   I  sN     LL)5G)HI	z	" e)%:JRcde e es   AAc                      t        j                  t              5  t        dt        dd       d d d        y # 1 sw Y   y xY w)Nr   r   r   r   )r   r   r   r   DUMMY_DF_ELEMENT_TYPEr1   r0   r$   (test_get_mean_grouping_invalid_eval_namer   Q  s9     
z	" ])%:JR[\] ] ]s	   7A )r}   r   r}   c                 0   t         j                  j                  t        d      }t	        | t
        |d       t        j                  t         j                  j                  |d|  d      d      }||    j                         j                         |k(  sJ y )Nr   element_typer   r   z-agg-element-type.tsvr<   r=   )
r&   r,   rA   r(   r   r   rF   rG   r   r   )r}   r   r8   r   s       r$   #test_get_mean_grouping_element_typer   X  s     .0TUJ( 	 
Z4z1F!GHdJ h&&(002i???r0   c                  <   t        t        j                  j                  t        d      d      5 } | j                  d       | j                  d       d d d        t        j                  j                  t        d      }t        t        t        j                  j                  t        d      dd|d	       t        j                  t        j                  j                  |d      d
      }t        |      dk(  sJ |d   j                  d   dk(  sJ y # 1 sw Y   xY w)Nfilter_list.txtwBank Good Credit Loan.pptx
!Performance-Audit-Discussion.pdf
r   r   filtered_metrics.tsvfiler   filter_list	filter_byexport_filenamer8   return_typer<   r=   rj   r   r   )openr&   r,   rA   r(   writer   r   rF   rG   rH   rJ   )r   r8   filtered_dfs      r$   test_filter_metricsr   i  s     
bggll+->?	E 9

12

789 .0KLJGGLL!13DE. ++bggll:7MNTXYK{q   z"''*.JJJJ9 9s   #DDc                  N   t        t        j                  j                  t        d      d      5 } | j                  d       | j                  d       d d d        t        j                  j                  t        d      }t        t        ddgdd	|d
       t        j                  t        j                  j                  |d	      d      }t        d||dd       t        j                  t        j                  j                  |d      d      }t        j                  t        |j                  d         d      sJ t        j                  t        |j                  d         d      sJ t        j                  t        |j                  d         d      sJ y # 1 sw Y   JxY w)Nr   r   r   r   r   r   r   r   r   r   r   r<   r=   allr   two-filename-agg-cct.tsvr}   r   r8   r   r   re   r   L7A`?re   re   Pn?re   rj   粝K?r   r&   r,   rA   r(   r   r   r   rF   rG   r   rs   isclosefloatrJ   r   r8   r   r   s       r$   test_get_mean_grouping_all_filer   ~  sK    
bggll+->?	E 9

12

789 .0KLJ13UV. ++bggll:7MNTXYK#2 RWW\\*6PQW[\J::eJOOD12E:::::eJOOD12E:::::eJOOD12E:::59 9s   #FF$c                     t        t        j                  j                  t        d      d      5 } | j                  d       | j                  d       d d d        t        j                  j                  t        d      }t        t        t        j                  j                  t        d      dd|d	       t        j                  t        j                  j                  |d      d
      }t        d||dd       t        j                  t        j                  j                  |d      d
      }t        j                  t        |j                  d         d      sJ t        j                  t        |j                  d         d      sJ t        j                  t        |j                  d         d      sJ y # 1 sw Y   jxY w)Nr   r   r   r   r   r   r   r   r   r<   r=   r   r   r   r   r   r   r   r   r   r   r   r   s       r$   #test_get_mean_grouping_all_file_txtr     sV    
bggll+->?	E 9

12

789 .0KLJGGLL!13DE. ++bggll:7MNTXYK#2 RWW\\*6PQW[\J::eJOOD12E:::::eJOOD12E:::::eJOOD12E:::59 9s   #F::G)<r&   pathlibr*   r   numpyrs   pandasrF   r   unstructured.metrics.evaluater   r   r   r   r   r,   existsis_in_dockerrA   __file__parentresolveEXAMPLE_DOCS_DIRECTORYr(   rB   rC   rf   rq   ra   rp   r   r   r   fixturer2   markskipifusefixturesrO   parametrizer]   rb   rg   rw   r{   r   r   strintr   r   r   r   r   r   r   r   r   r   r   r1   r0   r$   <module>r      s   	        ww~~m,GLL!!))+T4  77<< 68MN 3 & 8 > 4 'L $r||

 *?--
 %

 *?!6	  # #. L)QR./? 0 S?  a ,$12e$	
 ,0(01	
 )'%)*	
#:/;:/ L)QR./? 0 S? L)QR./6 0 S6" L)QR./ 0 S2 L)QR./' 0 S'" L)QR./ 0 S L)QR./+ 0 S+ L)QR./2^EU4VW	@c 	@c 	@ X 0 S	@ L)QR./9 0 S9* L)QR./
 0 S
( L)QR./X 0 SX L)QR./Y 0 SY L)QR./e 0 Se L)QR./] 0 S]
 L)QR./2^EU4VW@# @# @ X 0 S@ L)QR./K 0 SK& L)QR./; 0 S;< L)QR./; 0 S;r0   