
    :Qg6                     P    d dl Z d dlmZmZmZ d dlZd dlZd dl	m
Z
  G d d      Zy)    N)AnyDictList)compare_contents_as_dfc                   D   e Zd ZddefdZedeeeee	f         dee   fd       Z
edeeeee	f         deeeee	f         dee   fd       Zedeeee	f      dej                  fd	       Ze	 ddeeeee	f         deeeee	f         d
ee   dedeeef   f
d       Zy)TableAlignmentcutoffc                     || _         y )N)r	   )selfr	   s     g/var/www/html/answerous/venv/lib/python3.12/site-packages/unstructured/metrics/table/table_alignment.py__init__zTableAlignment.__init__
   s	        
table_datareturnc                     | D cg c])  }dj                  |D cg c]  }d|v s|d    c}      + c}}S c c}w c c}}w )a6  Extracts and concatenates the content of cells from each table in a list of tables.

        Args:
          table_data: A list of tables, each table being a list of cell data dictionaries.

        Returns:
          List of strings where each string represents the concatenated content of one table.
         content)join)r   tdds      r   get_content_in_tablesz$TableAlignment.get_content_in_tables   s;     R\\2E1i1n!I,EF\\E\s   =	88	==predicted_table_dataground_truth_table_datac                     t         j                  |      }g }| D ][  }t         j                  |g      d   }t        j                  ||dd      }|j	                  |r|j                  |d         nd       ] |S )a  Compares predicted table data with ground truth data to find the best
        matching table index for each predicted table.

        Args:
          predicted_table_data: A list of predicted tables.
          ground_truth_table_data: A list of ground truth tables.

        Returns:
          A list of indices indicating the best match in the ground truth for
          each predicted table.

        r   g?   r	   n)r   r   difflibget_close_matchesappendindex)r   r   ground_truth_textsmatched_indicesr   	referencematchess          r   get_table_level_alignmentz(TableAlignment.get_table_level_alignment   s    " ,AABYZ& 	\B&<<bTB1EI//	;MVY]^_G""7#5#;#;GAJ#GXZ[	\ r   c                     t        j                  | g d      }|j                  d      }|d   j                  t              |d<   |S )N)	row_index	col_indexr   )columnsr)   r*   )pd	DataFrame	set_indexastypestr)r   dfs     r   _zip_to_dataframez TableAlignment._zip_to_dataframe3   s>    \\*.ST\\+&[/005;	r   r$   c           	      X   g }g }g }g }t        ||       D ]  \  }}	|dk(  rE|j                  d       |j                  d       |j                  d       |j                  d       Q||   }
t        j                  |	      }t        j                  |
      }t	        |j                  d      |j                  d            }|j                  |d          |j                  |d          d}d}d}|
D cg c]  }|d   j                          }}t               }g }|	D ]  }|d   j                         }|d   }|d   }t        j                  |||d	
      }g }|g k7  rt        |      D cg c]  \  }}||d   k(  r||vr| }}}|s9|j                          t        |      D cg c]  \  }}||d   k(  r||vr| }}}|d   }|j                  |       |j                  |       ndg}|d   }|dk\  s|
|   d   } |
|   d   }!|j                  ||f| |!ff        |D ]3  }"|"d   d   |"d	   d   k(  r|d	z  }|"d   d	   |"d	   d	   k(  r|d	z  }|d	z  }5 d}#d}$|dkD  rt        ||z  d      }#t        ||z  d      }$|j                  |#       |j                  |$        t        t        |            D %cg c]	  }%|%|vs|% }&}%|&D ]F  }'|j                  d       |j                  d       |j                  d       |j                  d       H t        t!        j"                  |      d      t        t!        j"                  |      d      t        t!        j"                  |      dz  d      t        t!        j"                  |      dz  d      dS c c}w c c}}w c c}}w c c}%w )a  Aligns elements of the predicted tables with the ground truth tables at the cell level.

        Args:
          predicted_table_data: A list of predicted tables.
          ground_truth_table_data: A list of ground truth tables.
          matched_indices: Indices of the best matching ground truth table for each predicted table.
          cutoff: The cutoff value for the close matches.

        Returns:
          A dictionary with column and row alignment accuracies.

        r   r    by_col_token_ratioby_row_token_ratior   r)   r*   r   r      g      Y@)col_index_accrow_index_acccol_content_accrow_content_acc)zipr!   r   r2   r   fillnalowersetr   r    	enumerateclearaddroundrangelennpmean)(r   r   r$   r	   content_diff_colscontent_diff_rowsr8   r9   idxr   ground_truth_tdpredict_table_dfground_truth_table_dftable_content_diffaligned_element_col_countaligned_element_row_counttotal_element_countgtdground_truth_td_contents_listused_indicesindices_tuple_pairstd_eler   r)   col_idxr&   matching_indicesib_string	b_indicesmatching_indexmatched_idxgt_row_indexgt_col_indexindices_tuple_pairtable_col_index_acctable_row_index_accidnot_found_gt_table_indexes_s(                                           r   get_element_level_alignmentz*TableAlignment.get_element_level_alignment:   se   & ?,@A R	6GCby!((+!((+$$Q'$$Q'5c:O  .??C$2$D$D_$U!!7%,,R0 ''+" $$%78L%MN$$%78L%MN()%()%"#O^,_S^-A-A-C,_),_5L"$ 'e +113";/	 -!331!	 $& b= ,55R+S!'Ax#wqz1a|6K !I !
 %$**, 099V/W% +8'71:5!<:O %	 %
 &/q\N$++N; $$^4(*t$.q1!##2;#?#LL#2;#?#LL'..G0D|UaFb/cdO'eR ': )"%a(+/A!/DQ/GG-2-%a(+/A!/DQ/GG-2-#q(#) #$"#"Q&&+,EH[,[]^&_#&+,EH[,[]^&_#  !45  !45eR	6j s#:;<&
/@YB&
" &
 , 	$A$$Q'$$Q'  #  #		$ #277=#91="277=#91=$RWW->%?%%GK$RWW->%?%%GK	
 	
K -`(!%@&
s   1N*N
%N!
:	N'N'N)g?)__name__
__module____qualname__floatr   staticmethodr   r   r0   r   r   intr'   r,   r-   r2   rf    r   r   r   r   	   sR   u  
]$tDcN/C*D 
]c 
] 
] "4S#X#78!%d4S>&:!; 
c 0 d4S>&: r||   
 	y
"4S#X#78y
!%d4S>&:!;y
 cy
 	y

 
c5j	y
 y
r   r   )r   typingr   r   r   numpyrF   pandasr,   "unstructured_inference.models.evalr   r   rm   r   r   <module>rr      s#     " "   Ek
 k
r   