
    :Qg[y              	          d Z ddlZddlmZ ddlmZ ddlZddlZ ej                  g d      Z
dZ ej                  ddd      Ze G d	 d
             Ze G d d             Z G d d      Zedk(  rddlmZ  ed       ed      gZ ed       ed      gZ eee      D ]f  \  ZZej1                  ee      Zej5                         \  ZZ edej<                   d ee               edej<                   d ee              h yy)zZ
Implements object detection metrics: average precision, precision, recall, and f1 score.
    N)	dataclass)Path)
g      ?g?g333333?g?gffffff?g      ?g?g333333?g?gffffff?g?g)\(?g{Gz?c                   :    e Zd ZU dZeed<   eed<   eed<   eed<   y)#ObjectDetectionAggregatedEvaluationzGClass representing a gathered class-aggregated object detection metricsf1_score	precisionrecallm_apN)__name__
__module____qualname____doc__float__annotations__     b/var/www/html/answerous/venv/lib/python3.12/site-packages/unstructured/metrics/object_detection.pyr   r      s    QOM
Kr   r   c                   r    e Zd ZU dZeeef   ed<   eeef   ed<   eeef   ed<   eeef   ed<   ed        Z	y)!ObjectDetectionPerClassEvaluationz@Class representing a gathered object detection metrics per-classr   r   r	   r
   c                    t        t        |            D ci c]  }||   ||    }}t        t        |            D ci c]  }||   ||    }}t        t        |            D ci c]  }||   ||    }}t        t        |            D ci c]  }||   ||    }} | ||||      S c c}w c c}w c c}w c c}w )N)rangelen)	clsapr   r	   f1class_labelsir   r
   s	            r   from_tensorsz.ObjectDetectionPerClassEvaluation.from_tensors&   s    49#l:K4LMqLORU*MM<A#lBS<TUq\!_il2U	U6;C<M6NO,q/6!9,OO05c,6G0HI1QA&II8Y55 NUOIs   B,B1'B6B;N)
r   r   r   r   dictstrr   r   classmethodr   r   r   r   r   r      sS    J3:CJe
sEz
6 6r   r   c                   .   e Zd ZeZeZeZ	 d0de	e
j                     de	e
j                     de	e   de	e   de	e   defdZeded	ed
d fd       Zd
eeef   fdZeded
ee	e	f   fd       Zed1deded
e	e   fd       Zede
j                  de
j                  ded
e
j                  fd       Zedej:                  deeef   d
ej:                  fd       Zede
j                  de
j                  d
e
j                  fd       Zde
j                  de
j                  de
j                  de
j                  de
j                  de
j                  d e
j                  d!e
j                  d
e
j                  fd"Z 	 	 d2d#e
j                  d$e
j                  d%ed&eded'ed
efd(Z!de
j                  d)e
j                  de
j                  de
j                  de
j                  d
efd*Z"de
j                  d)e
j                  de
j                  d+ed,e
j                  d-e#fd.Z$y/)3ObjectDetectionEvalProcessordocument_predsdocument_targetspages_heightpages_widthr   devicec                     || _         |D cg c]  }|j                  |       c}| _        |D cg c]  }|j                  |       c}| _        || _        || _        || _        yc c}w c c}w )a  
        Initializes the ObjectDetection prediction and ground truth.

        Args:
            document_preds (list):      list (of length pages of document) of
                                        Tensors of shape (num_predictions, 6)
                                        format: (x1, y1, x2, y2, confidence,class_label)
                                        where x1,y1,x2,y2 are according to image size
            document_targets (list):    list (of length pages of document) of
                                        Tensors of shape (num_targets, 6)
                                        format: (label, x1, y1, x2, y2)
                                        where x,y,w,h are according to image size
            pages_height (list):        list of height of each page in the document
            pages_width (list):         list of width of each page in the document
            class_labels (list):        list of class labels
        N)r(   tor$   r%   r&   r'   r   )	selfr$   r%   r&   r'   r   r(   predtargets	            r   __init__z%ObjectDetectionEvalProcessor.__init__5   sc    2 ;IJ4twwvJAQ Rv6!2 R(&(	 K Rs
   A&A+prediction_file_pathground_truth_file_pathreturnc                    t        |      5 }t        j                  |      }ddd       t        |      5 }t        j                  |      }ddd       t        d         t        d         k(  sJ d       t	        |d         t	        |d         k(  sJ d       t        t        |d   d       t        |d   d             D ]  \  }}|d	   |d	   k(  s.J d
|j                   d|d	    d|j                   d|d	    d	       |d	   }|d   |d   k(  rPJ d|j                   d|d   d    d|d   d    d|j                   d|d   d    d|d   d    d| d        |d   }	| j                  ||	d      }
| j                  ||	      }| j                  |      \  }} | |
||||	      S # 1 sw Y   xY w# 1 sw Y   nxY w)aI  
        Initializes the ObjectDetection prediction and ground truth,
        and converts the data to the required format.

        Args:
            prediction_file_path (Path): path to json file with predictions dump from OD model
            ground_truth_file_path (Path): path to json file with OD ground truth data
        Nobject_detection_classesz5Classes in predictions and ground truth do not match.pagesz:Pages number in predictions and ground truth do not match.c                     | d   S Nnumberr   ps    r   <lambda>z>ObjectDetectionEvalProcessor.from_json_files.<locals>.<lambda>q   s
    AhK r   )keyc                     | d   S r6   r   r8   s    r   r:   z>ObjectDetectionEvalProcessor.from_json_files.<locals>.<lambda>r   s
    Qx[ r   r7   zPage numbers in predictions z (z) and ground truth z) do not match.sizezPage sizes in predictions r   z x    z) do not match for page .T)
prediction)	openjsonloadsortedr   zipname_process_data_parse_page_dimensions)r   r/   r0   fpredictions_dataground_truth_data	pred_pagegt_pagepage_numr   r$   r%   r&   r'   s                 r   from_json_filesz,ObjectDetectionEvalProcessor.from_json_filesU   s     &' 	,1#yy|	,() 	-Q $		!	- &'ABCv89H
 
 	CB	C 
 #G,-g&2
 
 	HG	H 
 #&#G,2GH$W-3HI#
 	Iw X&'(*;; ./C/H/H.I Jh'((;<R<W<W;X YH%&o7;
 !*H V$7 ,-A-F-F,G Hf%a()Yv->q-A,B C$$:$?$?#@76?STCUBVVY6?1%&&>xjK7	& ((BC**+;\VZ*[,,->M$'$>$>?O$P!k>#3\;P\]]G	, 	,	- 	-s   F*F7*F47Gc                 F   g }t        | j                  | j                  | j                  | j                        D ]-  \  }}}}| j                  ||||      }|j                  |       / d\  }}}	}
t        | j                        }t        j                  |t        j                        }t        j                  |t        j                        }t        j                  |t        j                        }t        j                  |t        j                        }t        |      r"t        t        |       D cg c]  }t        j                  |d       }} | j                  | \  }}}}}|j!                         |j!                         |j!                         }
}	}|j!                         }|j!                  d      }|j!                  d      }|j!                  d      }|j!                  d      }t#        |      D ]I  \  }}t%        ||         ||<   t%        ||         ||<   t%        ||         ||<   t%        ||         ||<   K t&        j)                  ||||| j                        }t+        t%        |
      t%        |      t%        |	      t%        |            }||fS c c}w )zGet per document OD metrics.

        Returns:
            tuple: Tuple of ObjectDetectionAggregatedEvaluation and
                ObjectDetectionPerClassEvaluation
        )predstargetsheightwidth)      rU   rU   rU   r   r>   )r   r   r	   r   r   )r   r   r	   r
   )rE   r$   r%   r&   r'    _compute_page_detection_matchingappendr   r   npfullnanlisttorchcat_compute_detection_metricsmean	enumerater   r   r   r   )r+   document_matchingsrQ   rR   rS   rT   page_matching_tensorsmean_apmean_precisionmean_recallmean_f1num_clsmean_ap_per_classmean_precision_per_classmean_recall_per_classmean_f1_per_classxmatching_info_tensorsap_per_present_classesprecision_per_present_classesrecall_per_present_classesf1_per_present_classespresent_classesap_per_classprecision_per_classrecall_per_classf1_per_classr   class_indexod_per_class_evaluationod_evaluations                                  r   get_metricsz(ObjectDetectionEvalProcessor.get_metrics   s     -0!6!68I8I4K[K[.
 
	=)E7FE %)$I$I	 %J %! %%&;<
	=9
5g d''(GGGRVV4#%777BFF#;  " 8GGGRVV4!">B3HZC[>\$]UYYq!_$]!$] 0//&&-*& .224*//1&++- *1KN -113G 266q9L"?"D"DQ"G9>>qA166q9L"+O"< H;16|A1G!+.8=>QRS>T8U(55:;KA;N5O%k216|A1G!+.H #D"P"P .( ** #Q #
 <7^N+%w	
 555k %^s   4Jdatac                     g }g }| d   D ]0  }|j                  |d   d          |j                  |d   d          2 ||fS )zX
        Process the page dimensions from the json file to the required format.
        r4   r=   rS   rT   )rW   )r{   r&   r'   pages       r   rH   z3ObjectDetectionEvalProcessor._parse_page_dimensions   s\    
 M 	6DVX 67tF|G45	6 [((r   r@   c           
      ,   g }| d   D ]  }g }|d   D ]W  }|d   }|j                  |      }|d   \  }	}
}}|r|d   }|j                  |	|
||||g       B|j                  ||	|
||g       Y t        j                  |      }|j                  |        |S )zQ
        Process the elements from the json file to the required format.
        r4   elementstypebboxprob)indexrW   r\   tensor)r{   r   r@   
pages_listr}   page_elementselementclass_label	class_idxx1y1x2y2
confidencepage_tensors                  r   rG   z*ObjectDetectionEvalProcessor._process_data   s    
 
M 	+DM
+ 	F%fo(..{;	!(BB!(J!(("b"b*i)PQ!(()RR)DE	F  ,,}5Kk*	+ r   preds_scores	preds_clstop_kc                    t        j                  |      }|j                  dd      t        j                  |dz   | j                        j                  dd      k(  }| j                  dd      |z  }|j                  dd      \  }}|d|ddf   j                  d	      }||j                  dd
         }	|	j                  d      S )a  
        Get the indexes of all the top k predictions for every class

        Args:
            preds_scores:   The confidence scores, vector of shape (n_pred)
            preds_cls:      The predicted class, vector of shape (n_pred)
            top_k:          Number of predictions to keep per class, ordered by confidence score

        Returns:
            top_k_idx:     Indexes of the top k predictions. length <= (k * n_unique_class)
        r>   r(   r   T
descendingNFas_tupledim)r\   maxviewaranger(   sortnonzerosplit)
r   r   r   n_unique_clsmaskpreds_scores_per_clssorted_scores_per_clssorting_idxidx_with_satisfying_scores	top_k_idxs
             r   _get_top_k_idx_per_clsz3ObjectDetectionEvalProcessor._get_top_k_idx_per_cls	  s      yy+~~b!$1\%8%8)

$q"+  ,00Q7$>-A-F-FqUY-F-Z*{%:6E619%E%M%MW\%M%]" : @ @ @ JK	~~b!!r   boxes	img_shapec                     | dddgf   j                  d|d         | dddgf<   | dddgf   j                  d|d         | dddgf<   | S )a*  
        Clips bboxes to image boundaries.

        Args:
            bboxes:         Input bounding boxes in XYXY format of [..., 4] shape
            img_shape:      Image shape (height, width).
        Returns:
            clipped_boxes:  Clipped bboxes in XYXY format of [..., 4] shape
        .r      r>   )minr      )clip)r   r   s     r   "_change_bbox_bounds_for_image_sizez?ObjectDetectionEvalProcessor._change_bbox_bounds_for_image_size$  so     #3A;/44	!4McAq6k"3A;/44	!4McAq6kr   box1box2c                 V   d } || j                         } ||j                         }t        j                  | dddddf   |ddddf         t        j                  | dddddf   |ddddf         z
  j	                  d      j                  d      }||dddf   |z   |z
  z  S )a  
        Return intersection-over-union (Jaccard index) of boxes.
        Both sets of boxes are expected to be in (x1, y1, x2, y2) format.

        Args:
            box1: Tensor of shape [N, 4]
            box2: Tensor of shape [M, 4]

        Returns:
            iou:    Tensor of shape [N, M]: the NxM matrix containing the pairwise IoU values
                    for every element in boxes1 and boxes2
        c                 0    | d   | d   z
  | d   | d   z
  z  S )Nr   r   r   r>   r   )boxs    r   box_areaz7ObjectDetectionEvalProcessor._box_iou.<locals>.box_areaF  s%    FSVOAQ88r   Nr   r   )Tr\   r   r   clampprod)r   r   r   area1area2inters         r   _box_iouz%ObjectDetectionEvalProcessor._box_iou6  s     	9    YYtAtQRK($q!"u+644QSRSQSCTVZ[\^`_`^`[`Va9bbU1XT!W 	
 ag.677r   preds_box_xyxytargets_box_xyxytargets_clspreds_matchedtargets_matchedpreds_idx_to_useiou_thresholdsc	                    | j                  ||   |      }	||   j                  dd      |j                  dd      k7  }
d|	|
<   |	j                  dd      \  }}||d   kD  j                  d      D ]x  \  }}||   }|||f   }|||f   |kD  }t	        j
                  ||ddf    ||ddf          }t	        j
                  ||      }d|||f<   d|||f<   |j                         sw |S  |S )	a  
        Computes the matching targets based on IoU for regular scenarios.

        Args:
            preds_box_xyxy: (torch.Tensor) Predicted bounding boxes in XYXY format.
            preds_cls: (torch.Tensor) Predicted classes.
            targets_box_xyxy: (torch.Tensor) Target bounding boxes in XYXY format.
            targets_cls: (torch.Tensor) Target classes.
            preds_matched: (torch.Tensor) Tensor indicating which predictions are matched.
            targets_matched: (torch.Tensor) Tensor indicating which targets are matched.
            preds_idx_to_use: (torch.Tensor) Indices of predictions to use.

        Returns:
            targets: Computed matching targets.
        r   r>   r   T)r   stableFr   N)r   r   r   r   r\   logical_andall)r+   r   r   r   r   r   r   r   r   ioucls_mismatch
sorted_ioutarget_sortedpred_selected_itarget_sorted_ipred_itarget_iis_iou_above_thresholdare_candidates_freeare_candidates_goods                       r   _compute_targetsz-ObjectDetectionEvalProcessor._compute_targetsU  s\   8 mmN+;<>NO !!1277A>+BRBRSTVXBYYL
 %(HHTH$J!
M 2<nQ>O1O0X0X 1Y 1
 	,O_
 &o6F$_o%EFH &00P%QTb%b" #("3"3vqy))OHaK,H+H#
 #("3"34JL_"`
 >BOH&99:9=M&"556 ""$=	< r   rQ   rR   rS   rT   return_on_cpuc           
         | j                   j                  | j                        }t        |      }|t        |      dk(  rt	        j
                  d|ft        j                  | j                        }	t	        j
                  d|ft        j                  | j                        }
t	        j                  g t        j                  | j                        }t	        j                  g t        j                  | j                        }|dddf   j                  | j                        }|	|
|||fS t	        j
                  t        |      |t        j                  | j                        }	t	        j
                  t        |      |t        j                  | j                        }t	        j
                  t        |      |t        j                  | j                        }
|dddf   |ddddf   |dddf   }}}|dddf   |ddddf   }}| j                  |||      }d	|
ddddf<   d
|
|<   t        |      dkD  r,| j                  |||f       | j                  |||||	|||      }	|	|
|||fS )aC  
        Match predictions (NMS output) and the targets (ground truth) with respect to metric
        and confidence score for a given image.

        Args:
            preds:          Tensor of shape (num_img_predictions, 6)
                            format: (x1, y1, x2, y2, confidence, class_label)
                            where x1,y1,x2,y2 are according to image size
            targets:        targets for this image of shape (num_img_targets, 5)
                            format:     (label, x1, y1, x2, y2)
                            where x1,y1,x2,y2 are according to image size
            height:         dimensions of the image
            width:          dimensions of the image
            top_k:          Number of predictions to keep per class, ordered by confidence score
            return_on_cpu:  If True, the output will be returned on "CPU", otherwise it will be
                            returned on "device"

        Returns:
            preds_matched:      Tensor of shape (num_img_predictions, n_thresholds)
                                True when prediction (i) is matched with a target with respect to
                                the (j)th threshold
            preds_to_ignore:    Tensor of shape (num_img_predictions, n_thresholds)
                                True when prediction (i) is matched with a crowd target with
                                respect to the (j)th threshold
            preds_scores:       Tensor of shape (num_img_predictions),
                                confidence score for every prediction
            preds_cls:          Tensor of shape (num_img_predictions),
                                predicted class for every prediction
            targets_cls:        Tensor of shape (num_img_targets),
                                ground truth class for every target
        r   Nr   )dtyper(   r      r>      TF)r   r*   r(   r   r\   zerosboolr   float32r   r   r   )r+   rQ   rR   rS   rT   r   r   
thresholdsnum_thresholdsr   preds_to_ignorer   r   r   r   	preds_boxtargets_boxr   s                     r   rV   z=ObjectDetectionEvalProcessor._compute_page_detection_matching  s&   R ((++4;;+?
Z=CJ!O!KKN(;5::VZVaVabM#kk1n*=UZZX\XcXcdO <<%--TLRu}}T[[QI!!Q$-**$++*>K /<KWWJejj
  ++L.

4;;
  ++Jejj
 .31b5\5AaC=%PQSTPT+l9	#*1a4='!QqS&/[  66|YPUV $1,1()w<!33EFE?K 11 	M o|YSSr   r   c           	         |j                  | j                        |j                  | j                        }}|j                  | j                        |j                  | j                        |j                  | j                        }}}| j                  j                  | j                        }| j                  }t	        j
                  |      j                         }t        |      |j                  d   }
}	t	        j                  |	|
f| j                        }t	        j                  |	|
f| j                        }t	        j                  |	|
f| j                        }t        |      D ]\  \  }}||k(  ||k(  }}| j                  ||   ||   ||   |j                         ||      \  }}}|||ddf<   |||ddf<   |||ddf<   ^ d|z  |z  ||z   dz   z  }|||||fS )a  
        Compute the list of precision, recall, MaP and f1 for every class.

        Args:
            preds_matched:      Tensor of shape (num_predictions, n_iou_thresholds)
                                True when prediction (i) is matched with a target with respect
                                to the (j)th IoU threshold
            preds_to_ignore     Tensor of shape (num_predictions, n_iou_thresholds)
                                True when prediction (i) is matched with a crowd target with
                                respect to the (j)th IoU threshold
            preds_scores:       Tensor of shape (num_predictions),
                                confidence score for every prediction
            preds_cls:          Tensor of shape (num_predictions),
                                predicted class for every prediction
            targets_cls:        Tensor of shape (num_targets),
                                ground truth class for every target box to be detected

        Returns:
            ap, precision, recall, f1:  Tensors of shape (n_class, nb_iou_thrs)
            unique_classes:             Vector with all unique target classes
        r   r   )r   r   r   	n_targetsrecall_thresholdsscore_thresholdNr   gؗҜ<)r*   r(   r   r   r\   uniquelongr   shaper   r`   "_compute_detection_metrics_per_clssum)r+   r   r   r   r   r   r   r   unique_classesn_classnb_iou_thrsr   r   r	   cls_iclass_valuecls_preds_idxcls_targets_idxcls_apcls_precision
cls_recallr   s                         r   r^   z7ObjectDetectionEvalProcessor._compute_detection_metrics  s   > *7)9)9$++)FHZHZKKI
 OODKK(LL%NN4;;' "-i !2255dkkB..k2779">2M4G4G4K[[';/DKK+ 6t{{K	g{3DKKH"+N"; 	*E;.7;.F{* +M 77+M: / >)-8)--/"3 / 8 	 "BuaxL"/IeQh)F5!8%	*( ]V#y6'9E'AB9fb.88r   r   r   r   c                    |j                   d   }|}t        j                  t        j                  |      t        j                  |            }	t	        |      dk(  rbt        j
                  || j                        t        j
                  || j                        t        j
                  || j                        fS |j                  r,|j                  t        j                  u rt        j                  n|j                  }
t        j                  |j                  |
      d      }||ddf   }|	|ddf   }	||   j                         }t        j                  |dt        j                        }t        j                  |	dt        j                        }||z  }|||z   t        j                   t        j"                        j$                  z   z  }|j'                  d      j)                  d      j*                  j'                  d      }t        j,                  | | d      }|dk(  rCt        j
                  || j                        }t        j
                  || j                        }n||d	z
     }||d	z
     }|j/                  d	d      j1                  |d	      }t        j,                  |j2                  j                         |d
      j2                  }t        j4                  |t        j
                  d	|| j                        fd      }t        j6                  ||d      }|j9                  d      }|||fS )a  
        Compute the list of precision, recall and MaP of a given class for every recall threshold.

        Args:
            preds_matched:      Tensor of shape (num_predictions, n_thresholds)
                                True when prediction (i) is matched with a target
                                with respect to the(j)th threshold
            preds_to_ignore     Tensor of shape (num_predictions, n_thresholds)
                                True when prediction (i) is matched with a crowd target
                                with respect to the (j)th threshold
            preds_scores:       Tensor of shape (num_predictions),
                                confidence score for every prediction
            n_targets:          Number of target boxes of this class
            recall_thresholds:  Tensor of shape (max_n_rec_thresh)
                                list of recall thresholds used to compute MaP
            score_threshold:    Minimum confidence score to consider a prediction
                                for the computation of precision and recall (not MaP)

        Returns:
            ap, precision, recall:     Tensors of shape (nb_thrs)
        r   r   r   Tr   N)axisr   )rightr>   Fr   )inputr   r   )r   r\   r   logical_notr   r   r(   is_cudar   r   uint8argsortr*   
contiguouscumsumr   finfofloat64epsflipcummaxvaluessearchsortedr   repeatr   r]   gatherr_   )r+   r   r   r   r   r   r   r   tpsfpsr   sort_indrolling_tpsrolling_fpsrolling_recallsrolling_precisionslowest_score_above_thresholdr	   r   recall_threshold_idxsampled_precision_pointsr   s                         r   r   z?ObjectDetectionEvalProcessor._compute_detection_metrics_per_cls@  s   @ $))"-m,e.?.?.P
 s8q=K<K<K<  ##(:(:ejj(H KK## 	
 ==!7DI(A+(A+#H-88: ll3QekkBll3QekkB%	1(+%EMM(B(F(FF

 044Q7>>qAHHMMaP (-'9'9MO+4(
$
 )A-[[T[[AFDKKI %%AA%EFF*+G!+KLI .221b9@@aP  %11((*,=U 

! 	 #YYQDKK!PQWX

 $)<<$,@a$
 
 &**1-9f$$r   N)cpu)F)d   T)%r   r   r   IOU_THRESHOLDSr   SCORE_THRESHOLDr   RECALL_THRESHOLDSr   r[   r\   Tensorintr    r.   r!   r   rO   tupler   r   rz   staticmethodr   rH   r   rG   r   rX   ndarrayr   r   r   rV   r^   r   r   r   r   r   r#   r#   0   sx   #N%O) )U\\*) u||,) 3i	)
 #Y) 3i) )@ 2^"2^ !%2^ 
(	2^ 2^hZ6	24UU	VZ6x 	)T 	)eD$J.? 	) 	) D D TRVZ  , "ll"/4||"DG"	" "4 zz&+CHo	 " 8u|| 85<< 8ELL 8 8<II <<I  ,,	I
 \\I ||I I  ,,I I 
Ib "TT||TT TT 	TT
 TT TT TT 
TTlH9||H9 H9 ll	H9
 <<H9 \\H9 
H9Ty%||y% y% ll	y%
 y% !<<y% y%r   r#   __main__)asdictzpths/to/predictions.jsonzpths/to/predictions2.jsonzpths/to/ground_truth.jsonzpths/to/ground_truth2.jsonzMetrics for z:
zPer class Metrics for )r   rB   dataclassesr   pathlibr   numpyrX   r\   r   r  r  r   r  r   r   r#   r   r  prediction_file_pathsground_truth_file_pathsrE   r/   r0   rO   eval_processorrz   metricsper_class_metricsprintrF   r   r   r   <module>r(     sh    !   T  ELLD$/     6 6 6$I
% I
%X z" ""<=tD_?`a())*
 9<69 	d44 6EE "8
 &4%?%?%A""3889VG_<MNO&'='B'B&C3vN_G`Fabc	d r   