
    zIgP                        d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
mZmZmZmZ d dlZd dlZd dlmZmZmZmZ d dlZddlmZmZmZ dej6                  d	ej6                  d
ej6                  fdZ	 	 d-dej6                  d	ej6                  dee   ded
ej6                  f
dZ G d d      Z  G d d      Z! G d de      Z" G d de jF                        Z$ G d d      Z% G d de%      Z& G d de%      Z' G d de'      Z( G d  d!e'      Z) G d" d#e'      Z* G d$ d%e jF                        Z+ G d& d'e+      Z,dd(e"jZ                  d)i fd*ee.ef   d+eee.      fd,Z/y).    N)Enum)Path)DictOptionalSequenceTupleUnion)
ModelProtoTensorProtohelpernumpy_helper   )
apply_plotload_model_with_shape_infersmooth_distributionpkqkreturnc                    t        j                  | j                  | j                        }| dd t        j                  | dd |dd z        z  |dd | dk(  |dk\  z  }d||<   | dkD  |dkD  z  }t         j
                  || <   |S )z
    See https://docs.scipy.org/doc/scipy/reference/generated/scipy.special.rel_entr.html#scipy.special.rel_entr.
    Python implementation.
    dtypeNr   )npemptyshaper   loginf)r   r   resc2c1s        _/var/www/html/answerous/venv/lib/python3.12/site-packages/onnxruntime/quantization/calibrate.pyrel_entrr!      s    
 ((288288
,CURVVBqEBqEM**CF
'bAg	BCG
q&R!V	BvvCHJ    baseaxisc                 R   ||dkD  sJ d       |J d       t        j                  |       j                  t         j                        } d| z  t        j                  | |d      z  } t        j                  |      j                  t         j                        }t        j
                  | |      \  } }d|z  t        j                  ||d      z  }t        | |      }t        j                  ||      }||t        j                  |      z  }|j                  | j                        S )z
    Simplifeied version of entropy.
    Source: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.entropy.html.
    This avoids taking a dependency on scipy just for this function.
    r   z0base={base} must be a positive number or `None`.z
qk is None      ?T)r$   keepdimsr$   )	r   asarrayastypefloat32sumbroadcast_arraysr!   r   r   )r   r   r#   r$   vecss         r    entropyr0   '   s     <4!8W%WW#>'<'>	B		rzz	*B	rBFF2D48	8B	B		rzz	*B  R(FB	rBFF2D48	8B
2r
C
sA	RVVD\88BHHr"   c                   Z    e Zd Z eg d      Z eg d      Zd Zed        Zed        Z	y)
TensorData)avgstdlowesthighesthist
hist_edgesbins)r3   r4   r5   r6   r8   c                    |j                         D ]  \  }}|t        j                  vr t        d|dt        j                   d      |t        j                  v rmt        |d      st        dt        |       d|      |j                  t        j                  t        j                  fvrt        d|j                   d|      t        | ||        y )NzUnexpected value z not in .r   Unexpected type z for k=zUnexpected dtype )itemsr2   _allowed
ValueError_floatshasattrtyper   r   float16r+   setattr)selfkwargskvs       r    __init__zTensorData.__init__G   s    LLN 	 DAq
+++ #4QE*BUBUAVVW!XYYJ&&&q'*$'7Qyu%MNN772::rzz"::$'8	%NOOD!Q	 r"   c                     t        | d      rt        | d      st        dt        |        d      | j                  | j                  fS )Nr5   r6   z0Attributes 'lowest' and/or 'highest' missing in r;   )rA   AttributeErrordirr5   r6   rE   s    r    range_valuezTensorData.range_valueR   sF    tX&gdI.F #STWX\T]S^^_!`aaT\\**r"   c                     t        | d      rt        | d      st        dt        |        d      | j                  | j                  fS )Nr3   r4   z)Attributes 'avg' and/or 'std' missing in r;   )rA   rK   rL   r3   r4   rM   s    r    avg_stdzTensorData.avg_stdX   sC    tU#74+? #LSQUYKWX!YZZ$((##r"   N)
__name__
__module____qualname__	frozensetr>   r@   rI   propertyrN   rP    r"   r    r2   r2   C   sE    Z[HIJG	  + +
 $ $r"   r2   c                   P    e Zd Zdeeeeef   f   fdZd Z	d Z
d Zd Zd Zd Zy	)
TensorsDatadatac           
      b   || _         i | _        |j                         D ]  \  }}t        |t              st        dt        |       d      t        |t              r|t        j                  k(  r/t        |      dk(  r!t        |d   |d         | j                  |<   t        |      dk(  r)t        |d   |d   |d   |d   	      | j                  |<   t        d
|ddt        |       d| d      t        |t              st        dt        |       d      || j                  |<    y )NzKeys must be strings not r;      r   r   r5   r6         )r5   r6   r7   r9   zUnexpected tuple for rz	, it has z elements: zValues must be TensorData not )calibration_methodrY   r=   
isinstancestr	TypeErrorrB   tupleCalibrationMethodMinMaxlenr2   )rE   r`   rY   rG   rH   s        r    rI   zTensorsData.__init__`   s$   "4	JJL 	DAqa%";DG9A FGG!U#%):)A)AAc!fPQk#-QqT1Q4#HDIIaLq6Q;#-QqT1Q4aPQdYZ[\Y]#^DIIaL"7!uIc!fX[YZX[[\ ]^^a,"@a	 KLLDIIaL	r"   c              #   8   K   | j                   E d {    y 7 wNrY   rM   s    r    __iter__zTensorsData.__iter__r   s     99s   c                     || j                   v S ri   rj   rE   keys     r    __contains__zTensorsData.__contains__u   s    diir"   c                      | j                   |   S ri   rj   rm   s     r    __getitem__zTensorsData.__getitem__x   s    yy~r"   c                 \    || j                   vrt        d|d      || j                   |<   y )Nz)Only an existing tensor can be modified, z is not.)rY   RuntimeError)rE   rn   values      r    __setitem__zTensorsData.__setitem__{   s1    dii!J3'QYZ[[		#r"   c                 6    | j                   j                         S ri   )rY   valuesrM   s    r    rw   zTensorsData.values   s    yy!!r"   c                 6    | j                   j                         S ri   )rY   r=   rM   s    r    r=   zTensorsData.items   s    yy  r"   N)rQ   rR   rS   r   rb   r	   r2   r   rI   rk   ro   rq   ru   rw   r=   rV   r"   r    rX   rX   _   s@    c5UAR;S6S1T $ 
"!r"   rX   c                       e Zd ZdZdZdZdZy)re   r   r   r[   r^   N)rQ   rR   rS   rf   Entropy
PercentileDistributionrV   r"   r    re   re      s    FGJLr"   re   c                   h    e Zd Zed        Zej                  defd       Zd Z	d Z
d Zdedefd	Zy
)CalibrationDataReaderc                 X    t        |d      xr t        |j                        xs t        S )Nget_next)rA   callabler   NotImplemented)clssubclasss     r    __subclasshook__z&CalibrationDataReader.__subclasshook__   s%    x,L(:K:K1L^P^^r"   r   c                     t         )z9generate the input data dict for ONNXinferenceSession runNotImplementedErrorrM   s    r    r   zCalibrationDataReader.get_next   s
     "!r"   c                     | S ri   rV   rM   s    r    rk   zCalibrationDataReader.__iter__   s    r"   c                 6    | j                         }|t        |S ri   )r   StopIteration)rE   results     r    __next__zCalibrationDataReader.__next__   s    >r"   c                     t         ri   r   rM   s    r    __len__zCalibrationDataReader.__len__       !!r"   start_index	end_indexc                     t         ri   r   )rE   r   r   s      r    	set_rangezCalibrationDataReader.set_range   r   r"   N)rQ   rR   rS   classmethodr   abcabstractmethoddictr   rk   r   r   intr   rV   r"   r    r~   r~      sY    _ _ 	"$ " """S "S "r"   r~   )	metaclassc                       e Zd Z	 	 	 	 	 ddeeef   deee      fdZdgfdZ	d Z
defd	Zd
 Zd ZdefdZdefdZy)CalibraterBaseN
model_pathop_types_to_calibratec                 "   t        |t              rt        t        |            | _        n,t        |t              rt        |      | _        nt        d      || _        || _        || _        || _	        || _
        d| _        d| _        dg| _        y)a  
        :param model_path: ONNX model to calibrate. It should be a model file path
        :param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
        :param augmented_model_path: save augmented model to this path.
        :param symmetric: make range of tensor symmetric (central point is 0).
        :param use_external_data_format: use external data format to store model which size is >= 2Gb.
        :param per_channel: whether to compute ranges per each channel.
        z model_path should be model path.NCPUExecutionProvider)ra   rb   r   r   modelr?   r   augmented_model_path	symmetricuse_external_data_formatper_channelaugment_modelinfer_sessionexecution_providers)rE   r   r   r   r   r   r   s          r    rI   zCalibraterBase.__init__   s    " j#&4T*5EFDJ
D)4Z@DJ?@@%:"$8!"(@%&!!$:#; r"   r   c                 2    || _         | j                          y)zz
        reset the execution providers to execute the collect_data. It triggers to re-creating inference session.
        N)r   create_inference_session)rE   r   s     r    set_execution_providersz&CalibraterBase.set_execution_providers   s     $7 %%'r"   c                     t        j                         }t         j                  j                  |_        t        j
                  | j                  || j                        | _        y)z9
        create an OnnxRuntime InferenceSession.
        )sess_options	providersN)	onnxruntimeSessionOptionsGraphOptimizationLevelORT_DISABLE_ALLgraph_optimization_levelInferenceSessionr   r   r   )rE   r   s     r    r   z'CalibraterBase.create_inference_session   sN     #1130;0R0R0b0b-(99%%%..
r"   r   c                    |j                   j                  D ci c]  }|j                  | }}|j                  |j                   j                  D ci c]  }|j                  | c}       |j                  |j                   j
                  D ci c]  }|j                  | c}       |j                   j                  D ch c]  }|j                   }}t               }t        j                  t        j                  h}	|j                   j                  D ]  }
| j                  r|
j                  | j                  v s(t        j                  |
j
                  |
j                        D ]a  }||v s||   }|j                   j#                  d      s)|j                   j$                  j&                  |	v sL||vsQ|j)                  |       c  ||fS c c}w c c}w c c}w c c}w )z
        select input/output tensors of candidate nodes to calibrate.
        returns:
            tensors (set): set of tensor name.
            value_infos (dict): tensor name to value info.
        tensor_type)graph
value_infonameupdateoutputinputinitializersetr   FLOATFLOAT16noder   op_type	itertoolschainrB   HasFieldr   	elem_typeadd)rE   r   vivalue_infosotitinitr   tensors_to_calibratetensor_type_to_calibrater   tensor_names               r    select_tensors_to_calibratez*CalibraterBase.select_tensors_to_calibrate   s    .3[[-C-CDrrww{DD%++2D2DEBBGGRKEF%++2C2CDBBGGRKDE-2[[-D-DETtyyEE"u$/$5$5{7J7J#K KK$$ 
	BD--A[A[1[#,??4::t{{#K BK"k1(5GG,,];!#!4!4!>!>BZ!Z!,K!?044[AB
	B $[00) EEDEs   GGGG#c                     | j                   S )zP
        return: augmented onnx model. Call after calling augment_graph
        )r   rM   s    r    get_augment_modelz CalibraterBase.get_augment_model   s     zzr"   c                     t         )z
        abstract method: augment the input model to prepare for collecting data. It will:
            1. augment the model to be able to collect desired statistics data
            2. save augmented model to augmented_model_paths
        r   rM   s    r    augment_graphzCalibraterBase.augment_graph  s
     "!r"   data_readerc                     t         )z
        abstract method: collect the tensors that will be used for range computation. It can be called multiple times.
        r   )rE   r   s     r    collect_datazCalibraterBase.collect_data	  
     "!r"   r   c                     t         )ze
        abstract method: compute data based on the calibration method stored in TensorsData
        r   rM   s    r    compute_datazCalibraterBase.compute_data  r   r"   )Naugmented_model.onnxFFF)rQ   rR   rS   r	   rb   r   r   r   rI   r   r   r
   r   r   r   r~   r   rX   r   rV   r"   r    r   r      s     :>3!& <#t)$ <  (6 <D <R:R (

1 1:""(= ""k "r"   r   c                   z     e Zd Z	 	 	 	 	 	 	 	 ddeeef   deee      f fdZd Z	d Z
defdZd Zd	efd
Z xZS )MinMaxCalibraterr   r   c
                    t         |   ||||||	       g | _        d| _        t	        | j
                  j                  j                        | _        | j
                  j                  j                  D 
ch c]  }
|
j                   c}
| _
        || _        |r|dk  s|dkD  rt        d      || _        || _        yc c}
w )aw  
        :param model_path: ONNX model to calibrate. It is a model path
        :param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
        :param augmented_model_path: save augmented model to this path.
        :param symmetric: make range of tensor symmetric (central point is 0).
        :param use_external_data_format: use external data format to store model which size is >= 2Gb
        :param moving_average: compute the moving average of the minimum and maximum values instead of the global minimum and maximum.
        :param averaging_constant: constant smoothing factor to use when computing the moving average.
        :param max_intermediate_outputs: maximum number of intermediate outputs before an intermediate range is computed.
        :param per_channel: whether to compute ranges per each channel.
        )r   r   r   r   r   Nr   r   z;Invalid averaging constant, which should not be < 0 or > 1.)superrI   intermediate_outputscalibrate_tensors_rangerg   r   r   r   num_model_outputsr   model_original_outputsmoving_averager?   averaging_constantmax_intermediate_outputs)rE   r   r   r   r   r   r   r   r   r   r   	__class__s              r    rI   zMinMaxCalibrater.__init__  s    . 	"7!5%=# 	 	
 %'!'+$!$TZZ%5%5%<%<!=AEAQAQAXAX&Yvv{{&Y#,1A59Ka9OZ[["4(@% 'Zs   5B=c                      j                   j                        \  }}t        t        j                               t        j                  t        j                  dgt        j                              } j                  j                  j                  j                  |       d  fd}|D ]  } ||d        ||d        t        j                   j                   j                   j                          y)	z
        Adds ReduceMin and ReduceMax nodes to all quantization_candidates op type nodes in
        model and ensures their outputs are stored as part of the graph output
        :return: augmented ONNX model
        r   c                     |j                   D ]:  }t        j                  j                  | |j                        s.|j
                  c S  t        d|  d      )Nz&Model does not contain a version for 'z'.)opset_importonnxdefshasdomainversionrs   )r   r   r   s      r    get_op_versionz6MinMaxCalibrater.augment_graph.<locals>.get_op_versionK  sS     % 2 2 099==,*=*=>'///0 !GyPRSTTr"   c                 4   d}| dz   |z   }|dz   }t         j                  j                  || g|g||      }t         j                  j                  d|g|g|      }j                  j                  j
                  D ci c]  }|j                  | }}|j                  j                  j                  j                  D 	ci c]  }	|	j                  |	 c}	       |j                  j                  j                  j                  D 
ci c]  }
|
j                  |
 c}
       | |v r$||    j                  j                  j                  }nt        d| d      j                  r+t        ||    j                  j                  j                   j"                        }d	gt%        d
|      } |j                        dk  r0|j&                  j)                  t        j*                  d|             nt-        t/        j0                               }t3        j4                  t7        j8                  |t6        j:                        |      }|j                  j)                  |       j                  j                  j<                  j)                  |       j                  j                  j>                  jA                  ||g       j                  j                  j                  j)                  t        jB                  ||d g             y c c}w c c}	w c c}
w )Nr   __Reshape)r'   r   Reshape)inputsoutputsr   z'Unable to guess tensor type for tensor zE, running shape inference before quantization may resolve this issue.r   r[      axesr   )"r   r   	make_noder   r   r   r   r   r   r   rB   r   r   r?   r   rg   r   dimrange	attributeappendmake_attributerb   uuiduuid4r   
from_arrayr   arrayint64r   r   extendmake_tensor_value_info)r   reduce_op_namer'   reduce_outputintermediate_outputreduce_nodereshape_noder   r   oi	onnx_typetensor_rankreduced_axesreduce_axes_namereduce_axesr   reshape_shape_namerE   s                   r    add_reduce_min_maxz:MinMaxCalibrater.augment_graph.<locals>.add_reduce_min_maxQ  s    H (#->M"/*"<++//0C/Dx^k 0 K  ;;00+-?@&(	 1 L 261A1A1L1LM2277B;MKM4::3C3C3J3JKa	KL4::3C3C3I3IJa	JKk)'499EEOO	 =k_ MZ [  !+k":"?"?"K"K"Q"Q"U"UV !:E![$9:!.$**=B))001F1Fv|1\]'*4::<'8$"."9"9"((<WYW_W_:`br"sK%%,,-=>JJ$$0077DJJ!!((+|)DEJJ##**6+H+HXadhci+jk3 NKJs   ?LL
L	ReduceMin	ReduceMaxsave_as_external_dataN)r   r   rb   r   r  r   r  r   r  r  r   r   r   r   saver   r   )rE   tensorsr   reshape_shaper  tensorr   r  s   `     @@r    r   zMinMaxCalibrater.augment_graph@  s     55djjA
 .$//"RXX0NPbc

$$++M:	U,	l\  	4Fv{3v{3	4 			JJ%%"&"?"?	
r"   c                     g | _         y ri   r   rM   s    r    clear_collected_dataz%MinMaxCalibrater.clear_collected_data  
    $&!r"   r   c                    	 |j                         }|snt| j                  j                  | j                  j	                  d |             | j
                  2t        | j                        | j
                  k(  r| j                          t        | j                        dk(  r| j                  t        d      | j                         }t        |t              st        dt        |       d      | j                          y )Nr   No data is collected.z+compute_data must return a TensorsData not r;   )r   r   r   r   runr   rg   r  r   r?   r   ra   rX   rc   rB   )rE   r   r   ts       r    r   zMinMaxCalibrater.collect_data  s     ))+F%%,,T-?-?-C-CD&-QR--9112d6S6SS))+  t(()Q.43O3O3W455![)I$q'RSTUU!!#r"   c                 >   |s|S |j                         D ]  \  }}t        |t              r|j                  d   }|j                  d   }n|\  }}t        ||   t              r%||   j                  d   }||   j                  d   }n||   \  }}| j                  r+|| j
                  ||z
  z  z   }	|| j
                  ||z
  z  z   }
nt        ||      }	t        ||      }
t        |t              st        ||   t              rt        |	|
      ||<   |	|
f||<    |S )Nr   r   r\   )r=   ra   r2   rN   r   r   minmax)rE   	old_range	new_rangern   rt   old_minold_maxnew_minnew_max	min_value	max_values              r    merge_rangezMinMaxCalibrater.merge_range  s1   #//+ 	8JC%,++A.++A.#( )C.*5#C.44Q7#C.44Q7#,S> ""#d&=&=7AR&SS	#d&=&=7AR&SS	1	1	 %,
9S>:0V!+9i!P	#"+Y!7	#3	86 r"   r   c           	         t        | j                        dk(  r| j                  S t        t        | j                  d               D cg c])  }| j                  j                         |   j                  + }}| j                  D cg c]  }t        t        ||             }}i }|D ];  }|j                         D ]&  \  }}|j                  |g       j                  |       ( = || j                  d }	t        dt        |	      d      D cg c]  }|	|   j                  d      d    }
}|D ci c]  }|| j                  vs|||    }}g }t        dt        |	      d      D ]  }| j                  r>t!        j"                  ||	|      d      }t!        j"                  ||	|dz         d      }n=t!        j$                  ||	|      d      }t!        j&                  ||	|dz         d      }| j(                  r]t!        j&                  t!        j*                  |      t!        j*                  |      gd      }|j                  t-        | |g             |j                  t-        ||g              t/        t0        j2                  t        t        |
|                  }| j                  r-| j5                  | j                  |      | _        | j                  S || _        | j                  S c c}w c c}w c c}w c c}w )z
        Compute the min-max range of tensor
        :return: dictionary mapping: {added node names: (ReduceMin, ReduceMax) pairs }
        r   Nr[   r   r(   r   )rg   r   r   r   r   get_outputsr   r   zipr=   
setdefaultr   r   
rpartitionr   r   r   meanr&  r'  r   absrd   rX   re   rf   r0  )rE   r  output_namesr	  output_dicts_listmerged_output_dictdrG   rH   added_output_namescalibrate_tensor_namesmerged_added_output_dictpairsmin_value_arraymax_value_arraymax_absolute_valuenew_calibrate_tensors_ranges                    r    r   zMinMaxCalibrater.compute_data  s    t(()Q.///JOPSTXTmTmnoTpPqJrsQ**668;@@ssTXTmTm
=PD\#678
 
  " 	?A	 ?1"--a4;;A>?	? *$*@*@*BC>CAsK]G^`a>b"
9:q!,,S1!4"
 "

 /A$
)*ATMhMhDhA!!$$$
  $
 q#0115 	HA"""$''*BCUVWCX*Y`a"b"$''*BCUVWZ[V[C\*]de"f"$&&)ABTUVBW)X_`"a"$&&)ABTUVYZUZB[)\cd"e~~%'VVRVVO-Dbff_F],^ef%g"U%7$79K#LMNUO_#EFG	H '22C2J2JDQTUkmrQsLt&u#''+/+;+;D<X<XZu+vD( +++ ,GD(+++O t
"
$
s   .K3K8K=1LL)Nr   FFF{Gz?NF)rQ   rR   rS   r	   rb   r   r   r   rI   r   r  r~   r   r0  rX   r   __classcell__r   s   @r    r   r     su     :>3!&!%'A#t)$'A  (6'ARG
R'$(= $(B0,k 0,r"   r   c                   v     e Zd Z	 	 	 	 	 	 	 	 	 d
deeef   deee      f fdZd Z	d Z
defdZdefd	Z xZS )HistogramCalibraterr   r   c                    t         |   |||||       g | _        d| _        t	        | j
                  j                  j                        | _        | j
                  j                  j                  D ch c]  }|j                   c}| _
        d| _        || _        || _        || _        |	| _        d| _        |
| _        yc c}w )a=  
        :param model_path: ONNX model to calibrate. It is a model path.
        :param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
        :param augmented_model_path: save augmented model to this path.
        :param use_external_data_format: use external data format to store model which size is >= 2Gb
        :param method: A string. One of ['entropy', 'percentile'].
        :param symmetric: make range of tensor symmetric (central point is 0).
        :param num_bins: number of bins to create a new histogram for collecting tensor values.
        :param num_quantized_bins: number of quantized bins. Default 128.
        :param percentile: A float number between [0, 100]. Default 99.99.
        :param scenario: see :class:`DistributionCalibrater`
        )r   r   r   r   N)r   rI   r   r   rg   r   r   r   r   r   r   	collectormethodnum_binsnum_quantized_bins
percentiler   scenario)rE   r   r   r   r   rK  r   rL  rM  rN  rO  r   r   s               r    rI   zHistogramCalibrater.__init__  s    2 	"7!5%= 	 	
 %'!'+$!$TZZ%5%5%<%<!=AEAQAQAXAX&Yvv{{&Y# "4$$(!  'Zs   4Cc                 Z   | j                  | j                        \  | _        }| j                  D ]C  }|| j                  vs| j                  j                  j
                  j                  ||          E t        j                  | j                  | j                  | j                         y)z
        make all quantization_candidates op type nodes as part of the graph output.
        :return: augmented ONNX model
        r  N)r   r   r   r   r   r   r   r   r  r   r   )rE   r   r  s      r    r   z!HistogramCalibrater.augment_graph!  s    
 261Q1QRVR\R\1].!;// 	DFT888

  ''..{6/BC	D 			JJ%%"&"?"?	
r"   c                     g | _         y ri   r  rM   s    r    r  z(HistogramCalibrater.clear_collected_data1  r   r"   r   c           	         	 |j                         }|sn6| j                  j                  | j                  j	                  d|             It        | j                        dk(  rt        d      t        t        | j                  d               D cg c])  }| j                  j                         |   j                  + }}| j                  D cg c]  }t        t        ||             }}i }|D ];  }|j                         D ]&  \  }	}
|j                  |	g       j                  |
       ( = |D ci c]  }|| j                  v s|||    }}| j                  sRt!        | j"                  | j$                  | j&                  | j(                  | j*                  | j,                        | _        | j                  j/                  |       | j1                          yc c}w c c}w c c}w )zy
        Entropy Calibrator collects operators' tensors as well as generates tensor histogram for each operator.
        Nr   r"  )rK  r   rL  rM  rN  rO  )r   r   r   r   r#  rg   r?   r   r2  r   r   r3  r=   r4  r   rJ  HistogramCollectorrK  r   rL  rM  rN  rO  collectr  )rE   r   r   r  r8  r	  r9  merged_dictr;  rG   rH   clean_merged_dicts               r    r   z HistogramCalibrater.collect_data4  s     ))+F%%,,T-?-?-C-CD&-QR	  t(()Q.455JOPSTXTmTmnoTpPqJrsQ**668;@@ssTXTmTm
=PD\#678
 
 " 	8A	 81&&q"-44Q78	8 9Df1qDLeLeGeQA.ff~~/{{..#'#:#:??DN 	01!!#/ t
 gs   .GG 3G%G%r   c                 n   | j                   st        d      t        | t              rt        j
                  }nZt        | t              rt        j                  }n9t        | t              rt        j                  }nt        dt        |        d      t        || j                   j                               S )z
        Compute the min-max range of tensor
        :return: dictionary mapping: {tensor name: (min value, max value)}
        z9No collector created and can't generate calibration data.zUnknown calibrater z". This method must be overwritten.)rJ  r?   ra   EntropyCalibraterre   rz   PercentileCalibraterr{   DistributionCalibraterr|   rc   rB   rX   compute_collection_result)rE   cals     r    r   z HistogramCalibrater.compute_dataZ  s    
 ~~XYYd-.#++C23#..C45#00C1$t*=_`aa3 H H JKKr"   )	Nr   FrN  F      -X@same)rQ   rR   rS   r	   rb   r   r   r   rI   r   r  r~   r   rX   r   rE  rF  s   @r    rH  rH    sp     :>3!&*!#t)$*!  (6*!X
 '$$(= $$LLk Lr"   rH  c                   N     e Zd Z	 	 	 	 	 	 	 ddeeef   deee      f fdZ xZ	S )rX  r   r   c	           
      4    t         	|   ||||||||       y)a  
        :param model_path: ONNX model to calibrate. It is a model path
        :param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
        :param augmented_model_path: save augmented model to this path.
        :param use_external_data_format: use external data format to store model which size is >= 2Gb
        :param method: A string. One of ['entropy', 'percentile', 'distribution'].
        :param symmetric: make range of tensor symmetric (central point is 0).
        :param num_bins: number of bins to create a new histogram for collecting tensor values.
        :param num_quantized_bins: number of quantized bins. Default 128.
        )rK  r   rL  rM  Nr   rI   )
rE   r   r   r   r   rK  r   rL  rM  r   s
            r    rI   zEntropyCalibrater.__init__n  s/    * 	! $1 	 		
r"   )Nr   Fr0   Fr]  r]  
rQ   rR   rS   r	   rb   r   r   r   rI   rE  rF  s   @r    rX  rX  m  sH     :>3!&
#t)$
  (6
 
r"   rX  c                   N     e Zd Z	 	 	 	 	 	 	 ddeeef   deee      f fdZ xZ	S )rY  r   r   c	           
      4    t         	|   ||||||||       y)a  
        :param model_path: ONNX model to calibrate. It is a model path
        :param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
        :param augmented_model_path: save augmented model to this path.
        :param use_external_data_format: use external data format to store model which size is >= 2Gb
        :param method: A string. One of ['entropy', 'percentile', 'distribution'].
        :param symmetric: make range of tensor symmetric (central point is 0).
        :param num_quantized_bins: number of quantized bins. Default 128.
        :param percentile: A float number between [0, 100]. Default 99.99.
        )rK  r   rL  rN  Nrc  )
rE   r   r   r   r   rK  r   rL  rN  r   s
            r    rI   zPercentileCalibrater.__init__  s/    * 	! $! 	 		
r"   )Nr   FrN  Fr^  r_  rd  rF  s   @r    rY  rY    sH     :>3!&
#t)$
  (6
 
r"   rY  c                   L     e Zd Z	 	 	 	 	 	 ddeeef   deee      f fdZ xZ	S )rZ  r   r   c           	      2    t         |   |||||||       y)a  
        :param model_path: ONNX model to calibrate. It is a model path
        :param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
        :param augmented_model_path: save augmented model to this path.
        :param use_external_data_format: use external data format to store model which size is >= 2Gb
        :param method: A string. One of ['entropy', 'percentile', 'distribution'].
        :param symmetric: make range of tensor symmetric (central point is 0).
        :param num_bins: number of bins to create a new histogram for collecting tensor values.
        :param scenario: for float 8 only, if `scenario="same"`,
            the algorithm weights and float 8 follow the same distribution,
            if `scenario="p3"`, it assumes the weights follow
            a gaussian law and float 8 ~ X^3 where X is a gaussian law
        )rK  rL  rO  Nrc  )	rE   r   r   r   r   rK  rL  rO  r   s	           r    rI   zDistributionCalibrater.__init__  s,    . 	! $ 	 	
r"   )Nr   Fdistributionr]  r`  rd  rF  s   @r    rZ  rZ    sE     :>3!&
#t)$
  (6
 
r"   rZ  c                   X    e Zd ZdZej
                  d        Zej
                  d        Zy)CalibrationDataCollectorzL
    Base class for collecting data for calibration-based quantization.
    c                     t         )z
        Generate informative data based on given data.
            name_to_arr : dict
                tensor name to NDArray data
        r   rE   name_to_arrs     r    rT  z CalibrationDataCollector.collect  s
     "!r"   c                     t         )z?
        Get the optimal result among collection data.
        r   rM   s    r    r[  z2CalibrationDataCollector.compute_collection_result  s
    
 "!r"   N)rQ   rR   rS   __doc__r   r   rT  r[  rV   r"   r    rk  rk    s;     	" " 	" "r"   rk  c                   d    e Zd ZdZd Zd Zd Zd Zd Zd Z	d Z
d	 Zd
 Zedd       Zd Zd Zy)rS  a`  
    Collecting histogram for each tensor. Percentile and Entropy method are supported.

    ref: https://github.com//apache/incubator-mxnet/blob/master/python/mxnet/contrib/quantization.py
    ref: https://docs.nvidia.com/deeplearning/tensorrt/pytorch-quantization-toolkit/docs/_modules/
                 pytorch_quantization/calib/histogram.html
    c                 f    i | _         || _        || _        || _        || _        || _        || _        y ri   )histogram_dictrK  r   rL  rM  rN  rO  )rE   rK  r   rL  rM  rN  rO  s          r    rI   zHistogramCollector.__init__  s5     " "4$ r"   c                     | j                   S ri   )rs  rM   s    r    get_histogram_dictz%HistogramCollector.get_histogram_dict  s    """r"   c                     t        d       | j                  dv r| j                  |      S | j                  dk(  r.| j                  r| j	                  |      S | j                  |      S t        d      )Nz/Collecting tensor data and making histogram ...>   r0   ri  rN  DOnly 'entropy', 'percentile' or 'distribution' methods are supported)printrK  collect_valuer   collect_absolute_valuer?   rm  s     r    rT  zHistogramCollector.collect  sl    ?@ ;;55%%k22[[L(~~22;??))+66cddr"   c                    |j                         D ]D  \  }}t        |t              rz|D ]2  }t        |t        j                        rJ dt        |       d|        t        d |D              }t        |      dk(  sJ d| d|       t        j                  |      }n6t        |t        j                        st        dt        |       d|      |}|j                         }|j                  dkD  r+t        j                  |      }t        j                  |      }nBt        j                  d|j                        }t        j                  d|j                        }t        j                   |      }|| j"                  vrxt        j$                  || j&                        \  }	}
|
j)                  |j                        }
|j                  t        j*                  k7  sJ d	       |	|
||f| j"                  |<   | j"                  |   }|d
   }|d   }t-        |d      sJ dt        |              t-        |d      sJ dt        |              |d   }|d   }t        j                  |      }||d   kD  rB|d   |d   z
  }t        j.                  |d   |z   ||z   |      }t        j0                  ||f      }t        j$                  ||      \  }	}
|
j)                  |j                        }
|	dt        |      xxx |z  ccc |j                  t        j*                  k7  sJ d	       |	|
t        ||      t        ||      f| j"                  |<   G y)z5
        Collect histogram on absolute value
        r<   z for tensor=c              3   4   K   | ]  }|j                     y wri   r   ).0as     r    	<genexpr>z<HistogramCollector.collect_absolute_value.<locals>.<genexpr>  s     7QWW7s   r   z6The calibration expects only one element type but got r   r   )r9   zMonly float32 or float16 is supported, every constant must be explicitly typedr[   r^   r   z'old_min should be a numpy array but is r   N)r=   ra   listr   ndarrayrB   r   rg   r)   r?   flattensizer&  r'  r  r   absoluters  	histogramrL  r*   float64rA   arangehstack)rE   rn  r  data_arrarrdtypesdata_arr_npr.  r/  r7   r8   old_histogramr*  r+  old_histold_hist_edges	temp_amaxwidthnew_bin_edgess                      r    rz  z)HistogramCollector.collect_absolute_value  sN    !, 1 1 3 4	sFH(D)# mC%c2::6l:J4PS9+Uabhak8ll6m7h77K1$kKF8S_`f_ijk$ jj2"**5 #3DN3C<PVz!Z[[&%--/K!#FF;/	FF;/	HHQk.?.?@	HHQk.?.?@	++k2KT000#%<<$--#P j'..{/@/@A
%%3cbc3/3ZI.V##F+ $ 3 3F ;'*'*w0k4[\`ah\i[j2kk0w0k4[\`ah\i[j2kk0(+!.q!1FF;/	~b11*1-q0AAE$&IInR.@5.H)V[J[]b$cM%'YY/N%ON#%<<.#Q j'..{/@/@A
_s8}%1%%%3cbc3/3ZWiAXZ]^egpZq.r##F+i4	sr"   c           	         |j                         D ]a  \  }}t        j                  |      }|j                         }|j                  dkD  r+t        j
                  |      }t        j                  |      }nBt        j                  d|j                        }t        j                  d|j                        }t        j                  t        t        |      t        |            |j                        }|| j                  v r3| j                  |   }| j                  |||||      | j                  |<   &t        j                  || j                  | |f      \  }}	||	|||f| j                  |<   d y)z1
        Collect histogram on real value
        r   r   r   N)r=   r   r)   r  r  r&  r'  r  r   r7  rs  merge_histogramr  rL  )
rE   rn  r  r  r.  r/  	thresholdr  r7   r8   s
             r    ry  z HistogramCollector.collect_valueH  s<    !, 1 1 3 	FHzz(+H'')H}}q FF8,	FF8,	HHQhnn=	HHQhnn=	S^S^!DHNN[I,,, $ 3 3F ;.2.B.B!8Y	9/##F+ $&<<$--QZPZ\eOf#g j/##F+)	r"   c                    |\  }}}}	}
||
k  rEt        j                  |t        |      |
 |
f      \  }}||z   |t        ||      t	        |	|      |
fS |
dk(  r-t        j                  |t        |      | |f      \  }}||z  }nft        |      }d|
z  |z  }t        ||
z
  |z  dz         }|d|z  z   }||z  |
z   }t        j                  ||| |f      \  }}||||z
  xxx |z  ccc ||t        ||      t	        |	|      |fS )Nr  r   r[   r   )r   r  rg   r&  r'  r   )rE   r  r  r,  r-  new_thresholdr  r  r*  r+  old_thresholdnew_histr   r7   r8   old_num_bins
old_stridehalf_increased_binsnew_num_binss                      r    r  z"HistogramCollector.merge_histogramh  sT   FSC>7G]M),,xX~WdFefKHa8#GW%GW%  !#%<<#h-Q^P^`mOn#o j "8}.=
&)==+HZ*WZ[*[&\#+a2E.EE 3j @= P#%<<,P]~_lNm#n j(<:M+MNRZZNGW%GW% r"   c                 b   | j                   rt        | j                         dk(  rt        d      t        d| j                  d       | j                  dk(  r| j                         S | j                  dk(  r| j                         S | j                  dk(  r| j                         S t        d      )	Nr   z=Histogram has not been collected. Please run collect() first.z0Finding optimal threshold for each tensor using z algorithm ...r0   rN  ri  rw  )rs  rg   r?   rx  rK  compute_entropycompute_percentilecompute_distributionrM   s    r    r[  z,HistogramCollector.compute_collection_result  s    ""c$*=*=&>!&C\]]@~^_;;)#''))[[L(**,,[[N*,,..cddr"   c                    | j                   dk  s| j                   dkD  rt        d      | j                  }| j                   }i }t        dt	        |              t        d| j
                          t        dd|z
   d| d	       |j                         D ]  \  }}|d   }|d
   }|j                         }t        j                  ||z        }	| j                  rft        j                  |	|dz        }
t        j                  ||
   |j                         t        j                  ||
   |j                        f||<   nd|z
  dz  }t        j                  |	d|z
        }
t        j                  |	|      }t        j                  ||   |j                        t        j                  ||
   |j                        f||<   |d   }|d   }||   d   |k  r|||   d
   f||<   ||   d
   |kD  r||   d   |f||<   g ||   |d d ||<   t        j                  j!                  dd      dv st#        ||        |S )Nr   d   z<Invalid percentile. Must be in range 0 <= percentile <= 100.Number of tensors : Number of histogram bins : zPercentile : (g      Y@,)r   r   g      i@r&   r[   r^   QUANTIZATION_DEBUGr   1)rN  r?   rs  rx  rg   rL  r=   r,   r   cumsumr   searchsortedr  r   osenvirongetr   )rE   rs  rN  thresholds_dictr  r  r7   r8   totalcdf	idx_rightpercent_to_cut_one_sideidx_leftr.  r/  s                  r    r  z%HistogramCollector.compute_percentile  sZ   ??Q$//C"7[\\,,__
$S%8$9:;+DMM?;<uz12!J<qAB!/!5!5!7 	-FIQ<D"1JHHJE))D5L)C~~OOCe1CD	 XXj3:;K;KLLHHZ	2*:J:JK+'
 ,1:+=*F'OOC7N1NO	??30GHHHZ19I9IJHHZ	2*:J:JK+' "!I!!Iv&q)I5+4of6Ma6P*Q'v&q)I5+:6+B1+Ey*Q'&K(?&K$r(&KOF#zz~~2A6(B4,;	-> r"   c                    | j                   }| j                  }i }t        dt        |              t        d| j                   d       t        d| j                          |j                         D ]^  \  }}| j                  ||      }|||<   g ||d d ||<   t        j                  j                  dd      dv sMt        |d   |d	          ` |S )
Nr  r  z: (The number may increase depends on the data it collects)zNumber of quantized bins : r[   r  r   r  r   )rs  rM  rx  rg   rL  r=   get_entropy_thresholdr  r  r  r   )rE   rs  rM  r  r  r  optimal_thresholds          r    r  z"HistogramCollector.compute_entropy  s    ,,!44$S%8$9:;+DMM?:tuv+D,C,C+DEF!/!5!5!7 	7FI $ : :9FX Y&7OF#&J(9&JIbqM&JOF# zz~~2A6(B9Q<16	7 r"   c                    |dk  rt        d| d      |d d |dd  z   dz  }|dk(  r| |z  j                         | j                         z  }| |dz  z  j                         | j                         z  |dz  z
  dz  }t        j                  ||j                        t        j                  ||j                        fS t        |      |k(  rt        |      dz  dk(  r| ||z  z  j                         | j                         z  }| ||z  |z
  dz  z  j                         | j                         z  dz  }t        j                  ||j                        t        j                  ||j                        fS t        j                  |      |z  }d|t        j                  |      <   d|t        j                  |      <   t        j                  |      |z  |z  }| |z  j                         | j                         z  }| |dz  z  j                         | j                         z  |dz  z
  dz  }t        j                  ||j                        t        j                  ||j                        fS )	Nr   zpower=z <= 0 is invalid.r   r   g      ?r[   r   )	r?   r,   r   r  r   r   r7  isnanisinf)r7   r8   powerrw   r3   r4   facts          r    _avg_stdzHistogramCollector._avg_std  s   A:veW,=>??Sb/JqrN2c9A:&=%%'$((*4C619$))+dhhj836AcIC88Cz'7'78"((3jN^N^:___u:3u:>Q#6&%-',,.;CFEMC/A55::<txxzIcQC88Cz'7'78"((3jN^N^:___vvf~& RXXd^ RXXd^5(4/f}!!#dhhj0vqy %%'$((*4sAv=#Exx:#3#34bhhs*JZJZ6[[[r"   c           
         | j                   dk  rt        d      | j                  }i }t        dt	        |              t        d| j                           t        d| j
                  d       |j                         D ]E  \  }}|d   }|d   }|j                  t        j                  k7  sJ | j
                  d	k(  r| j                  ||d
      \  }}n2| j
                  dk(  r| j                  ||d
      \  }}nt        d      |j                  t        j                  k7  sJ |j                  t        j                  k7  sJ |j                  t        j                  k7  sJ t        |||||j                         |j                               ||<   t        j                  j!                  dd      dv s:t#        ||       H |S )Ni   z3Invalid num_bins. Must be in range 512 <= num_bins.r  r  zScenario : r  r   r   r`  )r  p3gUUUUUU?z,Invalid scenario. Must be in {'same', 'p3'}.)r3   r4   r7   r8   r5   r6   r  r  )rL  r?   rs  rx  rg   rO  r=   r   r   r  r  r2   r&  r'  r  r  r  r   )	rE   rs  r  r  r  r7   r8   avg_coefstd_coefs	            r    r  z'HistogramCollector.compute_distribution  s   ==3RSS,,$S%8$9:;+DMM?;<DMM,A./!/!5!5!7 	-FIQ<D"1J##rzz111}}&%)]]41]%M"($&%)]]49]%U"( !OPP>>RZZ///>>RZZ///##rzz111&0%!~~'"('OF# zz~~2A6(B4,3	-6 r"   c           	         |d   }|d   }|j                   }|dz  }|dz  }|d   j                  }t        j                  ||z
  dz         }	t	        |	j                         D 
cg c]0  }
t        j
                  d|      t        j
                  d|      f2 }}
t	        ||dz   d      D ]  }
||
z
  }t        ||
z   dz   |      }||   ||   f||
|z
  <   t        j                  |||       }|j                         }t        |d|       }t        ||d       }|dxx   |z  cc<   |dxx   |z  cc<   |dk7  j                  t        j                        }t        j                  |t        j                        }|j                   |z  }t	        |      D ]  }||z  }||z   }t        |||       ||<    |dxx   t        |||z  d       z  cc<   t        j                  |j                   t        j                        }t	        |      D ]+  }||z  }||z   }t        |||       }|dk7  s!||   |z  ||| - t        |      }t        |      }||&t        j
                  t        j                  |      }n!t        j
                  t        ||      |      }||	|
|z
  <    t        j                  |	      }||   }|d   }|d   }|d   |k  r||d   f}|d   |kD  r|d   |f}t!        |d   d      sJ t!        |d   d      sJ |S c c}
w )	aF  Given a dataset, find the optimal threshold for quantizing it.
        The reference distribution is `q`, and the candidate distribution is `p`.
        `q` is a truncated version of the original distribution.
        Ref: http://on-demand.gputechconf.com/gtc/2017/presentation/s7310-8-bit-inference-with-tensorrt.pdf
        r   r   r[   r   Nr   r^   r   )r  r   r   zerosr   r  r&  copydeepcopyr,   r*   r  r   r   r0   argminrA   )rE   r  rM  r7   r8   rL  zero_bin_indexnum_half_quantized_binr   kl_divergencer  
thresholdsr   r   sliced_distributionpleft_outliers_countright_outliers_countnonzerosquantized_binsnum_merged_binsindexstartendqnormdivmin_kl_divergence_idxr  r.  r/  s                                  r    r  z(HistogramCollector.get_entropy_threshold  sw    |q\
99!Q!3q!8!""2H!H1!LMTYZgZlZlTmnqrxx/!51IJn
n  -~/A1E .	<A(1,KNQ.2H=I6@6MzZcOd5eJq112"&--[0K"L $((*A"%d<K&8"9#&tIJ'7#8 aD''DbE))E Qrxx0H  XX&8IN166:LLO 12 L/o-(+,?c,J(Ku%L 2#&9:L:^:`&a"bb rxx0A12 @/o-8E#./19#1%#84#?AeCL@ $A&A#A&AyAIhhrvvU3hhwq!}E:8;M!445].	<` !#		- 8&'<=aL	aL	Q)+!*,=a,@ AQ)+!21!5y A(+W555(+W555  U os   "5L	N)r   )rQ   rR   rS   rp  rI   ru  rT  rz  ry  r  r[  r  r  staticmethodr  r  r  rV   r"   r    rS  rS    s]    !#e8st@@e,\* \ \*&PX!r"   rS  r   Fr   r   c                 p   d }|t         j                  k(  rp|j                  dd      }|j                  dd      }|j                  dd      }	|j                  dd       }
|j                  dd      }t        | ||||||	|
|	      }n |t         j                  k(  rI|j                  d	d
      }|j                  dd
      }|j                  dd      }t        | ||||||      }n|t         j                  k(  rI|j                  d	d      }|j                  dd      }|j                  dd      }t        | ||||||      }nH|t         j                  k(  r5|j                  d	d      }|j                  dd      }t        | |||||      }|r"|j                          |j                          |S t        d|       )Nr   Fr   r   rD  r   r   )r   r   r   r   r   r   rL  r]  rM  )r   r   rL  rM  r^  rN  r_  T)r   r   rL  rN  rO  r`  )r   rL  rO  zUnsupported calibration method )re   rf   r  r   rz   rX  r{   rY  r|   rZ  r   r   r?   )r   r   r   calibrate_methodr   extra_options
calibratorr   r   r   r   r   rL  rM  rN  rO  s                   r    create_calibratorr  r  s    J,333!%%k59	&**+;UC*../CTJ#0#4#45OQU#V #''u=%! %=)1%=#


 
.66	6 $$Z5*../CSI!%%k59	&! %=1

 
.99	9 $$Z6"&&|V<
!%%k48	)! %=!

 
.;;	; $$Z6 $$Z8+! %=

   "++-
67G6HI
JJr"   )Nr   )0r   r  r   r  r   enumr   pathlibr   typingr   r   r   r   r	   numpyr   r   r
   r   r   r   r   quant_utilsr   r   r   r  r!   floatr   r0   r2   rX   re   ABCMetar~   r   r   rH  rX  rY  rZ  rk  rS  rf   rb   r  rV   r"   r    <module>r     s      	    9 9   > >  U U  

 " !	





 5/ 	
 ZZ8$ $8%! %!P "ckk "4k" k"\[,~ [,|vL. vLr
+ 
D
. 
D 
0  
F" ",E!1 E!T 6:/&--"KKdKK#HSM2KKr"   