
    zIg{                       d dl mZ d dlZd dlZd dlZd dlmZ d dlmZ d dl	Z	d dl
Z
d dl
mZmZmZ d dl
mZ d dlmZmZmZmZ d dlmZ d d	lmZmZmZ 	 d d
lmZ 	 d dlmZmZ dZ dZ!dZ"dZ#dZ$dZ%dZ&dZ'dZ(i Z) e*e      D  ci c]  }  e+ e,e|       e-      s e,e|       |  c} Z. G d de      Z/ G d de      Z0 G d de      Z1 G d de      Z2ej                  jf                   e	jh                  d      ej                  jj                   e	jh                  d      ej                  jl                   e	jh                  d      ej                  jn                   e	jh                  d       ej                  jp                  eej                  jr                  eej                  jt                  eiZ;ej                  jj                   e	jx                  d e	jz                  !       e	jx                  d"e	jz                  !      fej                  jf                   e	jx                  d#e	j|                  !       e	jx                  d$e	j|                  !      fej                  jn                   e	jx                  d e	j~                  !       e	jx                  d%e	j~                  !      fej                  jl                   e	jx                  d&e	j                  !       e	jx                  d'e	j                  !      fej                  jt                   e	jx                  d e!       e	jx                  d(e!      fej                  jr                   e	jx                  d)e!       e	jx                  d*e!      fiZAej                  jf                   e	jx                  d+e	j|                  !       e	jx                  d$e	j|                  !      fej                  jl                   e	jx                  d,e	j                  !       e	jx                  d'e	j                  !      fiZBej                  jj                   e	jx                  d e	jz                  !       e	jx                  d$e	jz                  !      fej                  jf                   e	jx                  d-e	j|                  !       e	jx                  d.e	j|                  !      fej                  jn                   e	jx                  d e	j~                  !       e	jx                  d'e	j~                  !      fej                  jl                   e	jx                  d/e	j                  !       e	jx                  d0e	j                  !      fej                  jt                   e	jx                  d e!       e	jx                  d*e!      fej                  jr                   e	jx                  d1e!       e	jx                  d2e!      fiZCd3d4d5ZDd[d6ZEd\d7ZFd8 ZG	 d]d9ZHd^d:ZId^d;ZJd_d<ZKd`d=ZL G d> d?      ZM G d@ dA      ZN G dB dC      ZOdD ZPdE ZQdF ZRdG ZSdadHZTdI ZUdbdJZVdcdKZWdddLZXdedMZYdfdNZZdgdOZ[dfdPZ\dgdQZ]dhdRZ^didSZ_djdTZ`dkdUZadkdVZbdldWZcdldXZddldYZedldZZfy# e$ r dZY w xY w# e$ r dZdZY w xY wc c} w )m    )annotationsN)Enum)Path)
ModelProtoTensorProtoexternal_data_helper)onnx_pb)
make_graph
make_model	make_nodemake_tensor_value_info)ReferenceEvaluator)GraphOptimizationLevelInferenceSessionSessionOptionsfloat8e4m3fn)int4uint4zonnx.quantizez0.1.0zai.onnxzcom.microsoftQuantizeLinear_QuantizeLinear_InputDequantizeLinear_DequantizeLinear_Output
_quantizedc                  *    e Zd ZdZdZd Zed        Zy)QuantizationModer      c                    | j                   S Nnameselfs    a/var/www/html/answerous/venv/lib/python3.12/site-packages/onnxruntime/quantization/quant_utils.py__str__zQuantizationMode.__str__<       yy    c                D    	 t         |    S # t        $ r t               w xY wr   )r   KeyError
ValueError)modes    r$   from_stringzQuantizationMode.from_string?   s)    	#D)) 	,	    N)__name__
__module____qualname__
IntegerOps
QLinearOpsr%   staticmethodr,    r'   r$   r   r   8   s%    JJ  r'   r   c                  *    e Zd ZdZdZd Zed        Zy)QuantizedValueTyper   r   c                    | j                   S r   r    r"   s    r$   r%   zQuantizedValueType.__str__K   r&   r'   c                D    	 t         |    S # t        $ r t               w xY wr   )r6   r)   r*   )vs    r$   r,   zQuantizedValueType.from_stringN   s)    	%a(( 	,	r-   N)r.   r/   r0   InputInitializerr%   r3   r,   r4   r'   r$   r6   r6   G   s%    EK  r'   r6   c                  N    e Zd ZdZdZdZdZdZdZdZ	d Z
ed	        Zed
        Zy)	QuantTyper   r                  c                    | j                   S r   r    r"   s    r$   r%   zQuantType.__str___   r&   r'   c                D    	 t         |    S # t        $ r t               w xY wr   )r=   r)   r*   )ts    r$   r,   zQuantType.from_stringb   s(    	Q< 	,	r-   c                
   | t         j                  k(  rt        j                  S | t         j                  k(  rt        j
                  S | t         j                  k(  rt        j                  S | t         j                  k(  rt        j                  S | t         j                  k(  rt        j                  S | t         j                  k(  rt        j                  S | t         j                  k(  rt        j                  S t!        d| d      )NzUnexpected value qtype=.)r=   QInt8r   INT8QUInt8UINT8QUInt16UINT16QInt16INT16QFLOAT8E4M3FNFLOAT8E4M3FNQUInt4UINT4QInt4INT4r*   r"   s    r$   tensor_typezQuantType.tensor_typei   s    9??"###9###$$$9$$$%%%9###$$$9***+++9###$$$9??"###24(!<==r'   N)r.   r/   r0   rH   rJ   rP   rN   rL   rT   rR   r%   r3   r,   propertyrV   r4   r'   r$   r=   r=   V   sR    EFMFGEF   > >r'   r=   c                  *    e Zd ZdZdZd Zed        Zy)QuantFormatr   r   c                    | j                   S r   r    r"   s    r$   r%   zQuantFormat.__str__   r&   r'   c                D    	 t         |    S # t        $ r t               w xY wr   )rY   r)   r*   )formats    r$   r,   zQuantFormat.from_string   s)    	v&& 	,	r-   N)r.   r/   r0   	QOperatorQDQr%   r3   r,   r4   r'   r$   rY   rY   |   s%    I
C  r'   rY   int8uint8int16uint16dtype   i   i  i i     i   iii@   i i @  r?   zero_point_indexc                @   g }t        |      D ]  \  }}t        j                  t        |      t        j                        r%|j                  t        j                  |             n=t        |t        j                        r|j                  |       nt        d| d|       || k(  s|d   }|j                  t        j                  k(  s|j                  t        j                  k(  st        d|j                          t        |      dkD  rt        |      S |d   S )Nzarg z is not an array: rk   zzero_point cannot be r   r   )	enumeratenumpy
issubdtypetypenumberappendarray
isinstancendarray	TypeErrorrd   float32float16lentuple)rm   argsnew_argsiar9   s         r$   _check_typer      s    H$ 
C1DGU\\2OOEKKN+5==)OOAd1#%7s;<<  Aww%--'177emm+C"7y ABB
C "(ma/5?@Xa[@r'   c                   | t         v sJ d|  d       | t        j                  j                  t        j                  j                  t        j                  j
                  t        j                  j                  fv r/|dk7  rt        d|d      |j                  t        j                  k(  rt        j                  }nG|j                  t        j                  k(  rt        j                  }nt        d|j                   d      t        t!        t#        dg dgt$        j&                  j)                  d| g dg      	      t#        d
g ddg      gdt+        d|d       t+        d|d       gt+        d| d       g            }t-        |      }t/        |j1                  d ||d      d         S t         |    }	t3        | dd      \  }
}|t5        |
|      n|
}|t7        ||      n|}t        j8                  |j;                  t        j                        |z  j=                         |z         }t        j>                  ||||       t/        |j;                  |	            S )NUnexpected data type > requested. Only INT8, UINT8, INT16, and UINT16 are supported.r   z2zero_point is expected to be null for float 8 not rG   zUnexpected dtype Constant
zero_point)valuer   )Xscaler   Yqur   r   )r   r   FT)reduce_range	symmetric)out) ONNX_TYPE_TO_NP_TYPE
onnx_protor   rQ   FLOAT8E4M3FNUZ
FLOAT8E5M2FLOAT8E5M2FNUZNotImplementedErrorrd   rp   ry   FLOATrz   FLOAT16r*   r   r
   r   onnxhelpermake_tensorr   r   r   runget_qmin_qmax_for_qTypemaxminasarrayastyperoundclip)qTypearrr   r   lowhigh	onnx_type
onnx_modelrefrd   qminqmaxcliplowcliphigharr_fp32s                  r$   quantize_nparrayr      s-   %%e	ug%cde%++--))--	  ?%(Z[eZhhi&jkk99%#))IYY%--'#++I01=>>"Bdkk>U>UVbdikmpqor>s .0LseT	 *3	4@*7ItD (UD9:

  !,3774sU)CDQGHH %U+.u5TXYt$'O#dC.&*&63tT?D==#**U]]";e"C!J!J!Lz!YZ

8WhH=8??5122r'   c           	     ^   |dkD  s|dk  rt        d| d|       t        j                  | t        j                  d| j                              } t        j
                  |t        j                  d|j                              }|t        || |z         }|rBt        j
                  t        j                  |       t        j                  |            }| } |}||k  sJ d|  d|        t        j                  || z
  t        j                        }t        j                  |t        j                        t        j                  |t        j                        z
  }t        j                  ||z        }	|	dk\  sJ d       |	t        j                  |j                        j                  k  rFt        j                  d|j                        }	t        j                  d|j                        }
|
|	gS |r^t        j                  t        j                  ||z   t        j                  d	t        j                        z        |j                        }
n:t        j                  t        j                  || |	z  z
        |j                        }
|	j                  |j                        }	|
|	gS )
a  Calculate the scale s and zero point z for the quantization relation
    r = s(q-z), where r are the original values and q are the corresponding
    quantized values.

    r and z are calculated such that every value within [rmin,rmax] has an
    approximate representation within [qmin,qmax]. In addition, qmin <= z <=
    qmax is enforced. If the symmetric flag is set to True, the interval
    [rmin,rmax] is symmetrized to [-absmax, +absmax], where
    absmax = max(abs(rmin), abs(rmax)).

    :parameter rmin: minimum value of r
    :parameter rmax: maximum value of r
    :parameter qmin: minimum value representable by the target quantization data type
    :parameter qmax: maximum value representable by the target quantization data type
    :parameter symmetric: True if the floating-point range should be made symmetric. Defaults to False.
    :parameter min_real_range: Minimum floating-point range (i.e., rmax - rmin) to enforce. Defaults to None.
    :return: zero and scale [z, s]

    r   Bqmin and qmax must meet requirement: qmin <= 0 <= qmax while qmin:, qmmax:rc   zqmin=z > qmax=z
scale isse      ?g       @)r*   rp   minimumru   rd   maximumr   absfloat64finfotinyr   r   )rminrmaxr   r   r   min_real_rangeabsmaxdrdqr   r   s              r$   compute_scale_zpr      s   ( ax4!8]^b]ccklpkqrss
 ==u{{1DJJ?@D==u{{1DJJ?@D !4./uyy		$@ww4<55htf55<	TD[	6B	T	/%++d%--2X	XBKKR EA:#|#:u{{4::&+++Ctzz2[[$**5
   TD[EKK5==,QQRZ^ZdZdJ U[[u1D%ETZZXJTZZ(r'   c                   d}| t         vr| t        j                  k(  rddlm} ddlm} |}t        d      D cg c]
  } ||       }}t        j                  |D cg c]0  }t        j                  |      rt        j                  |      r/|2 c}t        j                        }nt        d|  d      |t         | <   n| t        j                  k(  rddlm} |}|t        d	|  d
      t        j                  t         |          }	t        j                  d|      }
t        j                  ||	z  |j                         }|
|gS c c}w c c}w )ar  Calculate the scale s for a float8 type (E4M3FN).
    The function assumes the coefficient distribution and the float 8
    distribution are similar to two gaussian laws.

    :return: zero and scale [z, s]

    More details in notebook `quantization_fp8.ipynb
    <https://github.com/microsoft/onnxruntime/blob/main/docs/python/notebooks/quantization_fp8.ipynb>`_.
    Nr   )float8e4m3_to_float32r      rc   zQuantization to element_type=z not implemented.zUnexpected element_type rG   )FLOAT8_DISTRIBUTIONSr   rQ   onnx.numpy_helperr   #onnx.reference.custom_element_typesr   rangerp   ru   isnanisinfry   r*   rx   stdrd   )element_typer   zp_dtyper   r   r   
all_valuesfvaluesstd_f8zeror   s               r$   compute_scale_zp_float8r   0  s%    H//;333?H#H<A#JGq/2GJG[[&Tqekk!nU[[QR^T\a\i\iF <\NJ[\]]-3\*	11	1D2<.BCCYY+L9:F;;q)DKKfCII6E%=# HTs   E
E8EEc                6   t        | t        j                        st        dt	        |        d      ||}nt        |       r| j                         nd}||}nt        |       r| j                         nd}t        j                  || j                        }t        j                  || j                        }d}	t        j                  d| j                        }
|t        j                  k(  r|rt        d      t        j                  |       }t        ||      \  }	}
t        || |
|	      }t!        |j#                  t        j$                        j'                         dz  dk(        ret        j(                  |       }t        d	|j                          d
|j                          d|j                          d
|j                          d	      t+        |||	|
|d      S |t        j,                  t        j.                  t        j0                  t        j2                  t        j4                  t        j6                  fv rNt        |       r$t9        |||      \  }}t;        ||||||      \  }	}
t        || |
|	      }t+        |||	|
|d      S t=        d| d      )a  
    :param data: data to quantize
    :param qType: data type to quantize to. Supported types UINT8 and INT8
    :param symmetric: whether symmetric quantization is used or not. This is applied to INT8.
    :parameter reduce_range: True if the quantization range should be reduced. Defaults to False.
    :parameter min_real_range: Minimum floating-point range (i.e., rmax - rmin) to enforce. Defaults to None.
    :parameter rmin_override: The value of rmin to use if not None. Otherwise, uses min(data).
    :parameter rmax_override: The value of rmax to use if not None. Otherwise, uses max(data).
    :return: minimum, maximum, zero point, scale, and quantized weights

    To pack weights, we compute a linear transformation

    - when data `type == uint8` mode, from `[rmin, rmax]` -> :math:`[0, 2^{b-1}]` and
    - when data `type == int8`, from `[-m , m]` -> :math:`[-(2^{b-1}-1), 2^{b-1}-1]` where
        `m = max(abs(rmin), abs(rmax))`

    and add necessary intermediate nodes to transform quantized weight to full weight using the equation

    :math:`r = S(q-z)`, where

    - *r*: real original value
    - *q*: quantized value
    - *S*: scale
    - *z*: zero point
    z%Weight must be given as an array not rG   g        rc   r   r   z1Unsupported option reduce_range=True for float 8.rf   z+One of the quantized value is NaN data in [z, z], quantized_data in [z].r>   rl   r   zUnexpected value for qType=)rv   rp   rw   rx   rr   r{   r   r   ru   rd   r   rQ   RuntimeErrorr   r   r   anyr   r`   ravelr   r   rI   rK   rO   rM   rU   rS   r   r   r*   )datar   r   r   r   rmin_overridermax_overrider   r   r   r   r   quantized_datanp_datar   r   s                   r$   quantize_datar   U  sK   8 dEMM*?T
|1MNN  YtxxzC  YtxxzC;;t4::.D;;t4::.DJKK4::.E(((RSSiio3E3?
E)%ujI%%ekk288:S@SHImmD)G=gkkm_Bw{{}o ^&&4&8&8&:%;2n>P>P>R=SSUW  4z5.[\]]  t90PYZJD$ 0tT4Tb cJ)%ujI4z5.[\]]
25';
<<r'   c                j   | t         j                  j                  k(  rt        d      d}|rt        j                  |       }n)|r| t        v r
t        |    }nt        j                  |       }|st        d|  d      |\  }}|dkD  s|dk  r't        d| d| d|j                   d	| d
| d|        |S )z
    Return qmin and qmax, the minimum and maximum value representable by the given qType
    :parameter qType: onnx.onnx_pb.TensorProto.UINT8 or onnx.onnx_pb.TensorProto.UINT8
    :return: qmin, qmax
    z;This function is not implemented for float 8 as not needed.Nr   r   r   r   r   z, dtype=z, reduce_range=z, symmetric=z, qType=)
r   r   rQ   r   ONNX_INT_TYPE_REDUCED_RANGEgetONNX_INT_TYPE_SYMMETRIC_RANGEONNX_INT_TYPE_RANGEr*   rd   )r   r   r   qranger   r   s         r$   r   r     s     
&&333!"_``F,007	u ==.u5$((/07uvwwJD$ax4!86$x

|?<. Y"8E74
 	
 Mr'   c                .    t        | ||      \  }}||z
  S )z
    Helper function to get the quantization range for a type.
        parameter qType: quantization type.
        return: quantization range.
    r   )r   )r   r   r   r   r   s        r$   get_qrange_for_qTyper     s      )	RJD$$;r'   c                :    | dk  r| |z   n| }|dk\  xr ||k  }||fS )z
    Helper function that tries to return a normalized axis in the range [0, rank - 1].
    :parameter axis: The axis to normalize.
    :parameter rank: The tensor rank (number of dimensions).
    :return (is_valid, axis_norm)
    r   r4   )axisrank	axis_normis_valids       r$   normalize_axisr     s3      $axtTIA~2)d"2HYr'   c                    t        |       }|dk(  r
t               S |dz   dz  }t        |      }d}d}||dz
  k  r-| |dz      dz  dz  | |   dz  z  ||<   |dz  }|dz  }||dz
  k  r-||k  r| |   dz  ||<   |S )aB  
    Copies a source array of 8-bit values into a destination bytearray of packed 4-bit values.
    Assumes that the source values are already in the appropriate int4 range.
    :parameter src_8bit: The 8-bit element values to pack.
    :return A bytearray with every two 8-bit src elements packed into a single byte.
    r   r   r>   rg   r@   )r{   	bytearray)src_8bit	num_elemsdst_sizedstsrc_idst_is         r$   pack_bytes_to_4bitr     s     HIA~{A!#H
H
CEE )a-
	*S0Q68E?S;PQE


 )a-

 ye_s*E
Jr'   c                      e Zd ZdZg g dfdZy)QuantizedInitializerzJ
    Represents a linearly quantized weight input from ONNX operators
    Nc
                    || _         || _        || _        || _        || _        || _        || _        || _        |	| _        y r   )	r!   initializerrminsrmaxszero_pointsscalesr   r   r   )
r#   r!   r   r   r   r   r   r   r   r   s
             r$   __init__zQuantizedInitializer.__init__  sF     	&

&	,	r'   r.   r/   r0   __doc__r   r4   r'   r$   r   r     s     r'   r   c                       e Zd ZdZ	 	 	 	 ddZy)QuantizedValuezI
    Represents a linearly quantized value (input\output\intializer)
    Nc
                    || _         || _        || _        || _        || _        || _        || _        || _        |	| _        y r   )	original_nameq_name
scale_namezp_name
value_typer   	node_type
node_qtype
scale_type)
r#   r!   new_quantized_namer  zero_point_namequantized_value_typer   r  r  r	  s
             r$   r   zQuantizedValue.__init__  sD     "($&.	"$$r'   )NNNNr   r4   r'   r$   r   r     s     %r'   r   c                      e Zd ZdZd Zy)BiasToQuantizez+
    Represents a bias to be quantized
    c                .    || _         || _        || _        y r   )	bias_name
input_nameweight_name)r#   r  r  r  s       r$   r   zBiasToQuantize.__init__5  s    "$&r'   Nr   r4   r'   r$   r  r  0  s    'r'   r  c                   | j                   dk(  rt        d| j                   d      | j                   dk(  r| j                  }n#| j                   dk(  r| j                  }n| j                   dk(  r| j
                  }n| j                   dk(  r| j                  }n| j                   dk(  r| j                  }n| j                   d	k(  r| j                  }n| j                   d
k(  r| j                  }nz| j                   dk(  r| j                  }n^| j                   dk(  r| j                  }nB| j                   dk(  r| j                  }n&t        d| j                   d| j                    d      | j                  |iS )z
    Convert attribute to kwarg format for use with onnx.helper.make_node.
        :parameter attribute: attribute in AttributeProto format.
        :return: attribute in {key: value} format.
    r   z
attribute z does not have type specified.r   r>   r?   r@   rA   rB   rh      	   
   z has unsupported type rG   )rr   r*   r!   r   r   srE   gfloatsintsstringstensorsgraphs)	attributer   s     r$   attribute_to_kwargr  ;  s;    ~~:inn%55STUU ~~	1		1		1		1		1	  	1		1	!!	1	!!	2	  :inn%55KINNK[[\]^^NNE""r'   c                t    |D cg c]  }|j                   | k(  s| }}t        |      dkD  r|d   S dS c c}w )z
    Helper function to find item by name in a list.
        parameter item_name: name of the item.
        parameter item_list: list of items.
        return: item if found. None otherwise.
    r   N)r!   r{   )	item_name	item_listitemitemss       r$   find_by_namer%  `  sA     (Bd499	+ATBEB5zA~58/4/ Cs   55c                R    d}t        t        |            D ]  }||   | k(  s|} |S )zC
    Helper function to return index of an item in a node list
    rk   )r   r{   )	elem_name	elem_listelem_idxr   s       r$   get_elem_indexr*  k  s9     H3y>" Q<9$H Or'   c                H    t         j                  j                  d| |g|      S )z
    Helper function to create a Mul node.
        parameter inputs: list of input names.
        parameter output: output name.
        parameter name: name of the node.
        return: Mul node in NodeProto format.
    Mul)r   r   r   )inputsoutputr!   s      r$   get_mul_noder/  v  s!     ;;  $??r'   c                l    | j                   j                  | j                  |z   | j                  z         S )zp
    Helper function to generate a identifiable filepath by concatenating the given identifier as a suffix.
    )parentjoinpathstemsuffix)filename
identifiers     r$   generate_identified_filenamer7    s+     ??##HMMJ$>$PQQr'   c                `   dd l }dd lm} dd l} |j                  |j
                         t        d       t        |        t        d       t        |       |j                  | |d       |j                  d       |j                  d       |j                  d	       |j                          y )
Nr   )	thresholdz
Histogram:zHistogram Edges:T)fillzTensor valueCountszTensor value V.S. Counts)sysmatplotlib.pyplotpyplotrp   set_printoptionsmaxsizeprintstairsxlabelylabeltitleshow)hist
hist_edgesr<  pltrp   s        r$   
apply_plotrJ    s    #ES[[1	,	$K	
	*JJtZdJ+JJ~JJxII()HHJr'   c                   ddl }ddl}ddlmc mc m} ddlmc mc m} t        j                  d|         t        t        j                  j                  |d      d      5 }|j                  |j                  |              ddd       |j!                  d      }g }t#        | j%                               D ]  }	| |	   }
t'        t)        t+        |
d         t+        |
d                     }|j-                  |	      }|j-                  |      }|j/                  |       |j1                  ||       |j3                  ||       |j5                  |      }|j7                  |        |j9                  |t;        |             |D ]  }|j=                  |        |j?                         }|jA                  |       |jC                  ||       |jE                  |      }|jG                  |       |jI                         }t        t        j                  j                  |d      d	      5 }|j                  |       ddd       t        jJ                  jM                  d
d      dv r|j                  jO                  |d      }|jQ                         }tS        |      D ]Y  }|jU                  |      }t        j                  |jW                                t        j                  |jY                                [ t        t        j                  j                  |d      d      5 }t#        | j%                               D ]\  }	| |	   }|	dz   t'        t)        t+        |d         t+        |d                     z   }|j                  |       |j                  d       ^ 	 ddd       y# 1 sw Y   $xY w# 1 sw Y   yxY w# 1 sw Y   yxY w)z>
    Helper function to write calibration table to files.
    r   Nzcalibration cache: zcalibration.jsonwi   r   zcalibration.flatbufferswbQUANTIZATION_DEBUG)r   1zcalibration.cache 
)-jsonflatbuffers5onnxruntime.quantization.CalTableFlatBuffers.KeyValuequantizationCalTableFlatBuffersKeyValue5onnxruntime.quantization.CalTableFlatBuffers.TrtTableTrtTablelogginginfoopenospathjoinwritedumpsBuildersortedkeysstrr   r   CreateStringKeyValueStartKeyValueAddKeyKeyValueAddValueKeyValueEndrt   TrtTableStartDictVectorr{   PrependUOffsetTRelative	EndVectorTrtTableStartTrtTableAddDictTrtTableEndFinishOutputenvironr   GetRootAsTrtTable
DictLengthr   DictKeyValue)calibration_cachedirrR  rS  rW  rY  filebuilderkey_value_listkeyr   r   flat_key
flat_value	key_value	main_dict	cal_tablebufdict_lenr   r  s                        r$   write_calibration_tabler    s,   
 LLLLLL&'8&9:;	bggll3 23S	9 2T

4::/012 !!$'GN',,./ )"3'CF1IF1I78'',))%0
w'2!!':6((1	i() $$Wc..AB# 3	''	23!!#I7#Wi0$$W-INN9
..
C	bggll3 9:D	A T

3 
zz~~*A.(:%%77Q?	'')x 	,A!q)ILL)LL*+	, 
bggll3 34c	: d+0023 	C%c*Ec	CCaM3uQx= ABBAJJqMJJt		 [2 2@  s%   #!N&*N3#A9O &N03N= O	c                   | dk(  j                  t        j                        }| dk7  j                  t        j                        }|j                         }| j                  |z
  }|sy|t        |      z  t        |      z  }|dk  sJ d|||fz         | j                  t        j                        }|||z  | |z  z   z  }|dk  j                         dk(  sJ |S )a~  Given a discrete distribution (may have not been normalized to 1),
    smooth it by replacing zeros with eps multiplied by a scaling factor
    and taking the corresponding amount off the non-zero values.
    Ref: http://web.engr.illinois.edu/~hanj/cs412/bk3/KL-divergence.pdf
         https://github.com//apache/incubator-mxnet/blob/master/python/mxnet/contrib/quantization.py
    r   Nr   z"n_zeros=%d, n_nonzeros=%d, eps1=%f)r   rp   ry   sumsizefloat)pepsis_zerosis_nonzerosn_zeros
n_nonzeroseps1rG  s           r$   smooth_distributionr    s     Qu}}-H6//%--0KllnG'!Jw%
"33D#: ;?  : 88EMM"DC(Nte{222DAI??!!!Kr'   c                    t        j                  | j                         d      }|j                  j                  D ]  }t        j                  |      s y y)NF)load_external_dataT)r   loadas_posixgraphr   r   uses_external_data)
model_pathmodel
intializers      r$   model_has_external_datar    sJ    IIj))+FEkk-- 
22:> r'   c                    t               }|j                         |_        t        j                  |_        i }dg|d<   t        | j                         |fddgi|}y)z
        Generate model that applies graph optimization (constant folding, etc.)
        parameter model_path: path to the original onnx model
        parameter opt_model_path: path to the optimized onnx model
    :return: optimized onnx model
    ConstantSharingdisabled_optimizers	providersCPUExecutionProviderN)r   r  optimized_model_filepathr   ORT_ENABLE_BASICgraph_optimization_levelr   )r  opt_model_pathsess_optionkwargs_s        r$   optimize_modelr    sb     !"K+9+B+B+DK(+A+R+RK(F%6$7F !,,.jH^G_jcijAr'   c                    ddi}| j                   r8| j                   D ])  }|j                  |j                  |j                  i       + t        j
                  j                  | |       y)z>Tag the model that it went through quantization pre-processingonnx.quant.pre_processonnxruntime.quantNmetadata_propsupdater~  r   r   r   set_model_props)r  r  props      r$   add_pre_process_metadatar    sZ    .0CDN(( 	:D!!488TZZ"89	:KK~6r'   c                    | j                   r2| j                   D ]#  }|j                  dk(  s|j                  dk(  s# y y)zCCheck the model whether it went through quantization pre-processingr  r  TFr  r~  r   )r  r  s     r$   model_has_pre_process_metadatar    sA    (( 	Dxx33

FY8Y	 r'   c                    ddi}| j                   r8| j                   D ])  }|j                  |j                  |j                  i       + t        j
                  j                  | |       y )N
onnx.inferr  r  )r  r  r  s      r$   add_infer_metadatar  $  sZ    "$78N%% 	4A!!155!''"23	4KK~6r'   c                    | j                   r2| j                   D ]#  }|j                  dk(  s|j                  dk(  s# y y)Nr  r  TFr  )r  r  s     r$   model_has_infer_metadatar  ,  s@    %% 	Auu$4G)G	 r'   c                    t        | d      }t        j                  j                  t	        |       t	        |             t        j
                  |j                               }t        |       |j                          |S )Nz	-inferred)	r7  r   shape_inferenceinfer_shapes_pathre  r  r  r  unlink)r  inferred_model_pathr  s      r$   load_model_with_shape_inferr  4  s`    6z;O**3z?C@S<TUII)2245Eu Lr'   c                    t        j                  d      5 }t        |      j                  d      }t	        j
                  | |j                         d       t        |      cd d d        S # 1 sw Y   y xY w)Nz
ort.quant.)prefixz
model.onnxT)save_as_external_data)tempfileTemporaryDirectoryr   r2  r   
save_modelr  r  )r  quant_tmp_dirr  s      r$   &save_and_reload_model_with_shape_inferr  =  s]    		$	$L	9 7]-(11,?
z224DQ*:67 7 7s   AA,,A5c                   | j                   t        j                  j                  t        j                  j                  fv rt
        j                  j                  |       S t        d| j                   dt        | j                             )Nz&Only float type is supported. Weights z is )	data_typer   r   r   r   r   numpy_helperto_arrayr*   r!   type_to_name)r   s    r$   tensor_proto_to_arrayr  D  su    !7!7!=!=z?U?U?]?] ^^  ))+66

01A1A0B$|T_TiTiGjFkl r'   c                    | dz   S )N_QuantizeLinearr4   tensor_names    r$   add_quant_suffixr  M  s    ***r'   c                    | t         z   S r   )QUANT_INPUT_SUFFIXr  s    r$   add_quant_input_suffixr  Q  s    +++r'   c                    | dz   S )N_QuantizeLinear_Outputr4   r  s    r$   add_quant_output_suffixr  U  s    111r'   c                    | dz   S )N_DequantizeLinearr4   r  s    r$   add_dequant_suffixr  Y  s    ,,,r'   c                    | dz   S )N_DequantizeLinear_Inputr4   r  s    r$   add_dequant_input_suffixr  ]  s    222r'   c                    | t         z   S r   )DEQUANT_OUTPUT_SUFFIXr  s    r$   add_dequant_output_suffixr  a  s    ...r'   )NN)FN)FNNN)FF)r   intr   r  returnztuple[bool, int])r   bytesr  r   )r5  r   r6  re  r  r   )rG   )g-C6?)r  r   )r  r   r  r   )r  r   )r  r   r  bool)r  r   r  r   )r  r   r  r   )r   r   r  znumpy.ndarray)r  re  r  re  )r  re  )g
__future__r   rZ  r]  r  enumr   pathlibr   rp   r   r   r   r   r	   r   onnx.helperr
   r   r   r   onnx.referencer   onnxruntimer   r   r   r   r   ImportErrorr   r   __producer____version__onnx_domain	ms_domainQUANT_OP_NAMEr  DEQUANT_OP_NAMEr  TENSOR_NAME_QUANT_SUFFIXr   rz  rv   getattrr  r  r   r6   r=   rY   rI   rd   rK   rO   rM   rQ   rU   rS   r   ru   r`   r_   rb   ra   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r%  r*  r/  r7  rJ  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  )ks   0r$   <module>r     s   #  	      > > & Q Q - P P@? 	 , $2 '  474Dqq
SZ[fhiSjloHpQ'*qt  #> #>L$   V!4  +%++g"6  +%++g"6!!;5;;x#8''  %    ;5;;q#DkekkRU]b]h]hFi"j+%++d%**"E{u{{SV^c^h^hGi!j!!KEKK$FTYafamamHn#o  ;5;;vU[[#I;5;;W\didodoKp"q  ;5;;q#>BV[@\"]+%++b"={u{{1TX?Y!Z  +%++d%**"E{u{{SV^c^h^hGi!j  ;5;;vU[[#I;5;;W\didodoKp"q!    ;5;;q#DkekkRU]b]h]hFi"j+%++c"DkekkRT\a\f\fFg!h!!KEKK$FTYafamamHn#o  ;5;;vU[[#I;5;;W\didodoKp"q  ;5;;q#={u{{1TX?Y"Z+%++b"={u{{1TX?Y!Z  )+ A 13h<~"L hlI=X@	< >% %8' '"#J0@R$@F:k 777+,2-3/Q  L  DE" rs0   Y  Y. Y>Y> Y+*Y+.	Y;:Y;