
    zIgd                         d dl Z d dlmZmZ d dlZd dlZd dlZ	 d dlm	Z	 ddlmZ ddlmZ ddlmZmZmZmZmZmZmZmZmZmZmZ ddlmZ  G d	 d
      Z G d d      Zy# e
$ r dZ	Y Jw xY w)    N)AnyDict)to_array_extended   )
TensorData)	ONNXModel)ONNX_TYPE_TO_NP_TYPETENSOR_NAME_QUANT_SUFFIX	QuantTypefind_by_namemodel_has_infer_metadatanormalize_axispack_bytes_to_4bitquantize_dataquantize_nparray&save_and_reload_model_with_shape_infertensor_proto_to_array)TensorQuantOverridesHelperc                   4    e Zd Zdeeef   fdZd Zd Zd Z	y)QuantizationParamsdatac                    i | _         |j                         D ]  \  }}t        |t              st	        dt        |       d|d      t        |t        t        t        j                  f      st	        dt        |       d|d      |dk(  rG|j                  t        j                  t        j                  fvrt        d|j                   d|      || j                   |<    y )NzKeys must be strings not z for k=.z1Values must be numpy arrays, int, float, str not scalez5scale must a float32 or float16 numpy element but is )r   items
isinstancestr	TypeErrortypeintnpndarraydtypefloat32float16
ValueError)selfr   kvs       d/var/www/html/answerous/venv/lib/python3.12/site-packages/onnxruntime/quantization/base_quantizer.py__init__zQuantizationParams.__init__&   s    	JJL 	DAqa%";DG9GA5PQ RSSa#sBJJ!78"STXYZT[S\\cdechhi jkkG|

BJJ/G G #XYZY`Y`Xaahijhm!nooDIIaL	    c              #   8   K   | j                   E d {    y 7 wNr   r'   s    r*   __iter__zQuantizationParams.__iter__1   s     99s   c                      | j                   |   S r.   r/   )r'   keys     r*   __getitem__zQuantizationParams.__getitem__4   s    yy~r,   c                 ,    t        | j                        S r.   )lenr   r0   s    r*   __len__zQuantizationParams.__len__7   s    499~r,   N)
__name__
__module____qualname__r   r   r   r+   r1   r4   r7    r,   r*   r   r   %   s%    	tCH~ 	r,   r   c                   \    e Zd Z	 ddZd Zd Zd Zd Zd Zd Z	dd	Z
dd
Z	 	 ddZd Zy)BaseQuantizerNc                    t        |      st        |      }|j                  j                  D ci c]  }|j                  | c}| _        | j
                  j                  |j                  j                  D ci c]  }|j                  | c}       | j
                  j                  |j                  j                  D ci c]  }|j                  | c}       t        |      | _
        || _        || _        |
r|
ni | _        d| j                  v xr | j                  d   | _        d | _        d| j                  v xr | j                  d   | _        | j                  j#                  d|t$        j&                  t$        j(                  t$        j*                  fv       | _        | j                  j#                  dd      | _        | j                  j#                  d      | _        t3        |d|      | _        t3        |d|      | _        	 |Qt9        t;        d |j=                                     r-t?        d	tA        d
 |j=                         D               d      || _!        || _"        || _#        |	| _$        | jK                         | _&        tO        | j                  j#                  di             | _(        | j                  jS                         D ci c]  }|j                  | c}| _*        | jP                  jW                  | jT                  | j
                  jY                         |      \  }}|st[        |      | jP                  j]                         | _/        y c c}w c c}w c c}w c c}w )NEnableSubgraphForceQuantizeNoInputCheckWeightSymmetricActivationSymmetricFMinimumRealRangetensor_typec                 $    t        | t               S r.   )r   r   )ts    r*   <lambda>z(BaseQuantizer.__init__.<locals>.<lambda>o   s    z!Z?X;X r,   z(tensors_range contains unexpected types c              3   2   K   | ]  }t        |        y wr.   )r   ).0r)   s     r*   	<genexpr>z)BaseQuantizer.__init__.<locals>.<genexpr>q   s     >g1tAw>gs   z, not TensorData.TensorQuantOverrides)0r   r   graph
value_infonamevalue_infosupdateoutputinputr   modelper_channelreduce_rangeextra_optionsenable_subgraph_quantizationparentforce_quantize_no_input_checkgetr   QInt8QInt16QFLOAT8E4M3FNis_weight_symmetricis_activation_symmetricmin_real_rangegetattractivation_qTypeweight_qTypeanymapvaluesr   settensors_rangenodes_to_quantizenodes_to_excludeop_types_to_quantizecheck_opset_versionopset_versionr   tensor_quant_overridesinitializerinitializersis_validkeysr&   get_quant_typestensor_quant_override_qtypes)r'   rS   rT   rU   rc   rb   rh   ri   rj   rk   rV   viotitinitzeroverrides_validoverrides_errs                    r*   r+   zBaseQuantizer.__init__<   s     (.:5AE27++2H2HIBBGGRKIu{{7I7I J" JKu{{7H7H I" IJu%
&(.;] 2 22[t7I7IJZ7[ 	) '4+=+==q$BTBTUpBq 	* $(#5#5#9#9|	AQAQS\SjSj/kk$
  (,'9'9'='=>SUZ'[$"00445GH '(8-IY Z#L-N
	 $S1XZgZnZnZp-q)r:3>gP]PdPdPf>g;g:hhyz  +!2 0$8!!557 'AASASAWAWXnprAs&t#BF**BXBXBZ[wW\\72[)-)D)D)M)Mt//4468H*
& ]++,0,G,G,W,W,Y)s J J I` \s   M8M# M(M-c                     t         r.   )NotImplementedErrorr0   s    r*   quantize_modelzBaseQuantizer.quantize_model   s    !!r,   c                 R    t        || j                  j                               }|d uS r.   )r   rS   ro   )r'   
input_namero   s      r*   is_input_a_initializerz$BaseQuantizer.is_input_a_initializer   s&    ":tzz/E/E/GH$&&r,   c                     | j                   S r.   )rT   r0   s    r*   is_per_channelzBaseQuantizer.is_per_channel   s    r,   c                 6   t        || j                  j                               }|@|j                  t        j
                  j                  t        j
                  j                  fv S | j                  r| j                  y| j                  j                  |      S )NF)r   rS   ro   	data_typeonnxTensorProtoFLOATFLOAT16rW   rX   is_valid_quantize_weight)r'   weight_nameweights      r*   r   z&BaseQuantizer.is_valid_quantize_weight   sy    k4::+A+A+CD##(8(8(>(>@P@P@X@X'YYY11t{{7J{{33K@@r,   c                     | j                   1t        | j                         dk7  r|j                  | j                   vry|j                  | j                  vry| j
                  |j                  | j
                  v ryy)Nr   FT)ri   r6   rN   op_typerk   rj   )r'   nodes     r*   should_quantize_nodez"BaseQuantizer.should_quantize_node   sn    "".D**+q0		!7!77<<t888  ,d>S>S1Sr,   c                 4   | j                   j                   j                  D cg c]   }|j                  r|j                  dk(  s|" }}t        |      dk7  rt	        d      |d   j
                  }|dk(  rt        j                  d| d       y|dk  rt        j                  d| d       | j                   j                   j                  j                  |d          | j                   j                   j                  j                  t        j                  j                  d	d
      g       d
}|dk  r| j                  t        j                  j                  k(  rt        j                  d| d       | j                   j                   j                  j                  |d          | j                   j                   j                  j                  t        j                  j                  d	d      g       d| j                   j                   _        d}|S c c}w )Nzai.onnxr   z$Failed to find proper ai.onnx domainr   
   z$The original model opset version is ze, which does not support node fusions. Please update the model to opset >= 11 for better performance.z, which does not support quantization. Please update the model to opset >= 11. Updating the model automatically to opset 11. Please verify the quantized model.       z, which does not support quantization to float 8. Please update the model to opset >= 19. Updating the model automatically to opset 19. Please verify the quantized model.	   )rS   opset_importdomainr6   r&   versionloggingwarningremoveextendr   helpermake_opsetidrc   r   FLOAT8E4M3FN
ir_version)r'   opsetai_onnx_domainrm   s       r*   rl   z!BaseQuantizer.check_opset_version   s   #zz//<<
ELLTYT`T`dmTmE
 
 ~!#CDD&q)11BOO6}o  Fk  l 2OO6}o  Fe  f JJ))001BCJJ))00$++2J2J2r2R1STM2$"3"3t7G7G7T7T"TOO6}o F5 5
 JJ))001BCJJ))00$++2J2J2r2R1ST*+DJJ'MA
s    HHc                 f   t        || j                  j                               }t        |      }|t        z   }| j
                  t        j                  j                  k(  r0t        j                  |      }|j                  t        j                  k(  rt        j                  j                  }	nQ|j                  t        j                  k(  rt        j                  j                  }	nt!        d|j                   d      |j#                  t        j                        }
t        j$                  dg|
j                        }|j'                  d      }t        j(                  j+                  |
|      }| j                  j-                  |g       d}n||z  |z  }t        j                  |      |z  j/                         j#                  t        j0                        }
t        j                  |
t        j0                        j'                  |j2                        }t        j(                  j+                  ||      }| j                  j-                  |g       t        j                  ||j                        j'                  d      }d}| j
                  }	|dz   }t        j(                  j+                  ||      }| j                  j-                  |g       | j
                  t        j                  j                  k(  r| j
                  }nt        j                  j4                  }|d	z   }| j
                  t        j                  j                  k(  r/t        j6                  j9                  || j
                  dgd
g      }n|j:                  dkD  r_t        j<                  |j>                  t        j0                        j'                  d      }t        j(                  j+                  ||      }n#t        j6                  j9                  ||g dg      }| j                  j-                  |g       ||||||	fS )z]
        Quantized the bias. Zero Point == 0 and Scale == Input_Scale * Weight_Scale
        zEOnly float16 or float32 are supported with float 8 but bias dtype is r   r   r#   CastDequantizeLinear_scale_zero_point        r   ) r   rS   ro   r   r
   rc   r   r   r   r!   asarrayr#   r%   r   r$   r   r   astypearrayreshapenumpy_helper
from_arrayinitializer_extendroundint32dimsINT32r   make_tensorsizezerosshape)r'   	bias_nameinput_scaleweight_scalebetabias_initializer	bias_dataquantized_bias_namer   
node_qtypequantized_data
bias_scalebias_scale_datapacked_bias_initializer	node_typebias_np_dataquantized_bias_scale_namepacked_bias_scale_initializerrD   quantized_bias_zp_namepacked_bias_zp_initializerbias_zp_datas                         r*   quantize_bias_static_implz'BaseQuantizer.quantize_bias_static_impl   s]    (	4::3I3I3KL)*:;	'*BB  0 0 = ==::i(DzzRZZ'!--55
rzz)!--33
"ghlhrhrgsst uvv![[4N1#^-A-ABJ(004O&*&7&7&B&B>Sf&g#JJ))+B*CDI %|3d:J jj3j@GGIPPQSQYQYZN ::nBHHEMMN^NcNcdL&*&7&7&B&B<Qd&e#JJ))+B*CD !jj9??KSSTVWO*I**J %8($B!(,(9(9(D(D_Vo(p%

%%'D&EF  0 0 = ==++K**00K!4}!D 0 0 = ==)-)@)@AWY]YjYjmnloruqv)w&__q 88J$4$4BHHEMMbQL)-):):)E)ElTj)k&)-)@)@AWYdfhkljm)n&

%%'A&BC  %"
 	
r,   c                    |j                   t        z   }|j                   dz   }|j                   dz   }t        |      }| j                  j	                  |j                   i       }	d|	v r|	d   j
                  }d|	v rd|	v rt        j                  |	d   t        |         }
t        j                  |	d         }t        ||j                         ||
      }t        |
t        j                        sJ dt        |
              |
j                  t        j                  k7  r|
j                  t        j                   k7  sJ d	|
j                          t        |t        j                        sAJ dt        |              t#        |j                         ||	j%                  d
| j&                        |	j%                  d| j(                  xr |      | j*                  |	j%                  d      |	j%                  d            \  }}}
}}t        |
t        j                        sJ dt        |
              |
j                  t        j                  k7  r|
j                  t        j                   k7  sJ d	|
j                          t        |t        j                        sJ dt        |              |j,                  }t.        j0                  j3                  ||g |j5                  d      j7                               }t.        j0                  j3                  ||g |
j5                  d      j7                               }| j8                  j;                  ||g       |s| j<                  t.        j>                  j@                  k(  r,t/        j>                         }| j<                  |_        |jB                  jE                  |jB                         ||_         |j                         jG                         jI                         |_%        tL        tM        |      }|jN                  |jN                  k7  s"|jI                         |jI                         k7  rtQ        d|jN                   d|jI                         dd  d|jI                         dd  d|jN                   dtS        |      dd  d      |t.        j>                  jT                  t.        j>                  jV                  fv r|j                  t        jX                  t        jZ                  fvrtQ        d| d      t]        t_        |jI                                     }t.        j0                  j3                  |||jB                  |d      }nmt        j`                  |t.        j0                  jc                  |            j5                  |jB                        }t.        jd                  jg                  ||      }| j8                  j;                  |g       |||fS )a  
        :param weight: TensorProto initializer
        :param qType: type to quantize to
        :param keep_float_weight: Whether to quantize the weight. In some cases, we only want to qunatize scale and zero point.
                                  If keep_float_weight is False, quantize the weight, or don't quantize the weight.
        :return: quantized weight name, zero point name, scale name
        r   r   default_val
quant_typer   
zero_pointr   Unexpected type Unexpected dtype 	symmetricrU   rminrmaxrU   r`   rmin_overridermax_override)r   NzThe initializer of shape z! could not be created, expecting r   z, got z and shape=z
raw=   r   Quantized weights for . must be 8-bit before packing as 4-bit values.Traw)4rN   r
   r   rn   get_per_tensor_overridesrD   r!   r   r	   r   flattenr   r"   r   r#   r$   r%   r   rZ   r^   rU   r`   r   r   r   r   r   tolistrS   r   rc   r   r   r   r   copytobytesraw_datar   r   RuntimeErrorr   INT4UINT4int8uint8bytesr   r   tensor_dtype_to_np_dtyper   r   )r'   r   qTyperU   keep_float_weightq_weight_namezp_name
scale_nameweight_dataquant_overridesr   r   q_weight_data_scale_dtypescale_initializerzero_initializerq_weight_initializercheckpacked_datas                       r*   quantize_initializer_implz'BaseQuantizer.quantize_initializer_impl  s.    &>>++-[[8+
 ,F355NNv{{hjNk?*#L1==Eo%,/*I/,"?G[\aGbcJHH_W56E,UK4G4G4I5R\]Mj"**5\9I$zJZI[7\\5  BJJ.:3C3Crzz3Q6":#3#3"456QeRZZ0R4DT%[M2RR0 6C##%##K1I1IJ,00ARARAcWcd#22-11&9-11&962Aq*e] j"**5\9I$zJZI[7\\5  BJJ.:3C3Crzz3Q6":#3#3"456QeRZZ0R4DT%[M2RR0&& KK33JRQVQ^Q^_dQeQlQlQno;;227E2zGYGYZ_G`GgGgGij

%%'8:J&KL   D$4$4$A$AA'+'7'7'9$151B1B$.$))00=,9$)0=0E0E0G0L0L0N0V0V0X$-$0 ..BCE{{k&7&775==?mNcNcNe;e*78I8I7JJk,446s;<F5==?SVTVCWBXXcdjdpdpcq$S)=%>t%D$EQH 
 4++00$2B2B2H2HII &&rww.AA&0?mn  $$6}7L7L7N$OP (,{{'>'>}eU[U`U`bmsw'>'x$ "

=@d@dej@k l t tKK! (,'8'8'C'CMS`'a$JJ))+?*@Agz11r,   c                    t        || j                  j                               }|t        d|      t	        |      }t        |j                        }t        ||      \  }	}
|	st        d| d| d|       |
}|j                  |   }| j                  j                  |d|ig      }t        |      }|dk7  r||k7  rt        d| d	| d
      t        |d   d   |      \  }}|r||k7  rt        d| d| d|d   d    d      d|d   v r|d   d   j                  }|d   j                  d| j                  xsO |t        j                  j                  t        j                  j                   t        j                  j"                  fv       }|d   j                  d| j$                  xr |      }g }g }g }t'        |      D ]  }|j)                  ||      }||k  r|nd}||   }d|v r0d|v r+t+        j,                  |d   t.        |         }t+        j,                  |d         }t1        ||j3                         ||      }t5        |t*        j6                        sJ dt9        |              |j:                  t*        j<                  k7  r|j:                  t*        j>                  k7  sJ d|j:                          t5        |t*        j6                        sJ dt9        |              t5        |t*        j6                        s6J dt9        |              tA        |j3                         |||| jB                  |j                  d      |j                  d            \  }}}}}t5        |t*        j6                        sJ dt9        |              |j:                  t*        j<                  k7  r|j:                  t*        j>                  k7  sJ d|j:                          t5        |t*        j6                        sJ dt9        |              t5        |t*        j6                        sJ dt9        |              |jE                  |       |jE                  |       |jE                  |        tG        |j                        }tG        |      }d||<   t+        jH                  |d         jK                  |      }t'        dt        |            D ]A  }t+        jH                  ||         jK                  |      }t+        jL                  ||f|      }C |tN        z   } |dz   }!|dz   }"|jP                  |   g}#t        jR                  jU                  |"|jV                  |#t+        jX                  |      j[                               }$t        jR                  jU                  |!||#t+        jX                  |      j[                               }%| j                  j]                  |$|%g       |s]|t        j                  j"                  t        j                  j^                  fv r|j:                  t*        j`                  t*        jb                  fvrte        d|  d      tg        ti        |jk                                     }&t        jR                  jU                  | |||&d       }'| j                  j]                  |'g       nt+        jH                  |t        jR                  jm                  |            jK                  |jP                        }t        jn                  jq                  ||       }'| j                  j]                  |'g       | |!|"fS )!Nz{} is not an initializerzWeight z# has a per-channel axis with value z  that is out-of-bounds for rank axisr   r   z.Per-channel tensor quantization overrides for z must have either 1 or z& elements in the list of dictionaries.r   z"Tensor quantization overrides for z& specify an unexpected axis. Expected z
, but got r   r   r   rU   r   r   r   r   r   r   r   r   r   r   r   r   Tr   )9r   rS   ro   r&   r   r6   r   r   rn   get_per_channel_overridesrD   rZ   r^   r   r   INT8r   r   rU   rangetaker!   r   r	   r   r   r   r"   r   r#   r$   r%   r   r`   appendlistr   r   concatenater
   r   r   r   r   hstackr   r   r   r   r   r   r   r   r   r   r   r   )(r'   r   rc   channel_axisrU   r   ro   weightsweights_rankis_axis_valid	axis_normchannel_countquant_overrides_for_channelsnum_channel_overridesis_axis_override_validaxis_overrider   zero_point_list
scale_listquantized_per_channel_data_listiper_channel_datachannel_override_indexchannel_quant_overridesr   r   quantized_per_channel_datar   weights_shapereshape_dimsquantized_weightschannel_weightsr   r   r   zero_scale_shaper   r   r   r   s(                                           r*    quantize_weight_per_channel_implz.BaseQuantizer.quantize_weight_per_channel_impll  s_    #;

0F0F0HI7EE'47==)#1,#M y+&I, X**69 
 !l3'+'B'B'\'\v|&<%= (] (
$ !$$@ A A%*?=*P@ N,o-SU 
 1??[\]?^_e?fht0u-%,)F4[M B(>4PQR4STZ4[3\\]_  7::7:<HTTL0377(( qD$4$4$9$94;K;K;X;XZ^ZjZjZoZo#pp	
	 4A6::>4K\K\Kmamn
*,'}% *	OA&||A|<*+.C*CQ"&BCY&Z#11lF]6]XX&=l&KSghtSuv
!8!AB-= "2":":"<eZ.* "*bjj9`=MdS]N^M_;``9$$

2z7G7G2::7U:&z'7'7&89:U!%4V8He6VV4!.

 I%d+E&F%GHI 
 GT$,,. !-#'#6#6"9"="=f"E"9"="=f"EGC1j%)C "*bjj9`=MdS]N^M_;``9$$

2z7G7G2::7U:&z'7'7&89:U!%4V8He6VV4!.

 I%d+E&F%GHI  "":.e$+223MNU*	OZ W]]+M*%&\"JJ'Fq'IJRRS_`q#=>? 	cA jj)H)KLTTUabO "0A?/SUa b	c $&>>- 8+
 (,,\:; KK33--/?:AVA]A]A_
  ;;22\#3RYY5O5V5V5X
 	

%%'8:J&KL  0 0 5 5t7G7G7M7MNN$**277BHH2EE&0?mn  $$67H7P7P7R$ST (,{{'>'>!<QU (? ($ 

--/C.DE$&JJ%++>>|L% '+**+ " (,'8'8'C'CDUWd'e$

--/C.DEgz11r,   c                 6   | j                   y | j                  j                         D ]n  }|j                  dv r| j                  r| j                  |      s1t        | j                  j                         |j                  d            dk7  rh|j                  d   | j                   vs|j                  d   | j                   vr| j                   |j                  d      }t        |t              s(t        dt        |       d|j                  d   d      || j                   |j                  d   <   |j                  dk(  s"t        t        j                  d      t        j                  d	      
      | j                   |j                  d   <   q y )N)ClipRelur   r   r   z for r   Softmaxr         ?)lowesthighest)rh   rS   nodesr   r_   r   r6   input_name_to_nodesrR   rQ   r   r   r   r   r!   r$   )r'   r   tds      r*   adjust_tensor_rangesz"BaseQuantizer.adjust_tensor_ranges  sW   %JJ$$& 	qD||////006tzz557

1FG1L::a=(:(::dkk!nTXTfTf>f''A7!"j1#&6tBxjdkkRSnEWWX$YZZ46""4::a=1*5?rzzRU`b`j`jkn`o5p""4;;q>2#	qr,   r.   )r#  )FF)TF)r8   r9   r:   r+   r}   r   r   r   r   rl   r   r   r  r)  r;   r,   r*   r=   r=   ;   sR     HZT"' A !FF
PW2| U2nqr,   r=   )r   typingr   r   numpyr!   r   onnx.numpy_helperonnx.reference.op_runr   ImportError	calibrater   
onnx_modelr   quant_utilsr	   r
   r   r   r   r   r   r   r   r   r   rn   r   r   r=   r;   r,   r*   <module>r2     so        7
 " !    ? ,]q ]qY  s   A! !A+*A+