
    Ig!                         d dl Z d dlmZmZmZmZmZmZ d dlm	Z	 ddl
mZ ddlmZ erddlmZ dd	lmZmZmZmZmZ dd
lmZ  e       rd dlZ ej2                  e      Z G d de      Zy)    N)TYPE_CHECKINGAnyDictListOptionalUnion)version   )HfQuantizer)get_module_from_name   )PreTrainedModel)is_accelerate_availableis_optimum_quanto_availableis_quanto_availableis_torch_availablelogging)QuantoConfigc            
       6    e Zd ZdZddgZdZdZdef fdZd Z	d	 Z
d
 Zd"dZdee   dedee   fdZdddddedeeef   def
dZdeeeeef   f   deeeeef   f   fdZdddddeddfdZd#dZg fdddee   fdZd Zed$ded   fd        Zd$d!Z xZS )%QuantoHfQuantizerz*
    Quantizer for the quanto library
    quanto
accelerateTFquantization_configc                 F    t        |   |fi | | j                          y N)super__init__	post_init)selfr   kwargs	__class__s      e/var/www/html/answerous/venv/lib/python3.12/site-packages/transformers/quantizers/quantizer_quanto.pyr   zQuantoHfQuantizer.__init__3   s     ,77    c                 `    | j                   j                  | j                  st        d      yy)z 
        Safety checker
        NzWe don't support quantizing the activations with transformers library.Use quanto library for more complex use cases such as activations quantization, calibration and quantization aware training.)r   activationspre_quantized
ValueError)r   s    r"   r   zQuantoHfQuantizer.post_init7   s;     ##//;DDVDVO  EW;r#   c                 l    t               st               st        d      t               st        d      y )NzhLoading an optimum-quanto quantized model requires optimum-quanto library (`pip install optimum-quanto`)z`Loading an optimum-quanto quantized model requires accelerate library (`pip install accelerate`))r   r   ImportErrorr   )r   argsr    s      r"   validate_environmentz&QuantoHfQuantizer.validate_environmentA   s;    +-1D1Fz  '(r  )r#   c                 <    |ddi}t         j                  d       |S )N cpuzThe device_map was not initialized. Setting device_map to {'':'cpu'}. If you want to use the model for inference, please set device_map ='auto')loggerinfo)r   
device_maps     r"   update_device_mapz#QuantoHfQuantizer.update_device_mapK   s+    eJKK\
 r#   returnc                 T    |%t         j                  d       t        j                  }|S )NzVYou did not specify `torch_dtype` in `from_pretrained`. Setting it to `torch.float32`.)r/   r0   torchfloat32)r   torch_dtypes     r"   update_torch_dtypez$QuantoHfQuantizer.update_torch_dtypeU   s$    KKpq--Kr#   missing_keysprefixc                    t               rddlm} n%t               rt        j                  d       ddlm} g }|j                         D ]\  \  }}t        |      s|D ]E  }||v s
|| d| v s|j                  d      r#|j                  d      r5|j                  |       G ^ |D 	cg c]	  }	|	|vs|	 c}	S c c}	w )Nr   QModuleMixinuImporting from quanto will be deprecated in v4.47. Please install optimum-quanto instrad `pip install optimum-quanto`.z.weightz.bias)r   optimum.quantor=   r   r/   warning_oncer   named_modules
isinstanceendswithappend)
r   modelr9   r:   r=   not_missing_keysnamemodulemissingks
             r"   update_missing_keysz%QuantoHfQuantizer.update_missing_keys[   s    &(3 " H ,!//1 	9LD&&,/+ 9GDvhay4I,I ' 0 0 ; ' 0 0 9(//89	9 (Ea14D+DEEEs   ,	B=6B=rF   r   param_valueztorch.Tensor
param_name
state_dictc                    t               rddlm} n%t               rt        j                  d       ddlm} |j                  dd      }|j                  dd      }|<|:t        |j                               }	|dk(  rt        |	      dkD  r|	dhk(  s|	dd	hk(  sy
t        ||      \  }
}t        |
      rd|v r|
j                   S y
)z=
        Check if a parameter needs to be quantized.
        r   r<   r>   r1   Nparam_devicer.   r
   diskFweight)r   r@   r=   r   r/   rA   r   getsetvalueslenr   rC   frozen)r   rF   rM   rN   rO   r    r=   r1   rQ   device_map_valuesrI   tensor_names               r"   check_quantized_paramz'QuantoHfQuantizer.check_quantized_paramp   s     '(3 " H ,ZZd3
zz.$7!l&> #J$5$5$7 8u$->)?!)C)eW48IeU[_8\ 25*Efl+K0G}}$$r#   
max_memoryc                 ^    |j                         D ci c]  \  }}||dz   }}}|S c c}}w )Ng?)items)r   r\   keyvals       r"   adjust_max_memoryz#QuantoHfQuantizer.adjust_max_memory   s6    6@6F6F6HI(#sc3:oI
I Js   )target_deviceztorch.devicec                     ddl m}  |||||       t        ||      \  }}	|j                          d|j                  _        y)ze
        Create the quantized parameter by calling .freeze() after setting it to the module.
        r   )set_module_tensor_to_deviceFN)accelerate.utilsrd   r   freezerS   requires_grad)
r   rF   rM   rN   rb   r*   r    rd   rI   _s
             r"   create_quantized_paramz(QuantoHfQuantizer.create_quantized_param   s;     	A#E:}kR(
;	&+#r#   c                 P   t        j                  t        j                  j                  d            t        j                  d      kD  rTddlm} t        j                  |j                  |j                  |j                  d}|| j                  j                     }|S t        d      )Nr   z0.27.0r   )CustomDtype)int8float8int4int2zYou are using `device_map='auto'` on an optimum-quanto quantized model. To automatically compute the appropriate device map, you should upgrade your `accelerate` library,`pip install --upgrade accelerate` or install it from source.)r	   parse	importlibmetadatare   rk   r5   rl   FP8INT4INT2r   weightsr'   )r   target_dtyperk   mappings       r"   adjust_target_dtypez%QuantoHfQuantizer.adjust_target_dtype   s    ==++33LABW]]S[E\\4 

%//#((#((	G #4#;#;#C#CDLP r#   keep_in_fp32_modulesc                    ddl m}m} | j                  j                   ||      | _        n| j                  j                  | _        t        | j                  t              s| j                  g| _        | j                  j                  |        ||| j                  | j                        \  }}| j                  |j                  _        y )Nr   )get_keys_to_not_convertreplace_with_quanto_layers)modules_to_not_convertr   )	integrationsr|   r}   r   r~   rC   listextendconfig)r   rF   rz   r    r|   r}   rh   s          r"   $_process_model_before_weight_loadingz6QuantoHfQuantizer._process_model_before_weight_loading   s     	W ##::B*A%*HD'*.*B*B*Y*YD'$55t<+/+F+F*GD'##**+?@-$*E*E[_[s[s
q ,0+C+C(r#   c                     |S r    r   rF   s     r"   #_process_model_after_weight_loadingz5QuantoHfQuantizer._process_model_after_weight_loading   s    r#   c                      y)NTr   r   s     r"   is_trainablezQuantoHfQuantizer.is_trainable   s    r#   c                      y)NFr   )r   safe_serializations     r"   is_serializablez!QuantoHfQuantizer.is_serializable   s    r#   )r7   torch.dtyper3   r   )rw   r   r3   r   r   ) __name__
__module____qualname____doc__required_packages requires_parameters_quantizationrequires_calibrationr   r   r   r+   r2   r8   r   strrL   r   r   boolr[   r   intra   ri   ry   r   r   propertyr   r   r   __classcell__)r!   s   @r"   r   r   *   sc    "<0'+$ L FtCy F# FRVWZR[ F*" " $" 	"
 cN" 
"HDeCHo1E,F 4PSUZ[^`c[cUdPdKe , , $, 	,
 &,&( KMD&D>B3iD* (+<"=  r#   r   )rq   typingr   r   r   r   r   r   	packagingr	   baser   quantizers_utilsr   modeling_utilsr   utilsr   r   r   r   r   utils.quantization_configr   r5   
get_loggerr   r/   r   r   r#   r"   <module>r      s[     B B   2 0  5 			H	%q qr#   