
    ##h                         d dl mZmZmZ ddlmZ erddlmZ ddlm	Z	 ddl
mZmZmZmZ ddlmZ  e       rd d	lZ ej$                  e      Z G d
 de      Zy	)    )TYPE_CHECKINGListOptional   )HfQuantizer   )PreTrainedModel)replace_with_spqr_linear)is_accelerate_availableis_spqr_availableis_torch_availablelogging)QuantizationConfigMixinNc                        e Zd ZdZdZdef fdZd ZddZ	 dddd	e	e
e      fd
ZddZedde	d   fd       ZddZ xZS )SpQRHfQuantizerzS
    Quantizer of the SpQR method. Enables the loading of prequantized models.
    Tquantization_configc                 4    t        |   |fi | || _        y N)super__init__r   )selfr   kwargs	__class__s      c/var/www/html/sandstorm/venv/lib/python3.12/site-packages/transformers/quantizers/quantizer_spqr.pyr   zSpQRHfQuantizer.__init__(   s    ,77#6     c                     t         j                  j                         st        d      t	               st        d      t               st        d      y )Nz,GPU is required to run SpQR quantized model.zGUsing `spqr` quantization requires Accelerate: `pip install accelerate`zFUsing `spqr` quantization requires SpQR: `pip install spqr_quant[gpu]`)torchcudais_availableRuntimeErrorr   ImportErrorr   )r   argsr   s      r   validate_environmentz$SpQRHfQuantizer.validate_environment,   sG    zz&&(MNN&(ghh "fgg #r   c                     |'t         j                  }t        j                  d       |S |t         j                  k7  rt	        d      |S )NzHAssuming SpQR inference on GPU and loading the model in `torch.float16`.z|You cannot use any type other than torch.float16 for SpQR. Please either leave it None or set it totorch.float16 explicitly.)r   float16loggerinfo
ValueError)r   torch_dtypes     r   update_torch_dtypez"SpQRHfQuantizer.update_torch_dtype6   sN    --KKKbc  EMM),  r   modelr	   keep_in_fp32_modulesc                     | j                  || j                  j                  |      | _        t        || j                  | j                         | j                  |j                  _        y )N)r   modules_to_not_convert)get_modules_to_not_convertr   r.   r
   config)r   r+   r,   r   s       r   $_process_model_before_weight_loadingz4SpQRHfQuantizer._process_model_before_weight_loadingA   s^     '+&E&E4++BBDX'
# 	! $ 8 8#'#>#>	

 ,0+C+C(r   c                     |S r    )r   r+   r   s      r   #_process_model_after_weight_loadingz3SpQRHfQuantizer._process_model_after_weight_loadingR   s    r   c                      y)NFr3   )r   r+   s     r   is_trainablezSpQRHfQuantizer.is_trainableU   s    r   c                      y)NTr3   )r   safe_serializations     r   is_serializablezSpQRHfQuantizer.is_serializableY   s    r   )r)   torch.dtypereturnr:   r   )r+   r	   )__name__
__module____qualname____doc__requires_calibrationr   r   r#   r*   r   r   strr1   r4   propertyr6   r9   __classcell__)r   s   @r   r   r   !   s{      7,C 7h	 59D D 'tCy1D" (+<"=  r   r   )typingr   r   r   baser   modeling_utilsr	   integrationsr
   utilsr   r   r   r   utils.quantization_configr   r   
get_loggerr<   r&   r   r3   r   r   <module>rK      sM    1 0  0 3 [ [ ? 			H	%9k 9r   