
    ##h0                         d dl mZmZmZmZ ddlmZ ddlmZm	Z	m
Z
mZ ddlmZ ddlmZ erddlmZ  e       rd d	lmZ  e
       rd d
lZ ej*                  e      Zd Z G d de      Zy
)    )TYPE_CHECKINGAnyDictList   )prepare_for_hqq_linear)is_accelerate_availableis_hqq_availableis_torch_availablelogging   )HfQuantizer)get_module_from_name)PreTrainedModel)remove_hook_from_moduleNc                 ^    |j                  d      d d }| }|D ]  }|j                  |   } |S )N.)split_modules)modelnamemodule_treeparentms        b/var/www/html/sandstorm/venv/lib/python3.12/site-packages/transformers/quantizers/quantizer_hqq.pyfind_parentr   %   s=    **S/#2&KF $#$M    c                       e Zd ZdZdZdZdZdgZ fdZd Z	ddd	e
e   d
ede
e   fdZddde
e   de
e   de
e   fdZdddddedeeef   def
dZdddddedddeeef   de
e   fdZd Z	 	 ddZddZddZedefd       Z xZS ) HqqHfQuantizerz
    HQQ quantizer base HF class.
    nn.Linear modules are first tagged with quant_config in _process_model_before_weight_loading().
    The actual quantization and offloading to the GPU is done in check_quantized_param().
    FThqqc                 B    t        |   |fi | d | _        d| _        y )NF)super__init__torch_dtypeusing_multi_gpu)selfquantization_configkwargs	__class__s      r   r$   zHqqHfQuantizer.__init__9   s&    ,77$r   c                 `   t               st        d      |j                  dd      s|j                  dd      rt        d      t        j
                  j                         st        d      | j                  9d|v r|d   | _        n*t        j                  | _        t        j                  d       |j                  d	d       }t        |t              rZd
|j                         v sd|j                         v rt        d      t        t!        |j                                     dkD  | _        y y )NzA valid HQQ version (>=0.2.1) is not available. Please follow the instructions to install it: `https://github.com/mobiusml/hqq/`.from_tfF	from_flaxzwConverting weights from tf/flax weights is currently not supported, please make sure the weights are in PyTorch format.z/No GPU found. A GPU is needed for quantization.r%   zUSetting torch_dtype to torch.float32 as the default value since it was not specified.
device_mapcpudiskzYou are attempting to use an HQQ model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.r   )r
   ImportErrorget
ValueErrortorchcudais_availableRuntimeErrorr%   float32loggerinfo
isinstancedictvalueslensetr&   )r'   argsr)   r.   s       r   validate_environmentz#HqqHfQuantizer.validate_environment>   s    " T  ::i'6::k5+I; 
 zz&&(PQQ#&#)-#8 #(== stZZd3
j$'
))++v9J9J9L/L h 
 (+3z/@/@/B+C'Dq'H$ (r   r   r   missing_keysprefixreturnc                 R    | j                   r|D cg c]	  }d|vs| c}S |S c c}w )Nweight)pre_quantized)r'   r   rB   rC   r)   keys         r   update_missing_keysz"HqqHfQuantizer.update_missing_keys^   s1     #/ICHC4GCII Js   	$$expected_keysloaded_keysc                    | j                   s|S fdt        |      }t               rNddlm} |j                         D ]  \  }}||_         t               } ||       t               }	|D ]6  }
|j                  j                  d   D ]  }||
v s|	j                  |
        8 ||	z  } |d d t        j                  d      j                         dhz
  }t               }|D ](  t        fd|D              s|j                         * ||z  }|D ]_  }
|
d	z   |v r|j                  |
d	z          n%|j                  |D ch c]
  }|
d
z   |z    c}       |
dz   |v sL|j                  |
dz          a t        |      S c c}w )Nc                     | j                         D ]M  \  }}t        |t        j                  j                        r|j                  |j                          ||       O y N)named_childrenr;   r4   nnLinearaddr   )r   layersr   module_find_hqq_quantizable_layerss       r   rU   zIHqqHfQuantizer.update_expected_keys.<locals>._find_hqq_quantizable_layersn   sK     % 4 4 6 =ffuxx8JJv{{+,VV<=r   r   	HQQLinearskip_modulesr/   linear_layerquant_configcompute_dtypedevicebiasc              3   &   K   | ]  }|v  
 y wrN    ).0_modulerH   s     r   	<genexpr>z6HqqHfQuantizer.update_expected_keys.<locals>.<genexpr>   s     D'w#~Ds   z.weightr   z.bias)rG   r?   r
   hqq.core.quantizerW   named_modulesr   configr(   rR   r4   float16state_dict_keysanyupdatelist)r'   r   rJ   rK   new_keysrW   r   rT   _valid_modules_skipped_modulesrb   _skip_module	_ref_keys_rm_keys_ref_keyrU   rH   s                  @@r   update_expected_keysz#HqqHfQuantizer.update_expected_keysg   s    !!  	= }%3 !& 3 3 5 #f"# !UN(?  #u) 6$)LL$D$D^$T 6L#w.(,,W566 ..N "!EMMZ_o6(+I
 uH &D^DDLL%&  H * 4Y&+5LL9!45OOi$X(Ws]X%=$XYW$3LL7!234 H~	 %Ys   F
param_valueztorch.Tensor
param_name
state_dictc                 X   t               rddlm} t        ||      \  }}| j                  r@t        |t        j                  j                        xs t        |      xr |dk7  xr |dk7  S t        |t        j                  j                        xr |dk(  xs t        |      xr |dk(  S )Nr   rV   rF   r^   )	r
   rd   rW   r   rG   r;   r4   rP   rQ   )	r'   r   rt   ru   rv   r)   rW   rT   tensor_names	            r   check_quantized_paramz$HqqHfQuantizer.check_quantized_param   s     325*EFEHHOO4U
698U *8+*6) 6588??3 ,8+Mvy1KkV6Kr   target_deviceztorch.deviceunexpected_keysc           	         t               rddlm} t        ||      \  }}	dj	                  |j                  d      dd       }
t        ||
      }|
j                  d      d   }|	dk(  ryi }|j                         D ]=  \  }}|
dz   |v s|||j                  d      d   <   |(||v s-|j                  |       ? | j                  rt        |      ry |dd| j                  |      }|j                  |       |j                  Rt        |j                  t        j                        r.t        j                   j#                  |j                        |_        | j$                  r| j'                  |      }t)        |||       |`~t        j,                  j/                          y|D ]/  }t)        ||t        j                   j#                  ||                1 |j0                  j2                  d   }|j0                  j2                  d	   }dj	                  |j4                  j                  d      d
d       }d}d|v r|}n	||v r||   }|D ]  }||j4                  v sd} n | ||| j                  |d      }|j                  Rt        |j                  t        j                        r.t        j                   j#                  |j                        |_        | j$                  r| j'                  |      }t)        |||       n*|j7                  | j                  |      }t)        |||       t        j,                  j/                          y)a  
        Each nn.Linear layer is processed here.
        We first check if the corresponding module state_dict contains already HQQ quantized parameters.
        If not, we create a temp linear layer with the module state_dict params and use it for quantization
        r   rV   r   Nr   r^   rY   r[   rX   weight_quant_paramsT)r[   r\   r]   del_orig)dtyper]   )r
   rd   rW   r   joinr   r   itemsremoverG   r;   r%   load_state_dictr^   r4   TensorrP   	Parameterr&   _patch_layer_for_multigpusetattr__dict__r5   empty_cacherf   r(   r   to)r'   r   rt   ru   rz   rv   r{   rW   rT   rx   
layer_nameparent_modulenodemodule_state_dictkv	hqq_layerrH   r[   rX   
module_tagmodule_quant_configskip_modules                          r   create_quantized_paramz%HqqHfQuantizer.create_quantized_param   s    325*EXXj..s3CR89
#E:6$R(&  $$& 	.DAqC1$67!!''#,r"23".13G#**1-		. &),%!%!%"&"2"2(		 %%&78~~)j.V!&!3!3INN!C	## ::9E	M43 JJ""$ % 	MCFC!3!34Ec4J!KL	M
 ||77G||77GXXfkk//4RS9:
" L0".<'".z":' 	Kfkk)&*#	
 *!0"..$I ~~)j.V!&!3!3INN!C	## ::9E	M43 YYT%5%5mYLFM40

 r   c                 <    t              d fd_        S )Nc                     t        j                  |j                  | j                        | j	                         j                               }| j                  || j                  z  }|S rN   )r4   matmulr   r]   
dequantizetr^   )r'   xouts      r   forward_with_devicezEHqqHfQuantizer._patch_layer_for_multigpu.<locals>.forward_with_device&  sL    ,,qttDKK0$//2C2E2E2GHCyy$tyy Jr   c                      |       S rN   r`   )r   r   r   s    r   <lambda>z:HqqHfQuantizer._patch_layer_for_multigpu.<locals>.<lambda>,  s    &9)Q&G r   )r   forward)r'   r   r   s    `@r   r   z(HqqHfQuantizer._patch_layer_for_multigpu#  s#    +I6		 H	r   c                 2    t        || j                        }y )N)r(   )r   r(   r'   r   r)   s      r   $_process_model_before_weight_loadingz3HqqHfQuantizer._process_model_before_weight_loading/  s     'u$BZBZ[r   c                 >    d|_         | j                         |_        |S NT)is_hqq_quantizedis_serializableis_hqq_serializabler   s      r   #_process_model_after_weight_loadingz2HqqHfQuantizer._process_model_after_weight_loading8  s     !%$($8$8$:!r   c                      yr   r`   )r'   safe_serializations     r   r   zHqqHfQuantizer.is_serializable=  s    r   c                      yr   r`   )r'   s    r   is_trainablezHqqHfQuantizer.is_trainable@  s    r   )r   r   rN   )__name__
__module____qualname____doc__use_keep_in_fp32_modules requires_parameters_quantizationrequires_calibrationrequired_packagesr$   rA   r   strrI   rs   r   r   boolry   r   r   r   r   r   propertyr   __classcell__)r*   s   @r   r    r    -   sY     %'+$ %
I@ & 6:3i IL 	c 7&77;Cy7OSTWy7	c7r  $ 	
 cN 
4f! f! $f! 	f!
 &f! cNf! cf!R
\ \
 d  r   r    )typingr   r   r   r   integrationsr   utilsr	   r
   r   r   baser   quantizers_utilsr   modeling_utilsr   accelerate.hooksr   r4   
get_loggerr   r9   r   r    r`   r   r   <module>r      s]    2 1 1 Z Z  2 0 8			H	%U[ Ur   