
    h!                         d dl mZmZmZ ddlmZ ddlmZ ddlm	Z	 erddl
mZ ddlmZmZmZmZmZ dd	lmZ  e       rd d
lZ ej*                  e      Z G d de      Zy
)    )TYPE_CHECKINGAnyOptional   )tqdm   )HfQuantizer)get_module_from_name)PreTrainedModel)is_accelerate_availableis_flute_availableis_hadamard_availableis_torch_availablelogging)QuantizationConfigMixinNc                       e Zd ZdZdZdZddgZdef fdZd Z	dd
Z
	 d dddddedddeeef   deee      fdZ	 d dddeee      fdZd!dZdee   ded	ee   fdZed	efd       Zd dZdddddedeeef   d	ef
dZd Z xZS )"HiggsHfQuantizerz
    Quantizer of the HIGGS method. Enables the loading of prequantized models and in-flight quantization of full-precision models.
    FTzflute-kernelfast_hadamard_transformquantization_configc                 4    t        |   |fi | || _        y N)super__init__r   )selfr   kwargs	__class__s      e/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/transformers/quantizers/quantizer_higgs.pyr   zHiggsHfQuantizer.__init__+   s    ,77#6     c                 n   t         j                  j                         st        d      t	               st        d      t               st        d      t               st        d      |t        d      t        |t              r0d|j                         v sd|j                         v rt        d      y y )	NzNHIGGS quantization is only supported on GPU. Please use a different quantizer.zHUsing `higgs` quantization requires Accelerate: `pip install accelerate`zLUsing `higgs` quantization requires FLUTE: `pip install flute-kernel>=0.3.0`zbUsing `higgs` quantization requires fast_hadamard_transform: `pip install fast_hadamard_transform`zwYou are attempting to load a HIGGS model without setting device_map. Please set device_map comprised of 'cuda' devices.cpudiskzYou are attempting to load a HIGGS model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.)torchcudais_availableNotImplementedErrorr   ImportErrorr   r   
ValueError
isinstancedictvalues)r   
device_mapr   s      r   validate_environmentz%HiggsHfQuantizer.validate_environment/   s    zz&&(%&vww&(hii!#lmm$&t  F  
D)u
8I8I8K/KvYcYjYjYlOld  Pm)r   returnc                     |'t         j                  d       t        j                  }|S |t        j                  k7  r"|t        j                  k7  rt        d| d      |S )NzG`dtype` is None. Setting `dtype=torch.float16` for FLUTE compatibility.zInvalid `dtype` zS. HIGGS quantization only supports `dtype=torch.float16` or `dtype=torch.bfloat16`.)loggerinfor"   float16bfloat16r'   )r   dtypes     r   update_dtypezHiggsHfQuantizer.update_dtypeI   s^    =KKabMME  emm#(?"5')|}  r   modelr   param_valueztorch.Tensor
param_nametarget_deviceztorch.device
state_dictunexpected_keysc                 (   ddl m} 	  ||j                  |      | j                  j                  | j                  j
                  | j                  j                  | j                  j                        }~t        ||      \  }	}
dj                  |j                  d      d d       }|j                         D ]  \  }}||	j                  v r/t        j                  j                  |d      |	j                  |<   C||	j                   v r-t        j                  j#                  |      |	j                   |<   ~|dk(  r/||	_        |j'                         | j                  j$                  |<   t)        d| d	|	        |||v r|j+                  |       y y y )
Nr   )quantize_with_higgs.F)requires_gradtune_metadatazUnexpected key z in module )integrationsr<   tor   bitsp
group_sizehadamard_sizer
   joinsplititems_parametersr"   nn	Parameter_buffersBufferr@   to_dictr'   remove)r   r5   r6   r7   r8   r9   r:   r<   
flute_dictmodule_module_namekeyvalues                 r   create_quantized_paramz'HiggsHfQuantizer.create_quantized_paramT   sw    	7	 )NN=)$$))$$&&$$//$$22

 (
;	hhz//4Sb9:$**, 		MJCf(((*/((*<*<URW*<*X""3''',xxu'=$'',$FKmmo((66{C ?3%{6(!KLL		M &:+H"":. ,I&r   keep_in_fp32_modulesc                     ddl m} | j                  || j                  j                  |      | _         ||| j                  | j                         | j                  |j
                  _        y )Nr   )replace_with_higgs_linear)r   modules_to_not_convert)rA   rZ   get_modules_to_not_convertr   r[   config)r   r5   rX   r   rZ   s        r   $_process_model_before_weight_loadingz5HiggsHfQuantizer._process_model_before_weight_loading{   sa     	=&*&E&E4++BBDX'
# 	" $ 8 8#'#>#>	

 ,0+C+C(r   c                     ddl m}m} ddlm} ddlm} i }|j                         D 	ci c]  \  }}	t        |	|      s||	 }
}}	t        |
j                         dd      D ]"  \  }}	|	j                  j                  |vr4 ||	j                  j                  	      ||	j                  j                  <   ||	j                  j                     |	_        |j                  | j                  j                   |         |	_         ||	j                  j"                  |	j$                  j"                  |	j                   
      \  |	j                  _        |	_        |	j                   j'                         | j                  j                   |<   % y c c}	}w )Nr   )TuneMetaDatamaybe_tune_and_repack)make_workspace_streamkr   HiggsLinearzRepacking HIGGS modulesF)descleave)device)weightscalesmetadata)
flute.tuner`   ra   flute.utilsrb   rA   rd   named_modulesr(   r   rI   rh   rg   	workspace	from_dictr   r@   datari   rO   )r   r5   r   r`   ra   rb   rd   flute_workspacesnamerR   flute_moduless              r   #_process_model_after_weight_loadingz4HiggsHfQuantizer._process_model_after_weight_loading   sN   B6.:?:M:M:Os,$S]^dfqSrvss !4!4!6=V^cd 	ZLD& }}##+;;9OW]WdWdWkWk9l !5!56/0D0DEF $0#9#9$:R:R:`:`ae:f#gF 7L}}))}}))--84FMM 4
 <B;O;O;W;W;YD$$2248	Z ts
   F
F
missing_keysprefixc                    	 ddl m} |j                         D ch c]  \  }}t        ||      s| c}}	dt        dt
        f	fd}|D cg c]  } ||      r| c}S c c}}w c c}w )Nr   rc   rU   r-   c                       j                  d      s j                  d      ry d  t         fdD              S )Nz.weightz.biasFr=   c              3   2   K   | ]  }|v xs |v   y wr    ).0rr   full_keyrU   s     r   	<genexpr>zNHiggsHfQuantizer.update_missing_keys.<locals>.should_update.<locals>.<genexpr>   s"     O4ts{6dh&66Os   )endswithany)rU   r|   higgs_namesrv   s   `@r   should_updatez;HiggsHfQuantizer.update_missing_keys.<locals>.should_update   s>    ||I&#,,w*? 3%(HO;OOOr   )rA   rd   rm   r(   strbool)
r   r5   ru   rv   rd   rr   rR   r   rU   r   s
      `     @r   update_missing_keysz$HiggsHfQuantizer.update_missing_keys   sj    .050C0C0EifTZ\gIhti	Ps 	Pt 	P  ,F=3EFF j Gs   A%A%A+A+c                      y)NFrz   )r   s    r   is_trainablezHiggsHfQuantizer.is_trainable   s    r   c                      y)NTrz   )r   safe_serializations     r   is_serializablez HiggsHfQuantizer.is_serializable   s    r   c                     ddl m} t        ||      \  }}t        ||      r#|dk(  r|j                  t
        j                  k7  ryy)Nr   rc   rh   TF)rA   rd   r
   r(   r3   r"   int16)	r   r5   r6   r7   r9   r   rd   rR   tensor_names	            r   check_quantized_paramz&HiggsHfQuantizer.check_quantized_param   sC     	/25*Efk*{h/F;K\K\`e`k`kKkr   c                 "    ddl m}  ||      }|S )Nr   )dequantize_higgs)rA   r   )r   r5   r   s      r   _dequantizezHiggsHfQuantizer._dequantize   s    3 'r   )r3   torch.dtyper-   r   r   )r5   r   )__name__
__module____qualname____doc__requires_calibration requires_parameters_quantizationrequired_packagesr   r   r,   r4   r   r)   r   r   listrW   r^   rt   r   propertyr   r   r   r   r   __classcell__)r   s   @r   r   r   "   sH    !'+$')BC7,C 74	$ 04%/ %/ $%/ 	%/
 &%/ cN%/ "$s),%/T 59D D 'tCy1D&Z2GtCy G# GRVWZR[ G d    $ 	
 cN 
"r   r   )typingr   r   r   utils.loggingr   baser	   quantizers_utilsr
   modeling_utilsr   utilsr   r   r   r   r   utils.quantization_configr   r"   
get_loggerr   r/   r   rz   r   r   <module>r      sR    0 /    2 0 s s ? 			H	%n{ nr   