
    i                         d dl mZmZ ddlmZ ddlmZ erddlmZ ddl	m
Z
 ddlmZmZmZmZmZ dd	l	mZ  e       rd d
lZ ej&                  e      Z G d de      Zy
)    )TYPE_CHECKINGOptional   )HfQuantizer)get_module_from_name   )PreTrainedModel)FPQuantConfig)is_fp_quant_availableis_qutlass_availableis_torch_availableis_torch_xpu_availablelogging)QuantizationConfigMixinNc                        e Zd ZU dZdZdZded<   def fdZd Z	dd	Z
d
ddedefdZ	 	 ddZedd
ed   fd       Zd Zd Zd Z xZS )FPQuantHfQuantizerz
    Quantizer for the FP-Quant method. Enables the loading of prequantized models and in-flight quantization of full-precision models.
    FTr
   quantization_configc                 &    t        |   |fi | y N)super__init__)selfr   kwargs	__class__s      {/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/quantizers/quantizer_fp_quant.pyr   zFPQuantHfQuantizer.__init__+   s    ,77    c                 F   t         j                  j                         st               st	        d      t               s!| j                  j                  st        d      | j                  j                  rf| j                  j                  dk(  rMt         j                  j                         r/t         j                  j                         d   dk  rt        d      | j                  j                  rt        j                  d       t               st        d      |!| j                  j                  st        d	      t        |t               rT| j                  j                  s t#        |      d
kD  rd|j%                         v sd|j%                         v rt        d      y y )Nz]FPQuant quantization is only supported on GPU or Intel XPU. Please use a different quantizer.a  Using `fp_quant` with real quantization requires a **Blackwell GPU** and qutlass: `git clone https://github.com/IST-DASLab/qutlass.git && cd qutlass && pip install --no-build-isolation .`. You can use `FPQuantConfig(pseudoquantization=True, ...)` to use Triton-based pseudo-quantization. It doesn't provide any speedups but emulates the quantization behavior of the real quantization.nvfp4r   	   zNVFP4 pseudoquantization requires a GPU with compute capability >= 9.0 (Hopper or newer) because the Triton kernel uses the `fp8e4nv` type. Please use `forward_dtype='mxfp4'` instead, or use a GPU with compute capability >= 9.0.zUsing pseudo-quantization for FP-Quant. This doesn't provide any speedups but emulates the quantization behavior of the real quantization.zGUsing `fp_quant` quantization requires fp_quant: `pip install fp_quant`zyYou are attempting to load a FPQuant model without setting device_map. Please set device_map comprised of 'cuda' devices.r   cpudiskzYou are attempting to load a FPQuant model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.)torchcudais_availabler   NotImplementedErrorr   r   pseudoquantizationImportErrorforward_dtypeget_device_capability
ValueErrorloggerwarningr   
isinstancedictlenvalues)r   
device_mapr   s      r   validate_environmentz'FPQuantHfQuantizer.validate_environment.   sv   zz&&(1G1I%o  $%d.F.F.Y.Y S 
 $$77((66'A

'')

002159?  ##66NN ] %&ghhd&>&>&Q&QF  
D),,??
Oa'Z..00Z..00 h  1 *r   returnc                 ~    |t         j                  k7  r)t        j                  d| d       t         j                  }|S )NzSetting dtype to zP, but only bfloat16 is supported right now. Overwriting torch_dtype to bfloat16.)r"   bfloat16r+   warning_once)r   dtypes     r   update_dtypezFPQuantHfQuantizer.update_dtype^   s9    ENN"#E7*z{ NNEr   modelr	   
param_namec                 P    ddl m} t        ||      \  }}t        ||      r|dv ryy)Nr   )FPQuantLinear)weightqweightdqweightTF)fp_quantr<   r   r-   )r   r9   r:   r   r<   moduletensor_names          r   param_needs_quantizationz+FPQuantHfQuantizer.param_needs_quantizationf   s.    *25*Efm,@a1ar   c                 P    ddl m} ddlm}  || || j                               y )Nr   )replace_with_fp_quant_linearr   )adapt_fp_quant_config)fp_quant_linear_config)r@   rE   integrations.fp_quantrF   r   )r   r9   r   rE   rF   s        r   $_process_model_before_weight_loadingz7FPQuantHfQuantizer._process_model_before_weight_loadingp   s#    
 	:A$#89Q9Q#R	
r   c                 `    | j                   j                  }|st        j                  d       |S )NzYou are attempting to train a model with FPQuant quantization. This is only supported when `store_master_weights=True`. Please set `store_master_weights=True` to train the model.)r   store_master_weightsr+   r,   )r   r9   	trainables      r   is_trainablezFPQuantHfQuantizer.is_trainable~   s0    ,,AA	NN E r   c                      y)NT )r   s    r   is_serializablez"FPQuantHfQuantizer.is_serializable   s    r   c                     ddl m}  ||       S )Nr   )FpQuantQuantize)rH   rR   )r   rR   s     r   get_quantize_opsz#FPQuantHfQuantizer.get_quantize_ops   s    ;t$$r   c                     ddl m} ddlm} | j                  r>| j
                  j                  r |dgd ||       g      gS  |dgd ||       g      gS g S )Nr   )WeightConverter)FpQuantDeserializez	.dqweight)source_patternstarget_patterns
operationsz.qweight)core_model_loadingrU   rH   rV   pre_quantizedr   r&   )r   rU   rV   s      r   get_weight_conversionsz)FPQuantHfQuantizer.get_weight_conversions   ss    8>''::#)4(3$6t$<#=  $)3(2$6t$<#=  	r   )r7   torch.dtyper3   r]   )r9   r	   r   )__name__
__module____qualname____doc__requires_calibrationis_qat_trainable__annotations__r   r   r2   r8   strboolrC   rI   propertyr   rM   rP   rS   r\   __classcell__)r   s   @r   r   r   "   s     !((8,C 8.`.? S _c 
 
 (+<"=  %
r   r   )typingr   r   baser   quantizers_utilsr   modeling_utilsr	   utils.quantization_configr
   utilsr   r   r   r   r   r   r"   
get_loggerr^   r+   r   rO   r   r   <module>rp      sO    +  2 09 t t ? 			H	%B Br   