
    i                         d dl mZ d dlmZ d dlmZ ddlmZ erddlm	Z	 ddl
mZmZmZmZ dd	lmZmZ  e       rd d
lZ ej&                  e      ZdZdZ G d de      Zy
)    )metadata)TYPE_CHECKING)version   )HfQuantizer   )PreTrainedModel)is_gptqmodel_availableis_optimum_availableis_torch_availablelogging)
GPTQConfigQuantizationConfigMixinNz1.4.3z1.24.0c                   x     e Zd ZU dZdZded<   def fdZd ZddZ	d	 Z
dd
ZddZedefd       Zd Z xZS )GptqHfQuantizerz
    Quantizer of the GPTQ method - for GPTQ the quantizer support calibration of the model through
    the GPT-QModel package (Python import name `gptqmodel`). Quantization is done under the hood for users if they
    load a non-prequantized model.
    Fr   quantization_configc                     t        |   |fi | t               st        d      ddlm} |j                  | j                  j                               | _	        y )NGLoading a GPTQ quantized model requires optimum (`pip install optimum`)r   )GPTQQuantizer)
super__init__r   ImportErroroptimum.gptqr   	from_dictr   to_dict_optimumoptimum_quantizer)selfr   kwargsr   	__class__s       w/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/quantizers/quantizer_gptq.pyr   zGptqHfQuantizer.__init__1   sM    ,77#%ghh.!.!8!89Q9Q9a9a9c!d    c                    t               st        d      t               }|s)t        j                  j                         st        d      t               st        d      t               rt        j                  t        j                  d            t        j                  t              k  sBt        j                  t        j                  d            t        j                  t              k  rt        dt         dt               y y )Nr   z2GPU is required to quantize or run quantize model.zTLoading a GPTQ quantized model requires gptqmodel (`pip install gptqmodel`) library.	gptqmodeloptimumz#The gptqmodel version should be >= z, optimum version should >= )r   r   r
   torchcudais_availableRuntimeErrorr   parser   MIN_GPTQ_VERSIONMIN_OPTIMUM_VERSION)r   argsr   gptq_supports_cpus       r    validate_environmentz$GptqHfQuantizer.validate_environment:   s    #%ghh24 )@)@)BSTT')tuu#%MM(**;787==IY;ZZ}}X--i89GMMJ]<^^56F5GGcdwcxy  _ &r!   returnc                 V    |t         j                  k7  rt        j                  d       |S )NzLWe suggest you to set `dtype=torch.float16` for better efficiency with GPTQ.)r%   float16loggerinfo)r   dtypes     r    update_dtypezGptqHfQuantizer.update_dtypeK   s     EMM!KKfgr!   c                 8    |dt        j                  d      i}|S )N cpu)r%   device)r   
device_maps     r    update_device_mapz!GptqHfQuantizer.update_device_mapP   s!    ell512Jr!   c                 \   |j                   j                  dk7  rt        d      | j                  r|t	        j
                  t        j                  d            t	        j
                  t              k  r| j                  j                  |      }y  | j                  j                  |fi |}y y )N	input_idsz%We can only quantize pure text model.r$   )
r   main_input_namer(   pre_quantizedr   r)   r   r+   r   convert_modelr   modelr   s      r    $_process_model_before_weight_loadingz4GptqHfQuantizer._process_model_before_weight_loadingU   s    ??**k9FGG}}X--i89GMMJ]<^^..<<UC<..<<UMfM r!   c                    | j                   r| j                  j                  |      }y | j                  j                  |j
                  | j                  _        | j                  j                  || j                  j                         t        j                  | j                  j                               |j                  _        y )N)r?   r   post_init_modelr   	tokenizername_or_pathquantize_modelr   r   to_dictconfigrA   s      r    #_process_model_after_weight_loadingz3GptqHfQuantizer._process_model_after_weight_loading`   s    **::5AE''1195:5G5G((2""11%9Q9Q9[9[\/9/C/CDDZDZDbDbDd/eELL,r!   c                      yNT r   s    r    is_trainablezGptqHfQuantizer.is_trainablej   s    r!   c                      yrM   rN   rO   s    r    is_serializablezGptqHfQuantizer.is_serializablen   s    r!   )r4   torch.dtyper/   rS   )rB   r	   )__name__
__module____qualname____doc__requires_calibration__annotations__r   r   r.   r5   r;   rC   rK   propertyboolrP   rR   __classcell__)r   s   @r    r   r   '   s`     !%%e,C e"

	Nf d  r!   r   )	importlibr   typingr   	packagingr   baser   modeling_utilsr	   utilsr
   r   r   r   utils.quantization_configr   r   r%   
get_loggerrT   r2   r*   r+   r   rN   r!   r    <module>re      s]         0 ] ] K 			H	%   Hk Hr!   