
    i                         d dl Zd dlmZ d dlmZ ddlmZ erddlm	Z	 ddl
mZ dd	lmZmZmZmZ dd
l
mZ  e       rd dlZ ej&                  e      Z G d de      Zy)    N)TYPE_CHECKING)version   )HfQuantizer   )PreTrainedModel)	AwqConfig)is_accelerate_availableis_gptqmodel_availableis_torch_availablelogging)
AwqBackendc                   b     e Zd ZU dZdZded<    fdZd Zd ZddZ	d	 Z
d
 Zed        Z xZS )AwqQuantizerzu
    4-bit quantization for Activation-aware Weight Quantization(AWQ) (https://huggingface.co/papers/2306.00978)
    Tr	   quantization_configc                 &    t        |   |fi | y )N)super__init__)selfr   kwargs	__class__s      v/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/quantizers/quantizer_awq.pyr   zAwqQuantizer.__init__-   s    ,77    c                 X    t               st        d      t               st        d      y )NzaLoading an AWQ quantized model requires gptqmodel. Please install it with `pip install gptqmodel`zMLoading an AWQ quantized model requires accelerate (`pip install accelerate`))r   ImportErrorr
   )r   r   s     r   validate_environmentz!AwqQuantizer.validate_environment0   s1    %'s  '(mnn )r   c                    |t         j                  k(  rct         j                  j                         st         j                  j                         r't
        j                  d       t         j                  }|S |t         j                  k7  rQt         j                  j                         st         j                  j                         rt
        j                  d       |S )Nz[`torch.bfloat16` is not supported for AWQ CUDA/XPU kernels yet. Casting to `torch.float16`.zWWe suggest you to set `dtype=torch.float16` for better efficiency on CUDA/XPU with AWQ.)torchbfloat16cudais_availablexpuloggerwarningfloat16)r   dtypes     r   update_dtypezAwqQuantizer.update_dtype9   s    ENN"

(?(?(AUYYE[E[E]NNm MME  emm#)@)@)BeiiF\F\F^NNtur   c                    ddl m}m} | j                  || j                  j
                  |j                  d      | _         ||| j                  | j
                  |j                  d            } |||j                  j                        }y )Nr   )replace_quantization_scalesreplace_with_awq_linearT)add_default_skips
device_map)r   modules_to_not_convertr,   )
integrationsr)   r*   get_modules_to_not_convertr   r-   _keep_in_fp32_modulesgetconfig
model_type)r   modelr   r)   r*   s        r   $_process_model_before_weight_loadingz1AwqQuantizer._process_model_before_weight_loadingC   s    W&*&E&E4++BBED_D_sw 'F '
# ( $ 8 8#'#>#>zz,/	
 ,E5<<3J3JKr   c                 L    ddl m}  ||| j                  j                         y )Nr   )hf_gptqmodel_post_init)use_act_order)gptqmodel.utils.modelr7   r   desc_act)r   r4   r   r7   s       r   #_process_model_after_weight_loadingz0AwqQuantizer._process_model_after_weight_loadingS   s    @uD4L4L4U4UVr   c                     | j                   j                  t        j                  t        j                  fv rt
        j                  d       yy)Nz7You cannot save an AWQ model that uses Exllama backend!FT)r   backendr   
EXLLAMA_V1
EXLLAMA_V2r#   r$   r   s    r   is_serializablezAwqQuantizer.is_serializableX   s:    ##++
0E0EzG\G\/]]NNTUr   c                     t        j                  t        j                  j                  d            t        j                  d      k\  S )N	gptqmodelz5.0.0)r   parse	importlibmetadatar@   s    r   is_trainablezAwqQuantizer.is_trainable_   s1    }}Y//77DEW^I___r   )r4   r   )__name__
__module____qualname____doc__requires_calibration__annotations__r   r   r'   r5   r;   rA   propertyrG   __classcell__)r   s   @r   r   r   $   sM    
  $$8oL W
 ` `r   r   )importlib.metadatarE   typingr   	packagingr   baser   modeling_utilsr   utils.quantization_configr	   utilsr
   r   r   r   r   r   
get_loggerrH   r#   r    r   r   <module>rY      sR         05 ` ` 2 			H	%=`; =`r   