
    i                         d dl mZ ddlmZ erddlmZ ddlmZ ddlm	Z	m
Z
mZ  e
       rd dlZ ej                  e      Z G d	 d
e      Zy)    )TYPE_CHECKING   )HfQuantizer   )PreTrainedModel)BitNetQuantConfig)is_accelerate_availableis_torch_availableloggingNc                        e Zd ZU dZdZded<    fdZd Z	 	 ddZde	e
ee
z  f   d	e	e
ee
z  f   fd
Zd Zed	efd       Zed	efd       Zd Z xZS )BitNetHfQuantizerz
    1.58-bit quantization from BitNet quantization method:
    Before loading: it converts the linear layers into BitLinear layers during loading.

    Check out the paper introducing this method: https://huggingface.co/papers/2402.17764
    Tr   quantization_configc                 &    t        |   |fi | y )N)super__init__)selfr   kwargs	__class__s      y/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/quantizers/quantizer_bitnet.pyr   zBitNetHfQuantizer.__init__,   s    ,77    c                    t               st        d      t        j                  j	                         st
        j                  d       y |j                  d      }|t
        j                  d       y t        |t              r>t        |      dkD  rd|j                         v sd|j                         v rt        d      y y )	NzOLoading a BitNet quantized model requires accelerate (`pip install accelerate`)zhYou don't have a GPU available to load the model, the inference will be slow because of weight unpacking
device_mapzYou have loaded a BitNet model on CPU and have a CUDA device available, make sure to set your model on a GPU device in order to run your model.r   cpudiskzYou are attempting to load a BitNet model with a device_map that contains a CPU or disk device.This is not supported. Please remove the CPU or disk device from the device_map.)r	   ImportErrortorchcudais_availableloggerwarning_onceget
isinstancedictlenvalues
ValueError)r   argsr   r   s       r   validate_environmentz&BitNetHfQuantizer.validate_environment/   s    &(oppzz&&(z ZZ-
I 
D):"u
0A0A0C'CvQ[QbQbQdGd g  He *r   c                     ddl m} | j                  || j                  j                  |j
                        | _         ||| j                  | j                        }y )Nr   )replace_with_bitnet_linear)modules_to_not_convertr   )integrationsr*   get_modules_to_not_convertr   r+   _keep_in_fp32_modules)r   modelr   r*   s       r   $_process_model_before_weight_loadingz6BitNetHfQuantizer._process_model_before_weight_loadingF   sT    
 	>&*&E&E4++BBED_D_'
# +#'#>#> $ 8 8
r   
max_memoryreturnc                 ^    |j                         D ci c]  \  }}||dz   }}}|S c c}}w )Ng?)items)r   r1   keyvals       r   adjust_max_memoryz#BitNetHfQuantizer.adjust_max_memoryW   s6    6@6F6F6HI(#sc3:oI
I Js   )c                      y)NT r   s    r   is_serializablez!BitNetHfQuantizer.is_serializable[   s    r   c                 j    | j                   j                  dk(  xr | j                   j                  dk(  S )Nautobitlinearonliner   linear_classquantization_moder:   s    r   is_trainablezBitNetHfQuantizer.is_trainable^   s7     $$11_D G((::hF	
r   c                 j    | j                   j                  dk(  xr | j                   j                  dk(  S )zUFlag indicating whether the quantized model can carry out quantization aware trainingr=   r>   r?   r:   s    r   is_qat_trainablez"BitNetHfQuantizer.is_qat_trainablee   s7     $$11_D G((::hF	
r   c                     ddl m} ddlm} | j                  j
                  dk(  r.| j                  j                  dk(  r |dgdg ||       g      gS g S )Nr   )WeightConverter)BitNetDeserializer=   offlineweight)source_patternstarget_patterns
operations)core_model_loadingrF   integrations.bitnetrG   r   r@   rA   )r   rF   rG   s      r   get_weight_conversionsz(BitNetHfQuantizer.get_weight_conversionsm   sb    8; $$11_D((::iG  %-J%-J 1$ 78  	r   )r/   r   )__name__
__module____qualname____doc__requires_calibration__annotations__r   r(   r0   r#   strintr7   r;   propertyboolrB   rD   rO   __classcell__)r   s   @r   r   r   !   s      ,,8.
 
"DcCi,@ T#sUXy.EY  
d 
 
 
$ 
 
r   r   )typingr   baser   modeling_utilsr   utils.quantization_configr   utilsr	   r
   r   r   
get_loggerrP   r   r   r9   r   r   <module>ra      sK    !  0= H H  
		H	%[ [r   