
    i                        d dl Z d dlmZ ddlmZ ddlmZmZ erddlm	Z	 ddl
mZ d d	lmZ dd
lmZmZmZ dZ e       rddlmZ  e       rd dlZ e       rd dlmZ  ej0                  e      Zdededz  fdZ G d de      Zy)    N)TYPE_CHECKING   )HfQuantizer)get_module_from_nameshould_convert_module   )PreTrainedModel)TorchAoConfig)	safe_open)is_torch_availableis_torchao_availableloggingz2.5.0)WeightConverter)flatten_tensor_state_dictconfig_namereturnc                 t    t        j                  d| j                               }|r|j                  d      S dS )z
    Extract the size digit from torchao config class names like "Int4WeightOnlyConfig", "Int8WeightOnlyConfig".
    Returns the digit as a string if found, otherwise None.
    z
(\d)weightr   N)researchlowergroup)r   matchs     z/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/quantizers/quantizer_torchao.py_fuzzy_match_sizer   1   s2    
 IIm[%6%6%89E"5;;q>,,    c                        e Zd ZU dZdZded<    fdZd Zd Zdd	d
e	ddde
f fdZdee	ee	z  f   dee	ee	z  f   fdZdddZdd	d
e	defdZdefdZedefd       Zedefd       Zdee	   fdZd Zd Z xZS )TorchAoHfQuantizerz?
    Quantizer for torchao: https://github.com/pytorch/ao/
    Fr
   quantization_configc                     t        |   |fi | t        t        | j                  j
                        j                        }|dk(  rd| _        y d| _        y )N4g      ?r   )super__init__r   typer   
quant_type__name__quantized_param_size)selfr   kwargs
size_digit	__class__s       r   r"   zTorchAoHfQuantizer.__init__B   sM    ,77&tD,D,D,O,O'P'Y'YZ
+5+<C!!!r   c                 j   t               st        d      |j                  d      }d| _        t	        |t
              rvd|j                         v sd|j                         v rQt        |      dkD  rBd|j                         v | _        | j                  rd|j                         v rt        d      y y y y y )NzSLoading an torchao quantized model requires torchao library (`pip install torchao`)
device_mapFdiskcpur   zYou are attempting to perform disk offload with a pre-quantized torchao model This is not supported yet . Please remove the disk device from the device_map.)
r   ImportErrorgetoffload_to_cpu
isinstancedictvalueslenpre_quantized
ValueError)r'   argsr(   r,   s       r   validate_environmentz'TorchAoHfQuantizer.validate_environmentH   s    #%sttZZ-
#j$'*++--*:K:K:M1MSVWaSbefSf&+z/@/@/B&B#%%&J4E4E4G*G$i  +H% Tg1M (r   c                 4    t        |j                               S )zv
        We flatten the state dict of tensor subclasses so that it is compatible with the safetensors format.
        )r   
state_dict)r'   models     r   get_state_dict_and_metadataz.TorchAoHfQuantizer.get_state_dict_and_metadataW   s     ))9)9);<<r   r<   r	   
param_nameparamztorch.Tensorr   c                 z    | j                  ||      r| j                  | j                  S t        |   |||      S )z4Return the element size (in bytes) for `param_name`.)param_needs_quantizationr&   r!   param_element_size)r'   r<   r>   r?   r*   s       r   rB   z%TorchAoHfQuantizer.param_element_size]   s>    ((
;@Y@Y@e,,,w)%UCCr   
max_memoryc                 ^    |j                         D ci c]  \  }}||dz   }}}|S c c}}w )Ng?)items)r'   rC   keyvals       r   adjust_max_memoryz$TorchAoHfQuantizer.adjust_max_memoryd   s6    5?5E5E5GHcc39nH
H Is   )c                 \   | j                  || j                  j                  |j                        | _        | j                  j                  r|j                         }|j                         D cg c]  \  }}t        |      t        |      k(  s|! }}}|j                         }|j                         D cg c]  \  }}t        |      t        |      k(  s|! }	}}| j                  D 
cg c]  }
|
||	z   vs|
 c}
| _        || j                  |       y y c c}}w c c}}w c c}
w N)
get_modules_to_not_convertr   modules_to_not_convert_keep_in_fp32_modulesinclude_input_output_embeddingsget_input_embeddingsnamed_modulesidget_output_embeddingsset_metadata)r'   r<   checkpoint_filesr(   	input_embnamemoduleinput_emb_names
output_emboutput_emb_namesxs              r   $_process_model_before_weight_loadingz7TorchAoHfQuantizer._process_model_before_weight_loadingi   s   &*&E&E4++BBED_D_'
# ##CC224I8=8K8K8MmfQSTZQ[_abk_lQltmOm446J9>9L9L9NovRTU[R\`bcm`nRnoo66+!?UeCe:e+D' './ ( no+s$   0DD:D#D#/D)<D)c                    t        || j                        syt        ||      \  }}t        j                  j
                  g}| j                  j                  r)|j                  t        j                  j                         ddl
m}m} t        | j                  j                  |      r|j                  dd      \  }	}
 ||	| j                  j                        sT ||| j                  j                        s7d| j                  j                  j                   v rt        |t#        |            ryt        |t#        |            xr |dk(  S )	NFr   )FqnToConfigfqn_matches_fqn_config.r   _defaultTweight)r   rL   r   torchnnLinearr   rN   append	Embeddingtorchao.quantizationr^   r_   r2   r$   rsplitfqn_to_configtuple)r'   r<   r>   r(   rW   tensor_name_QUANTIZABLEr^   r_   
module_fqn_s              r   rA   z+TorchAoHfQuantizer.param_needs_quantizationy   s   $Z1L1LM 35*E(##CC 2 23Ld..99;G&--c15MJ&z43K3K3V3VW)*d6N6N6Y6YZ$":":"E"E"S"SS"65+>? &%"56R;(;RRr   c                      yNT r'   s    r   is_serializablez"TorchAoHfQuantizer.is_serializable   s    r   c                 l    t        t        | j                  j                        j                        dk(  S )N8)r   r#   r   r$   r%   rs   s    r   is_trainablezTorchAoHfQuantizer.is_trainable   s,     !d&>&>&I&I!J!S!STX[[[r   c                      yrq   rr   rs   s    r   is_compileablez!TorchAoHfQuantizer.is_compileable   s    r   rT   c                     |d   j                  d      rLi }|D ]=  }t        |d      5 }|j                         xs i }|j                  |       d d d        ? || _        y y # 1 sw Y   RxY w)Nr   z.safetensorspt)	framework)endswithr   metadataupdate)r'   rT   r~   
checkpointf	metadata_s         r   rS   zTorchAoHfQuantizer.set_metadata   sy    A''7H. /
zT: /a !

 2IOOI./ //
 %DM 8/ /s   &A""A+	c                     ddl m}  ||       S )Nr   )TorchAoQuantize)integrations.torchaor   )r'   r   s     r   get_quantize_opsz#TorchAoHfQuantizer.get_quantize_ops   s    :t$$r   c                 Z    ddl m} | j                  rt        g dd ||       g      gS g S )Nr   )TorchAoDeserialize)_weight_qdata_weight_scale_and_zero_weight_scale_weight_zero_point_weight_act_pre_scalerb   )source_patternstarget_patterns
operations)r   r   r6   r   )r'   r   s     r   get_weight_conversionsz)TorchAoHfQuantizer.get_weight_conversions   s<    =% %- 24 89   	r   rJ   )r<   r	   )r%   
__module____qualname____doc__requires_calibration__annotations__r"   r9   r=   strfloatrB   r3   intrH   r\   boolrA   rt   propertyrw   ry   listrS   r   r   __classcell__)r*   s   @r   r   r   :   s    !((D=D(9 Ds DSa Dfk DDcCi,@ T#sUXy.EY 
0 S.? SS S_c S6  \d \ \   %T#Y %%
r   r   )r   typingr   baser   quantizers_utilsr   r   modeling_utilsr	   utils.quantization_configr
   safetensorsr   utilsr   r   r   MIN_TORCH_VERSIONcore_model_loadingr   rc   1torchao.prototype.safetensors.safetensors_supportr   
get_loggerr%   loggerr   r   r   rr   r   r   <module>r      s    
    I 09 ! E E   4 
 
		H	%-3 -3: -I Ir   