
    i!)                         d dl Z d dlZd dlZd dlmZ d dlmZmZ  e       rddlm	Z	 ddl
mZ  e       rd dlmZ d dlmZ  ej                   e      Zd	 Zd
 Z G d de	      Z G d de	      Zy)    N)logging)is_torch_availableis_torchao_available   )ConversionOps)get_module_from_name)unflatten_tensor_state_dict)is_metadata_torchaoc                    ddl m} ddlm} t	        | |      r*| j
                  j                   d| j                          dS t	        | |      r<| j
                  j                   d| j                   dt        | j                         dS y )Nr   )AffineQuantizedTensor)LinearActivationQuantizedTensor()z(activation=	, weight=)
torchao.dtypesr   7torchao.quantization.linear_activation_quantized_tensorr   
isinstance	__class____name___quantization_typeinput_quant_funcoriginal_weight_tensor)weightr   r   s      r/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/integrations/torchao.pyr   r   &   s    4g&/0""++,Af.G.G.I-J!LL&9:""++,L9P9P8QQZ[mnt  oL  oL  \M  [N  NO  P  	P ;    c                    t        | j                        }|7d| j                  j                  d    d| j                  j                  d    dS d| j                  j                  d    d| j                  j                  d    d| S )Nzin_features=   z, out_features=r   z, weight=Noner   )r   r   shape)selfr   s     r   _linear_extra_reprr    1   s    ,F~dkk//23?4;;CTCTUVCWBXXeffdkk//23?4;;CTCTUVCWBXXabhaijjr   c                       e Zd Zd Zd Z	 	 	 d	deeej                  f   dej                  j                  dz  dedz  deeej                  f   fdZy)
TorchAoQuantizec                     || _         y Nhf_quantizerr   r&   s     r   __init__zTorchAoQuantize.__init__:   
    (r   c                    ddl m} t        |j                               j                  }| j
                  j                  r?|j                  dk(  r0|j                  d        |||g|i | |j                  d       y |||g|i | y)a7  Run quantize_, moving to CUDA first if CPU offloading is active.

        Some torchao quantization ops (e.g. int4 packing) only have CUDA kernels.
        When a layer is destined for CPU (e.g. CPU offloading), we temporarily move
        it to CUDA for quantization, then move the result back to CPU.
        r   )	quantize_cpucudaN)	torchao.quantizationr+   next
parametersdevicer&   offload_to_cputypeto)r   moduleconfigargskwargsr+   target_devices          r   	_quantizezTorchAoQuantize._quantize=   s|     	3V..0188++0B0Be0KIIfff6t6v6IIeff6t6v6r   N
input_dictmodelfull_layer_namereturnc                 D   t        |j                               d   \  }}t        |t              r|d   n|}t	        ||      \  }}	t
        j                  j                  ||j                        |j                  |	<   |j                         }
t        |      t        |
      k(  }| j                  j                  j                  }|r)|r't        |j                   j#                  d      dd       ddlm} | j                  j                  j)                         }t        ||      r|j+                  dd	      \  }}d }||j,                  v r(|j/                  d
      rJ d       |j0                  |   }n||j,                  v r(|j/                  d
      rJ d       |j0                  |   }n|j,                  D ]h  }|j/                  d
      st3        j4                  |dd  |      r|j0                  |   } nHt3        j4                  |dd  |      sY|j0                  |   } n |j0                  j7                  dd       }||dk(  rr|r|r|j8                  j;                         }| j=                  ||d        |j?                  |       d|_         |jC                  d      D ]	  }d|_          |r|rdiS i S  |||i      }| j=                  ||d        |j?                  |       d|_         |jC                  d      D ]	  }d|_          i S ||iS |r|r|j8                  j;                         }| j=                  || j                  j                  j)                                |j?                  |       d|_         |jC                  d      D ]	  }d|_          |r|rdiS i S )Nr   )requires_gradT)decodertie_word_embeddingsF)FqnToConfig.r   zre:zHparam fqn should not start with`re:`, which is used for specifying regexzImodule fqn should not start with`re:`, which is used for specifying regex   _defaultr   c                      y)NT )xfqns     r   <lambda>z)TorchAoQuantize.convert.<locals>.<lambda>   s    r   )recursezlm_head.weight)	filter_fn)"tupleitemsr   listr   torchnn	Parameterr@   _parametersget_input_embeddingsidr&   quantization_configuntie_embedding_weightssetattrr6   get_text_configr.   rC   get_apply_tensor_subclassrsplitfqn_to_config
startswithmodule_fqn_to_configre	fullmatchgetr   cloner:   discard_is_hf_initializedr0   )r   r;   r<   r=   missing_keysr8   _valuer5   tensor_nameinput_embedis_embedding_paramrX   rC   r6   
module_fqntop_level_param_namecmaybe_module_fqn_patternlm_headparamcustom_param_fqn_configs                         r   convertzTorchAoQuantize.convertN   s    ))+,Q/5&ud3a25/J*/((*<*<URWReRe*<*f;' 002Z2k?:"&"3"3"G"G"_"_"'9ELL000>@UW\]4""66PPRfk*/>/E/Ec1/M,J,A&"6"66%007 ^7 //@v333%007 _7 //
; 170D0D J,3>>uE &>qr&BOT"778PQ&>qr&BJO"778PQJ 3377
DIA}'83).E"(--"5"5"7NN61/BD ((904F-
 "(!2!25!2!A 83708:LQh,g6pnpp /:;OQR:S.T+NN6+BdNS ((904F-!'!2!25!2!A 83708I#U++"9mm))+Gvt00DD^^`a_-$(!&&u&5 	,E'+E$	,.@E\ '*dbddr   )NNN)r   
__module____qualname__r(   r:   dictstrrQ   TensorrR   Modulers   rH   r   r   r"   r"   9   sy    )7( )-&*\eell*+\e xx%\e t	\e 
c5<<	 \er   r"   c                       e Zd Zd Z	 	 	 	 d	deeej                  f   dee   dz  dej                  j                  dz  dedz  deeej                  f   f
dZy)
TorchAoDeserializec                     || _         y r$   r%   r'   s     r   r(   zTorchAoDeserialize.__init__   r)   r   Nr;   source_patternsr<   r=   r>   c           
         t        |j                               d   |v}i }dj                  |j                  d      dd       }	|r"t	        |d   t               r	|d   d   }
nZ|d   }
nT|j                         D ]A  }t        ||         dk7  rt        d| dt        ||          d	      ||   d   ||	 d| <   C |r|
iS t        | j                  j                        st        d
      t        || j                  j                        \  }}|rJ ||   }t        ||      \  }}t	        |t        j                  j                        rt        j                   t"        |      |_        ||iS )a&  
        Consolidates tensor subclass components before reconstructing the object

        For example:
            input_dict: {
                "_weight_qdata": torch.Tensor,
                "_weight_scale": torch.Tensor,
            }
            full_layer_name: "model.layers.0.self_attn.k_proj.weight"

            Given this, we reconstruct a Float8Tensor instance using the qdata and scale
            and return it as a dictionary with the full_layer_name as the key and the recovered
            Float8Tensor instance as the value.
        r   rD   Nr   r   zExpected a single tensor for z	 but got z tensors insteadz$Invalid torchao safetensors metadata)rP   keysjoinsplitr   len
ValueErrorr
   r&   metadatar	   r   rQ   rR   Lineartypes
MethodTyper    
extra_repr)r   r;   r}   r<   r=   rf   r8   is_unsafe_serialization
param_data
layer_namer   suffixunflattened_state_dictleftover_state_dict	new_paramr5   rg   s                    r   rs   zTorchAoDeserialize.convert   s   . #'z'8"9!"<O"S
XXo33C8"=>
"*X.5#H-a0#H-$//+ Mz&)*a/$7xyZX^M_I`Haaqr  8B&7I!7L
j\6(34M ##V,,$T%6%6%?%?@CDD6Q))227
3 3 '&&*?;	(@	fehhoo. % 0 01CV LF++r   )NNNN)r   rt   ru   r(   rv   rw   rQ   rx   rP   rR   ry   rs   rH   r   r   r{   r{      s    ) -1(,&*9,ell*+9, cT)9, xx%	9,
 t9, 
c5<<	 9,r   r{   )r`   r   rQ   transformers.utilsr   transformers.utils.import_utilsr   r   core_model_loadingr   quantizers.quantizers_utilsr   1torchao.prototype.safetensors.safetensors_supportr	   /torchao.prototype.safetensors.safetensors_utilsr
   
get_loggerr   loggerr   r    r"   r{   rH   r   r   <module>r      sr    
   & T 2 >  T			H	%Pkqem qeh=, =,r   