
    i%                     8   d dl Z d dlmZ d dl mZ ddlmZ ddlmZm	Z	 ddl
mZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZmZmZmZmZm Z m!Z!m"Z" ddiZ#dZ$ ejJ                  e&      Z' ed      e G d de                    Z( G d dejR                        Z* G d dejV                        Z, G d de      Z- G d de"      Z. G d d e      Z/ G d! d"e!      Z0 G d# d$e       Z1 G d% d&e      Z2 G d' d(e      Z3 G d) d*e      Z4g d+Z5y),    N)strict)nn   )initialization)CacheDynamicCache)PreTrainedConfig)create_causal_mask)BaseModelOutputWithPast)RopeParameters)PreTrainedModel)Unpack)TransformersKwargsauto_docstringlogging   )LlamaAttentionLlamaForCausalLMLlamaForSequenceClassificationLlamaForTokenClassificationLlamaMLP
LlamaModelLlamaPreTrainedModelLlamaRotaryEmbedding
vocab_fileztokenizer.modelu   ▁zgoogle/gemma-7b)
checkpointc                      e Zd ZU dZdZdgZddddddddZdgdgfd	d
gd	gfd	gd	gfdZdZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZee
d<   dZe	e
d<   dZee
d<   dZee
d <   d!Zee
d"<   d#Ze	d$z  e
d%<   d&Ze	ee	   z  d$z  e
d'<   d(Ze	d$z  e
d)<   d!Zee
d*<   d$Zee z  d$z  e
d+<   d,Z!ee
d-<   d.Z"ee	z  e
d/<   d$Z#ed$z  e
d0<   y$)1GemmaConfiga  
    use_bidirectional_attention (`bool`, *optional*):
        If True, the model will attend to all text tokens instead of using a causal mask.

    ```python
    >>> from transformers import GemmaModel, GemmaConfig
    >>> # Initializing a Gemma gemma-7b style configuration
    >>> configuration = GemmaConfig()
    >>> # Initializing a model from the gemma-7b style configuration
    >>> model = GemmaModel(configuration)
    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```gemmapast_key_valuescolwiserowwise)zlayers.*.self_attn.q_projzlayers.*.self_attn.k_projzlayers.*.self_attn.v_projzlayers.*.self_attn.o_projzlayers.*.mlp.gate_projzlayers.*.mlp.up_projzlayers.*.mlp.down_proj	input_idsinputs_embedshidden_statesattention_mask)embed_tokenslayersnormi  
vocab_sizei   hidden_sizei `  intermediate_size   num_hidden_layers   num_attention_headsnum_key_value_heads   head_dimgelu_pytorch_tanh
hidden_acti    max_position_embeddingsg{Gz?initializer_rangeư>rms_norm_epsT	use_cacher   Npad_token_id   eos_token_idr   bos_token_idtie_word_embeddingsrope_parametersFattention_biasg        attention_dropoutuse_bidirectional_attention)$__name__
__module____qualname____doc__
model_typekeys_to_ignore_at_inferencebase_model_tp_planbase_model_pp_planr*   int__annotations__r+   r,   r.   r0   r1   r3   r5   strr6   r7   floatr9   r:   boolr;   r=   listr>   r?   r@   r   dictrA   rB   rC        x/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/gemma/modular_gemma.pyr   r   1   sd    J#4"5%.%.%.%."+ )"+ &(9:#%568IJ!"_$56 JK"s"s!!!!Hc)J)#'S'#u#L%It L#* +,L#S	/D(, L#*  $$48O^d*T18 ND %(us{(/33rT   r   c            	       Z     e Zd ZdZd	dedededef fdZdej                  f fdZ	 xZ
S )
GemmaTextScaledWordEmbeddingz\
    This module overrides nn.Embeddings' forward by multiplying with embeddings scale.
    num_embeddingsembedding_dimpadding_idxembed_scalec                     t         |   |||       || _        | j                  dt	        j
                  |      d       y )Nr[   F)
persistent)super__init__scalar_embed_scaleregister_buffertorchtensor)selfrX   rY   rZ   r[   	__class__s        rU   r_   z%GemmaTextScaledWordEmbedding.__init__n   s;    D"-]ELL,ERWXrT   r#   c                     t         |   |      | j                  j                  | j                  j
                        z  S N)r^   forwardr[   toweightdtype)rd   r#   re   s     rU   rh   z$GemmaTextScaledWordEmbedding.forwards   s2    wy)D,<,<,?,?@Q@Q,RRRrT   )      ?)rD   rE   rF   rG   rL   rO   r_   rb   Tensorrh   __classcell__re   s   @rU   rW   rW   i   sG    Ys Y3 YS Y_d Y
S S SrT   rW   c                   <     e Zd Zddedef fdZd Zd Zd Z xZ	S )GemmaRMSNormdimepsc                     t         |           || _        t        j                  t        j                  |            | _        y rg   )r^   r_   rs   r   	Parameterrb   zerosrj   )rd   rr   rs   re   s      rU   r_   zGemmaRMSNorm.__init__x   s.    ll5;;s#34rT   c                     |t        j                  |j                  d      j                  dd      | j                  z         z  S )Nr   T)keepdim)rb   rsqrtpowmeanrs   )rd   xs     rU   _normzGemmaRMSNorm._norm}   s4    5;;quuQx}}R}>IJJJrT   c                     | j                  |j                               }|d| j                  j                         z   z  }|j                  |      S )Nrl   )r~   rO   rj   type_as)rd   r}   outputs      rU   rh   zGemmaRMSNorm.forward   sC    AGGI& 3!2!2!445~~a  rT   c                 ^    t        | j                  j                         d| j                   S )Nz, eps=)tuplerj   shapers   )rd   s    rU   
extra_reprzGemmaRMSNorm.extra_repr   s'    ))*+6$((<<rT   )r8   )
rD   rE   rF   rL   rO   r_   r~   rh   r   rn   ro   s   @rU   rq   rq   w   s&    5C 5e 5
K!=rT   rq   c                        e Zd Z fdZ xZS )GemmaMLPc                 J   t         |   |       t        j                  | j                  | j
                  d      | _        t        j                  | j                  | j
                  d      | _        t        j                  | j
                  | j                  d      | _        y )NF)bias)	r^   r_   r   Linearr+   r,   	gate_projup_proj	down_projrd   configre   s     rU   r_   zGemmaMLP.__init__   ss     4#3#3T5K5KRWXyy!1!143I3IPUV4#9#94;K;KRWXrT   )rD   rE   rF   r_   rn   ro   s   @rU   r   r      s    Y YrT   r   c                       e Zd Zy)GemmaRotaryEmbeddingNrD   rE   rF   rS   rT   rU   r   r          rT   r   c                   ,     e Zd ZdZdedef fdZ xZS )GemmaAttentionz=Multi-headed attention from 'Attention Is All You Need' paperr   	layer_idxc                 H    t         |           t        |dd       | _        y )NrC   F)r^   r_   getattr	is_causal)rd   r   r   re   s      rU   r_   zGemmaAttention.__init__   s"    $V-JERRrT   )rD   rE   rF   rG   r   rL   r_   rn   ro   s   @rU   r   r      s"    GS{ Ss S SrT   r   c                   :    e Zd Z ej                         d        Zy)GemmaPreTrainedModelc                    t        j                  | |       d|j                  j                  v r t	        j
                  |j                         y t        |t              r+t	        j                  |j                  |j                         y y )NRMSNorm)r   _init_weightsre   rD   initzeros_rj   
isinstancerW   	constant_r[   r`   )rd   modules     rU   r   z"GemmaPreTrainedModel._init_weights   s`    %%dF3((111KK& <=NN6--v/H/HI >rT   N)rD   rE   rF   rb   no_gradr   rS   rT   rU   r   r      s    U]]_J JrT   r   c                        e Zd Zdef fdZ	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  dedz  dej                  dz  d	e
dz  d
ee   defdZ xZS )
GemmaModelr   c                     t         |   |       t        |j                  |j                  | j
                  | j                  j                  dz        | _        y )Ng      ?)r[   )r^   r_   rW   r*   r+   rZ   r   r'   r   s     rU   r_   zGemmaModel.__init__   sK     8v1143C3CQUQ\Q\QhQhjmQm
rT   Nr#   r&   position_idsr    r$   r:   kwargsreturnc           
      N   |d u |d uz  rt        d      || j                  |      }|r|t        | j                        }|V||j	                         nd}t        j                  |j                  d   |j                        |z   }|j                  d      }t        | j                  ||||      }	|}
| j                  |
|      }| j                  d | j                  j                   D ]  } ||
f|	||||d|}
 | j                  |
      }
t        |
|r|	      S d 	      S )
Nz:You must specify exactly one of input_ids or inputs_embeds)r   r   r<   )device)r   r$   r&   r    r   )r   )r&   r   r    r:   position_embeddings)last_hidden_stater    )
ValueErrorr'   r   r   get_seq_lengthrb   aranger   r   	unsqueezer
   
rotary_embr(   r.   r)   r   )rd   r#   r&   r   r    r$   r:   r   past_seen_tokenscausal_maskr%   r   decoder_layers                rU   rh   zGemmaModel.forward   s[    -t";<YZZ  --i8M0*$++>OCRC^==?de <<(;(;A(>}G[G[\_ooL'11!4L(;;')+%
 &"oom,oW![[)H4;;+H+HI 		M)*) /#$7 M		 		-0&+/8O
 	
>B
 	
rT   )NNNNNN)rD   rE   rF   r   r_   rb   
LongTensorrm   r   FloatTensorrP   r   r   r   rh   rn   ro   s   @rU   r   r      s    
{ 
 .2.204(,26!%2
##d*2
 t+2
 &&-	2

 2
 ((4/2
 $;2
 +,2
 
!2
rT   r   c                        e Zd Z fdZ xZS )GemmaForCausalLMc                  8     t               j                  di | S )a|  
        Example:

        ```python
        >>> from transformers import AutoTokenizer, GemmaForCausalLM

        >>> model = GemmaForCausalLM.from_pretrained("google/gemma-7b")
        >>> tokenizer = AutoTokenizer.from_pretrained("google/gemma-7b")

        >>> prompt = "What is your favorite condiment?"
        >>> inputs = tokenizer(prompt, return_tensors="pt")

        >>> # Generate
        >>> generate_ids = model.generate(inputs.input_ids, max_length=30)
        >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        "What is your favorite condiment?"
        ```rS   )r^   rh   )super_kwargsre   s    rU   rh   zGemmaForCausalLM.forward   s    $ uw...rT   )rD   rE   rF   rh   rn   ro   s   @rU   r   r      s    / /rT   r   c                       e Zd Zy)GemmaForSequenceClassificationNr   rS   rT   rU   r   r      r   rT   r   c                       e Zd Zy)GemmaForTokenClassificationNr   rS   rT   rU   r   r     r   rT   r   )r   r   r   r   r   r   )6rb   huggingface_hub.dataclassesr   r    r   r   cache_utilsr   r   configuration_utilsr	   masking_utilsr
   modeling_outputsr   modeling_rope_utilsr   modeling_utilsr   processing_utilsr   utilsr   r   r   llama.modeling_llamar   r   r   r   r   r   r   r   VOCAB_FILES_NAMESSPIECE_UNDERLINE
get_loggerrD   loggerr   	EmbeddingrW   Modulerq   r   r   r   r   r   r   r   r   __all__rS   rT   rU   <module>r      s#  "  .  & . 3 / 7 1 - & @ @	 	 	 "#45  			H	% ,-34" 34  .34lS2<< S=299 =(Yx Y	/ 	S^ SJ/ J:
 :
z/' /,	%C 		"= 	rT   