
    i                        d dl Z d dlmZ ddlmZ ddlmZ ddlmZ ddl	m
Z
mZmZmZmZ  ed	
      e G d de                    Z G d de      Z G d de      Z G d de
      Z G d de      Z G d de      Zg dZy)    N)strict   )Cache)auto_docstring   )Gemma2Config)Gemma2AttentionGemma2DecoderLayerGemma2ForCausalLM	Gemma2MLPGemma2RMSNormzgoogle/vaultgemma-1b)
checkpointc                       e Zd ZdZ e       Zy)VaultGemmaConfiga(  
    query_pre_attn_scalar (`float`, *optional*, defaults to 256):
        scaling factor used on the attention scores
    final_logit_softcapping (`float`, *optional*, defaults to 30.0):
        scaling factor when applying tanh softcapping on the logits.
    attn_logit_softcapping (`float`, *optional*, defaults to 50.0):
        scaling factor when applying tanh softcapping on the attention scores.

    ```python
    >>> from transformers import VaultGemmaModel, VaultGemmaConfig
    >>> # Initializing a VaultGemma vaultgemma-7b style configuration
    >>> configuration = VaultGemmaConfig()
    >>> # Initializing a model from the vaultgemma-7b style configuration
    >>> model = VaultGemmaModel(configuration)
    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```N)__name__
__module____qualname____doc__AttributeErroruse_bidirectional_attention     /var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/vaultgemma/modular_vaultgemma.pyr   r      s    $ #1"2r   r   c                       e Zd Zy)VaultGemmaRMSNormNr   r   r   r   r   r   r   r   1       r   r   c                       e Zd Zy)VaultGemmaMLPNr   r   r   r   r   r   5   r   r   r   c                   ,     e Zd ZdZdedef fdZ xZS )VaultGemmaAttentionz=Multi-headed attention from 'Attention Is All You Need' paperconfig	layer_idxc                 0    t         |           d| _        y )NT)super__init__	is_causal)selfr"   r#   	__class__s      r   r&   zVaultGemmaAttention.__init__<   s    r   )r   r   r   r   r   intr&   __classcell__r)   s   @r   r!   r!   9   s    G/ C  r   r!   c                       e Zd Z fdZ	 	 	 d
dej
                  deej
                  ej
                  f   dej
                  dz  dej                  dz  dedz  deej                  eej                  ej                  f   dz  f   fd	Z
 xZS )VaultGemmaDecoderLayerc                 ,    t        |   di | | `| `y )Nr   )r%   r&   post_attention_layernormpost_feedforward_layernorm)r(   super_kwargsr)   s     r   r&   zVaultGemmaDecoderLayer.__init__B   s    (<()+r   Nhidden_statesposition_embeddingsattention_maskposition_idspast_key_valuesreturnc           	          |}| j                  |      } | j                  d|||||d|\  }}||z   }|}| j                  |      }| j                  |      }||z   }|S )N)r3   r4   r5   r6   r7   r   )input_layernorm	self_attnpre_feedforward_layernormmlp)	r(   r3   r4   r5   r6   r7   kwargsresidual_s	            r   forwardzVaultGemmaDecoderLayer.forwardG   s     !,,];)4>> 
' 3)%+
 
q !=0 66}E/ =0r   )NNN)r   r   r   r&   torchTensortuple
LongTensorr   FloatTensorrA   r+   r,   s   @r   r.   r.   A   s    , /304(,|| #5<<#=> t+	
 &&-  
u  %(9(95;L;L(L"MPT"TT	Ur   r.   c                       e Zd Zy)VaultGemmaForCausalLMNr   r   r   r   rH   rH   e   r   r   rH   )r   rH   VaultGemmaModelVaultGemmaPreTrainedModel)rB   huggingface_hub.dataclassesr   cache_utilsr   utilsr   gemma2.configuration_gemma2r   gemma2.modeling_gemma2r	   r
   r   r   r   r   r   r   r!   r.   rH   __all__r   r   r   <module>rQ      s      .   # 6 u u 123| 3  33,	 		I 	/ !/ !H	- 	r   