
    im                     r    d dl mZ ddlmZ ddlmZ ddlmZ  ed      e G d d	e                    Zd	gZ	y
)    )strict   )PreTrainedConfig)RopeParameters)auto_docstringzgoogle/gemma2-7b)
checkpointc                   &    e Zd ZU dZdZdgZddddddddZdgdgfd	d
gd	gfd	gd	gfdZdZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZee
d<   dZe	e
d<   dZee
d<   d Zee
d!<   d"Zee
d#<   d$Ze	d%z  e
d&<   d'Ze	ee	   z  d%z  e
d(<   d)Ze	d%z  e
d*<   d"Zee
d+<   d%Zee z  d%z  e
d,<   d-Z!ee
d.<   d/Z"e	ez  d%z  e
d0<   dZ#e	e
d1<   d2Z$e	d%z  e
d3<   d%Z%ee   d%z  e
d4<   d5Z&ed%z  e
d6<   d7Z'ed%z  e
d8<   d%Z(ed%z  e
d9<    fd:Z)d; Z* xZ+S )<Gemma2Configa  
    query_pre_attn_scalar (`float`, *optional*, defaults to 256):
        scaling factor used on the attention scores
    final_logit_softcapping (`float`, *optional*, defaults to 30.0):
        scaling factor when applying tanh softcapping on the logits.
    attn_logit_softcapping (`float`, *optional*, defaults to 50.0):
        scaling factor when applying tanh softcapping on the attention scores.
    use_bidirectional_attention (`bool`, *optional*):
        If True, the model will attend to all text tokens instead of using a causal mask.

    ```python
    >>> from transformers import Gemma2Model, Gemma2Config
    >>> # Initializing a Gemma2 gemma2-7b style configuration
    >>> configuration = Gemma2Config()
    >>> # Initializing a model from the gemma2-7b style configuration
    >>> model = Gemma2Model(configuration)
    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```gemma2past_key_valuescolwiserowwise)zlayers.*.self_attn.q_projzlayers.*.self_attn.k_projzlayers.*.self_attn.v_projzlayers.*.self_attn.o_projzlayers.*.mlp.gate_projzlayers.*.mlp.up_projzlayers.*.mlp.down_proj	input_idsinputs_embedshidden_statesattention_mask)embed_tokenslayersnormi  
vocab_sizei 	  hidden_sizei $  intermediate_size   num_hidden_layers   num_attention_heads   num_key_value_heads   head_dimgelu_pytorch_tanhhidden_activationi    max_position_embeddingsg{Gz?initializer_rangegư>rms_norm_epsT	use_cacher   Npad_token_id   eos_token_id   bos_token_idtie_word_embeddingsrope_parametersFattention_biasg        attention_dropoutquery_pre_attn_scalari   sliding_windowlayer_typesg      >@final_logit_softcappingg      I@attn_logit_softcappinguse_bidirectional_attentionc                     | j                   ;t        | j                        D cg c]  }t        |dz   dz        rdnd c}| _         t	        |   di | y c c}w )Nr(   r*   sliding_attentionfull_attention )r2   ranger   boolsuper__post_init__)selfkwargsi	__class__s      /var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/gemma2/configuration_gemma2.pyr=   zGemma2Config.__post_init__^   s`    #X]^b^t^tXu STtQUaK'8#>NN D 	''	 s   Ac                     | j                   | j                  z  dk7  r&t        d| j                    d| j                   d      y)zOPart of `@strict`-powered validation. Validates the architecture of the config.r   zThe hidden size (z6) is not a multiple of the number of attention heads (z).N)r   r   
ValueError)r>   s    rB   validate_architecturez"Gemma2Config.validate_architecturef   sS    d666!;#D$4$4#5 622327  <    ),__name__
__module____qualname____doc__
model_typekeys_to_ignore_at_inferencebase_model_tp_planbase_model_pp_planr   int__annotations__r   r   r   r   r   r    r"   strr#   r$   floatr%   r&   r;   r'   r)   listr+   r,   r-   r   dictr.   r/   r0   r1   r2   r3   r4   r5   r=   rE   __classcell__)rA   s   @rB   r
   r
      s   ( J#4"5%.%.%.%."+ )"+ &(9:#%568IJ!"_$56 JK!s!s    Hc0s0#'S'#u#L%It L#* +,L#S	/D(, L#*  $$48O^d*T18 ND ,/sU{T)/!$3$!%NC$J%$(KcT!(,0UT\0+/EDL//33(rF   r
   N)
huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r
   __all__r9   rF   rB   <module>r[      sK   * / 3 1 # -.N# N  /Nb 
rF   