
    iE                         d dl mZ d dlmZ ddlmZ ddlmZ ddlm	Z	  e	d      e G d	 d
e                    Z
 e	d      e G d de                    Zdd
gZy)    )Any)strict   )PreTrainedConfig)RopeParameters)auto_docstringzgoogle/t5_gemma_module-7b)
checkpointc                        e Zd ZU dZdZdgZddddddddZdgdgfd	d
gd	gfd	gd	gfdZdZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZee
d<   dZe	e
d<   dZee
d<   d Zee
d!<   d"Zee
d#<   d$Ze	d%z  e
d&<   d'Ze	ee	   z  d%z  e
d(<   d)Ze	d%z  e
d*<   d"Zee
d+<   d%Zee z  d%z  e
d,<   d-Z!ee
d.<   d/Z"e	ez  d%z  e
d0<   dZ#e	e
d1<   d2Z$e	d%z  e
d3<   d%Z%ee   d%z  e
d4<   d5Z&ed%z  e
d6<   d7Z'ed%z  e
d8<   d-Z(ee
d9<    fd:Z)d; Z* xZ+S )<T5GemmaModuleConfigaA  
    query_pre_attn_scalar (`float`, *optional*, defaults to 256):
        scaling factor used on the attention scores
    final_logit_softcapping (`float`, *optional*, defaults to 30.0):
        scaling factor when applying tanh softcapping on the logits.
    attn_logit_softcapping (`float`, *optional*, defaults to 50.0):
        scaling factor when applying tanh softcapping on the attention scores.

    ```python
    >>> from transformers import T5GemmaModuleModel, T5GemmaModuleConfig
    >>> # Initializing a T5GemmaModule t5_gemma_module-7b style configuration
    >>> configuration = T5GemmaModuleConfig()
    >>> # Initializing a model from the t5_gemma_module-7b style configuration
    >>> model = T5GemmaModuleModel(configuration)
    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```t5_gemma_modulepast_key_valuescolwiserowwise)zlayers.*.self_attn.q_projzlayers.*.self_attn.k_projzlayers.*.self_attn.v_projzlayers.*.self_attn.o_projzlayers.*.mlp.gate_projzlayers.*.mlp.up_projzlayers.*.mlp.down_proj	input_idsinputs_embedshidden_statesattention_mask)embed_tokenslayersnorm  
vocab_sizei 	  hidden_sizei $  intermediate_size   num_hidden_layers   num_attention_heads   num_key_value_heads   head_dimgelu_pytorch_tanhhidden_activationi    max_position_embeddingsg{Gz?initializer_rangegư>rms_norm_epsT	use_cacher   Npad_token_id   eos_token_id   bos_token_idtie_word_embeddingsrope_parametersFattention_bias        attention_dropoutquery_pre_attn_scalari   sliding_windowlayer_typesg      >@final_logit_softcappingg      I@attn_logit_softcapping
is_decoderc                     | j                   ;t        | j                        D cg c]  }t        |dz   dz        rdnd c}| _         t	        |   di | y c c}w )Nr*   r,   sliding_attentionfull_attention )r5   ranger   boolsuper__post_init__)selfkwargsi	__class__s      /var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/t5gemma/configuration_t5gemma.pyr@   z!T5GemmaModuleConfig.__post_init___   s`    #X]^b^t^tXu STtQUaK'8#>NN D 	''	 s   Ac                     | j                   | j                  z  dk7  r&t        d| j                    d| j                   d      y)zOPart of `@strict`-powered validation. Validates the architecture of the config.r   zThe hidden size (z6) is not a multiple of the number of attention heads (z).N)r   r   
ValueError)rA   s    rE   validate_architecturez)T5GemmaModuleConfig.validate_architectureg   sS    d666!;#D$4$4#5 622327  <    ),__name__
__module____qualname____doc__
model_typekeys_to_ignore_at_inferencebase_model_tp_planbase_model_pp_planr   int__annotations__r   r   r   r   r    r"   r$   strr%   r&   floatr'   r(   r>   r)   r+   listr-   r.   r/   r   dictr0   r2   r3   r4   r5   r6   r7   r8   r@   rH   __classcell__rD   s   @rE   r   r      s   $ #J#4"5%.%.%.%."+ )"+ &(9:#%568IJ!"_$56 JK!s!s    Hc0s0#'S'#u#L%It L#* +,L#S	/D(, L#*  $$48O^d*T18 ND ,/sU{T)/!$3$!%NC$J%$(KcT!(,0UT\0+/EDL/J(rI   r   c                        e Zd ZU dZdZdgZeedZdZee	e
e
f   z  dz  ed<   dZee	e
e
f   z  dz  ed<   dZeed	<   d
Zeez  ed<   d
Zeez  ed<   d
Zeez  ed<   dZeed<   dZeed<    fdZ xZS )T5GemmaConfiga  
    encoder (`Union[T5GemmaModuleConfig, dict]`, optional, *optional*):
        Configuration for the encoder.
    decoder (`Union[T5GemmaModuleConfig, dict]`, optional, *optional*):
        Configuration for the decoder.

    Example:

    ```python
    >>> from transformers import T5GemmaConfig, T5GemmaModel
    >>> t5gemma_config = T5GemmaConfig.from_pretrained("google/t5gemma-2b-2b-prefixlm-it")
    >>> model = T5GemmaModel(t5gemma_config)
    ```t5gemmar   )encoderdecoderNr]   r^   Tis_encoder_decoderr1   dropout_rateclassifier_dropout_rater2   r.   r   r   c                    t        | j                  t              rt        di | j                  | _        n| j                  t               | _        t        | j                  t              rt        di | j                  | _        n| j                  t               | _        d| j                  _        | j                  | j                  _        | j                  | j                  _        d| j                  _        d| j                  _        | j                  | j                  _        | j                  | j                  _        | j                  j                  | j                  _
        |j                  d| j                  j                        | _        dD ]   }||vst        | j                  |      ||<   " t        | <  di | y )NFTr&   )r-   r)   r+   r<   )
isinstancer]   rW   r   r^   r8   r`   r2   r(   r   cross_attention_hidden_sizepopr&   getattrr?   r@   )rA   rB   special_token_keyrD   s      rE   r@   zT5GemmaConfig.__post_init__   sP   dllD).>>DL\\!.0DLdllD).>>DL\\!.0DL"'$($5$5!)-)?)?&"&!%$($5$5!)-)?)?&37<<3K3K0!',?A_A_!`!Q 	U .,3DLLBS,T()	U 	''rI   )rJ   rK   rL   rM   rN   rO   r   sub_configsr]   rW   r   rS   r^   r_   r>   r`   rR   rU   ra   r2   r.   r   r@   rX   rY   s   @rE   r[   r[   p   s     J#4"51>QRK;?G 4S>1D8?;?G 4S>1D8?## #L#+#+.S5[.%(us{( $$J( (rI   r[   N)typingr   huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r   r[   __all__r<   rI   rE   <module>ro      s|   *  . 3 1 # 67M* M  8M` 677($ 7(  87(t 1
2rI   