
    iz                     j    d Z ddlmZ ddlmZ ddlmZ  ed      e G d d	e                    Zd	gZy
)zT5 model configuration    )strict   )PreTrainedConfig)auto_docstringzgoogle-t5/t5-small)
checkpointc                   v    e Zd ZU dZdZdgZdddddZd	Zee	d
<   dZ
ee	d<   dZee	d<   dZee	d<   dZee	d<   dZedz  e	d<   dZee	d<   dZee	d<   dZee	d<   dZeez  e	d<   dZee	d<   dZee	d<   dZee	d<   dZee	d <   dZee	d!<   d"Zedz  e	d#<   d$Zeee   z  dz  e	d%<   d&Zeez  e	d'<   d(Zee	d)<    fd*Z d+ Z! xZ"S ),T5Configa!  
    relative_attention_num_buckets (`int`, *optional*, defaults to 32):
        The number of buckets to use for each attention layer.
    relative_attention_max_distance (`int`, *optional*, defaults to 128):
        The maximum distance of the longer sequences for the bucket separation.
    feed_forward_proj (`string`, *optional*, defaults to `"relu"`):
        Type of feed forward layer to be used. Should be one of `"relu"` or `"gated-gelu"`. T5v1.1 uses the
        `"gated-gelu"` feed forward projection. Original T5 uses `"relu"`.
    t5past_key_valuesd_model	num_heads
num_layersd_kv)hidden_sizenum_attention_headsnum_hidden_layershead_dimi}  
vocab_sizei   @   i   d_ff   Nnum_decoder_layers       relative_attention_num_buckets   relative_attention_max_distanceg?dropout_rategư>layer_norm_epsilong      ?initializer_factorrelufeed_forward_projTis_encoder_decoder	use_cacher   pad_token_id   eos_token_idg        classifier_dropoutF
is_decoderc                 F   | j                   | j                   n| j                  | _         | j                  j                  d      }|d   | _        |d   dk(  | _        | j                  dk(  rd| _        |j                  dd       du| _        d	| _        t        | (  d
i | y )N-r   gatedz
gated-gelugelu_newtie_word_embeddingsFT )r   r   r"   splitdense_act_fnis_gated_actpopscale_decoder_outputsr/   super__post_init__)selfkwargsact_info	__class__s      x/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/t5/configuration_t5.pyr7   zT5Config.__post_init__@   s    '+'>'>'JD##PTP_P_ 	 ))//4$RL$QK72 !!\1 *D &,ZZ0Et%LTY%Y"#' ''    c                     | j                   j                  d      }t        |      dkD  r|d   dk7  st        |      dkD  rt        d| j                    d      y)	zOPart of `@strict`-powered validation. Validates the architecture of the config.r+   r&   r   r-      z`feed_forward_proj`: z is not a valid activation function of the dense layer. Please make sure `feed_forward_proj` is of the format `gated-{ACT_FN}` or `{ACT_FN}`, e.g. 'gated-gelu' or 'relu'N)r"   r1   len
ValueError)r8   r:   s     r<   validate_architecturezT5Config.validate_architectureW   sf    ))//4x=1!!73x=1;L'(>(>'? @) )  <Mr=   )#__name__
__module____qualname____doc__
model_typekeys_to_ignore_at_inferenceattribute_mapr   int__annotations__r   r   r   r   r   r   r   r   r   floatr   r    r"   strr#   boolr$   r%   r'   listr(   r)   r7   rB   __classcell__)r;   s   @r<   r	   r	      s    J#4"5 *)	M JGSD#ND#J%)d
)Is*,"C,+.#S. #L%#+# $$ ###s###It L#* +,L#S	/D(,&))J(.r=   r	   N)	rF   huggingface_hub.dataclassesr   configuration_utilsr   utilsr   r	   __all__r0   r=   r<   <module>rU      sJ     . 3 # /0G G  1GT ,r=   