
    i                     j    d Z ddlmZ ddlmZ ddlmZ  ed      e G d d	e                    Zd	gZy
)zProphetNet model configuration    )strict   )PreTrainedConfig)auto_docstringz"microsoft/prophetnet-large-uncased)
checkpointc                      e Zd ZU dZdZdgZddiZdZee	z  e
d<   dZee
d	<   d
Ze	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZee	z  e
d<   dZee	z  e
d<   dZe	e
d<   dZee
d<   dZee
d<   dZee
d<   dZe	d z  e
d!<   d"Ze	e
d#<   d$Ze	e
d%<   d&Ze	e
d'<   d(Z ee
d)<   d*Z!ee
d+<   dZ"ee
d,<   dZ#e	d z  e
d-<   d.Z$e	d z  e
d/<   d"Z%e	e&e	   z  d z  e
d0<   d(Z'ee
d1<   dZ(ee
d2<   e)d3e	fd4       Z*e*jV                  d5        Z*y )6ProphetNetConfiga  
    ngram (`int`, *optional*, defaults to 2):
        Number of future tokens to predict. Set to 1 to be same as traditional Language model to predict next first
        token.
    num_buckets (`int`, *optional*, defaults to 32):
        The number of buckets to use for each attention layer. This is for relative position calculation. See the
        [T5 paper](see https://huggingface.co/papers/1910.10683) for more details.
    relative_max_distance (`int`, *optional*, defaults to 128):
        Relative distances greater than this number will be put into the last same bucket. This is for relative
        position calculation. See the [T5 paper](see https://huggingface.co/papers/1910.10683) for more details.
    disable_ngram_loss (`bool`, *optional*, defaults to `False`):
        Whether be trained predicting only the next first token.
    eps (`float`, *optional*, defaults to 0.0):
        Controls the `epsilon` parameter value for label smoothing in the loss calculation. If set to 0, no label
        smoothing is performed.
    
prophetnetpast_key_valuesnum_attention_headsnum_encoder_attention_headsg?activation_dropoutgeluactivation_functioni:w  
vocab_sizei   hidden_sizei   encoder_ffn_dim   num_encoder_layers   decoder_ffn_dimnum_decoder_layersnum_decoder_attention_headsattention_dropoutdropouti   max_position_embeddingsg{Gz?init_stdTis_encoder_decoderadd_cross_attentionr   Ndecoder_start_token_id   ngram    num_buckets   relative_max_distanceFdisable_ngram_lossg        eps	use_cachepad_token_id   bos_token_ideos_token_id
is_decodertie_word_embeddingsreturnc                     | j                   S )N)r   )selfs    /var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/prophetnet/configuration_prophetnet.pynum_hidden_layersz"ProphetNetConfig.num_hidden_layersM   s    &&&    c                     t        d      )NzyThis model does not support the setting of `num_hidden_layers`. Please set `num_encoder_layers` and `num_decoder_layers`.)NotImplementedError)r2   values     r3   r4   z"ProphetNetConfig.num_hidden_layersQ   s    !%
 	
r5   ),__name__
__module____qualname____doc__
model_typekeys_to_ignore_at_inferenceattribute_mapr   floatint__annotations__r   strr   r   r   r   r   r   r   r   r   r   r   r   r   boolr   r    r"   r$   r&   r'   r(   r)   r*   r,   r-   listr.   r/   propertyr4   setter r5   r3   r	   r	      s   " J#4"5<M '*)%%JKOS  '))OS  '))%(us{(GUS[#&S&He## $$)*C$J*E3NK!$3$$$CIt L#*  L#* +,L#S	/D(,J $$'3 ' ' 
 
r5   r	   N)	r<   huggingface_hub.dataclassesr   configuration_utilsr   utilsr   r	   __all__rH   r5   r3   <module>rM      sI    % . 3 # ?@>
' >
  A>
B 
r5   