
    iK                     j    d Z ddlmZ ddlmZ ddlmZ  ed      e G d d	e                    Zd	gZy
)zXLM configuration    )strict   )PreTrainedConfig)auto_docstringzFacebookAI/xlm-mlm-en-2048)
checkpointc                   &   e Zd ZU dZdZddddddd	d
ZdZeed<   dZ	eed<   dZ
eed<   dZeed<   dZeez  ed<   dZeez  ed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed <   d!Zeed"<   d#Zeed$<   d%Zeed&<   dZeed'<   d(Zeed)<   dZeed*<   d+Z ed+z  ed,<   dZ!eed-<   dZ"eez  ed.<   d%Z#eed/<   d%Z$eed0<   d1Z%ed+z  ed2<   d1Z&eed3<   d4Z'ed+z  ed	<   d1Z(ed+z  ed<   dZ)ee*e   z  d+z  ed<   dZ+eed5<   y+)6	XLMConfigaw  
    gelu_activation (`bool`, *optional*, defaults to `True`):
        Whether or not to use *gelu* for the activations instead of *relu*.
    sinusoidal_embeddings (`bool`, *optional*, defaults to `False`):
        Whether or not to use sinusoidal positional embeddings instead of absolute positional embeddings.
    causal (`bool`, *optional*, defaults to `False`):
        Whether or not the model should behave in a causal manner. Causal models use a triangular attention mask in
        order to only attend to the left-side context instead if a bidirectional context.
    asm (`bool`, *optional*, defaults to `False`):
        Whether or not to use an adaptive log softmax projection layer instead of a linear layer for the prediction
        layer.
    n_langs (`int`, *optional*, defaults to 1):
        The number of languages the model handles. Set to 1 for monolingual models.
    use_lang_emb (`bool`, *optional*, defaults to `True`):
        Whether to use language embeddings. Some models use additional language embeddings, see [the multilingual
        models page](http://huggingface.co/transformers/multilingual.html#xlm-language-embeddings) for information
        on how to use them.
    embed_init_std (`float`, *optional*, defaults to 2048^-0.5):
        The standard deviation of the truncated_normal_initializer for initializing the embedding matrices.
    unk_index (`int`, *optional*, defaults to 3):
        The index of the unknown token in the vocabulary.
    mask_index (`int`, *optional*, defaults to 5):
        The index of the masking token in the vocabulary.
    is_encoder (`bool`, *optional*, defaults to `True`):
        Whether or not the initialized model should be a transformer encoder or decoder as seen in Vaswani et al.
    summary_type (`string`, *optional*, defaults to "first"):
        Argument used when doing sequence summary. Used in the sequence classification and multiple choice models.
        Has to be one of the following options:
            - `"last"`: Take the last token hidden state (like XLNet).
            - `"first"`: Take the first token hidden state (like BERT).
            - `"mean"`: Take the mean of all tokens hidden states.
            - `"cls_index"`: Supply a Tensor of classification token position (like GPT/GPT-2).
            - `"attn"`: Not implemented now, use multi-head attention.
    summary_use_proj (`bool`, *optional*, defaults to `True`):
        Argument used when doing sequence summary. Used in the sequence classification and multiple choice models.
        Whether or not to add a projection after the vector extraction.
    summary_activation (`str`, *optional*):
        Argument used when doing sequence summary. Used in the sequence classification and multiple choice models.
        Pass `"tanh"` for a tanh activation to the output, any other value will result in no activation.
    summary_proj_to_labels (`bool`, *optional*, defaults to `True`):
        Used in the sequence classification and multiple choice models.
        Whether the projection outputs should have `config.num_labels` or `config.hidden_size` classes.
    summary_first_dropout (`float`, *optional*, defaults to 0.1):
        Used in the sequence classification and multiple choice models.
        The dropout ratio to be used after the projection and activation.
    start_n_top (`int`, *optional*, defaults to 5):
        Used in the SQuAD evaluation script.
    end_n_top (`int`, *optional*, defaults to 5):
        Used in the SQuAD evaluation script.
    mask_token_id (`int`, *optional*, defaults to 0):
        Model agnostic parameter to identify masked tokens when generating text in an MLM context.
    lang_id (`int`, *optional*, defaults to 1):
        The ID of the language used by the model. This parameter is used when generating text in a given language.

    Examples:

    ```python
    >>> from transformers import XLMConfig, XLMModel

    >>> # Initializing a XLM configuration
    >>> configuration = XLMConfig()

    >>> # Initializing a model (with random weights) from the configuration
    >>> model = XLMModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```xlmemb_dimn_headsn_layers
vocab_sizebos_token_ideos_token_idpad_token_id)hidden_sizenum_attention_headsnum_hidden_layersn_words	bos_index	eos_index	pad_indexiu  i         g?dropoutattention_dropoutTgelu_activationFsinusoidal_embeddingscausalasm   n_langsuse_lang_embi   max_position_embeddingsg;f?embed_init_stdg-q=layer_norm_epsg{Gz?init_stdr   	unk_index   
mask_index
is_encoderfirstsummary_typesummary_use_projNsummary_activationsummary_proj_to_labelssummary_first_dropoutstart_n_top	end_n_topr   mask_token_idlang_id   tie_word_embeddings),__name__
__module____qualname____doc__
model_typeattribute_mapr   int__annotations__r   r   r   r   floatr   r   boolr   r   r    r"   r#   r$   r%   r&   r'   r(   r*   r+   r-   strr.   r/   r0   r1   r2   r3   r4   r5   r   r   r   listr7        z/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/xlm/configuration_xlm.pyr	   r	      s   CJ J ('###M JGSHcGSGUS[%(us{( OT "'4'FDCGSL$#&S&&NE&!NE!HeIsJJL#!d!%)d
)#'D'),53;,KIs !M3:!GS L#*  L#* +,L#S	/D(, $$rE   r	   N)	r;   huggingface_hub.dataclassesr   configuration_utilsr   utilsr   r	   __all__rD   rE   rF   <module>rK      sJ     . 3 # 78p%  p%  9p%f -rE   