
    i                     j    d Z ddlmZ ddlmZ ddlmZ  ed      e G d d	e                    Zd	gZy
)zELECTRA model configuration    )strict   )PreTrainedConfig)auto_docstringz"google/electra-small-discriminator)
checkpointc                      e Zd ZU dZdZdZeed<   dZeed<   dZ	eed<   d	Z
eed
<   dZeed<   dZeed<   dZeed<   dZeez  ed<   dZeez  ed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed <   dZeez  ed!<   d"Zed#z  ed$<   dZeed%<   d#Zeez  d#z  ed&<   d'Zeed(<   d'Zeed)<   d#Z ed#z  ed*<   d#Z!ee"e   z  d#z  ed+<   dZ#eed,<   y#)-ElectraConfiga  
    summary_type (`str`, *optional*, defaults to `"first"`):
        Argument used when doing sequence summary. Used in the sequence classification and multiple choice models.
        Has to be one of the following options:
            - `"last"`: Take the last token hidden state (like XLNet).
            - `"first"`: Take the first token hidden state (like BERT).
            - `"mean"`: Take the mean of all tokens hidden states.
            - `"cls_index"`: Supply a Tensor of classification token position (like GPT/GPT-2).
            - `"attn"`: Not implemented now, use multi-head attention.
    summary_use_proj (`bool`, *optional*, defaults to `True`):
        Argument used when doing sequence summary. Used in the sequence classification and multiple choice models.
        Whether or not to add a projection after the vector extraction.
    summary_activation (`str`, *optional*):
        Argument used when doing sequence summary. Used in the sequence classification and multiple choice models.
        Pass `"gelu"` for a gelu activation to the output, any other value will result in no activation.
    summary_last_dropout (`float`, *optional*, defaults to 0.0):
        Argument used when doing sequence summary. Used in the sequence classification and multiple choice models.
        The dropout ratio to be used after the projection and activation.

    Examples:

    ```python
    >>> from transformers import ElectraConfig, ElectraModel

    >>> # Initializing a ELECTRA electra-base-uncased style configuration
    >>> configuration = ElectraConfig()

    >>> # Initializing a model (with random weights) from the electra-base-uncased style configuration
    >>> model = ElectraModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```electrai:w  
vocab_size   embedding_size   hidden_size   num_hidden_layers   num_attention_headsi   intermediate_sizegelu
hidden_actg?hidden_dropout_probattention_probs_dropout_probi   max_position_embeddings   type_vocab_sizeg{Gz?initializer_rangeg-q=layer_norm_epsfirstsummary_typeTsummary_use_projsummary_activationsummary_last_dropoutr   Npad_token_id	use_cacheclassifier_dropoutF
is_decoderadd_cross_attentionbos_token_ideos_token_idtie_word_embeddings)$__name__
__module____qualname____doc__
model_typer   int__annotations__r   r   r   r   r   r   strr   floatr   r   r   r   r   r   r    boolr!   r"   r#   r$   r%   r&   r'   r(   r)   listr*        /var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/electra/configuration_electra.pyr	   r	      s@    D JJNCKs  !s!J'**03 %#+3#&S&OS#u#!NE!L#!d!$$(+%#++ L#* It-1d*1J %%#L#*#+/L#S	/D(/ $$r7   r	   N)	r.   huggingface_hub.dataclassesr   configuration_utilsr   utilsr   r	   __all__r6   r7   r8   <module>r=      sI    " . 3 # ?@=%$ =%  A=%@ 
r7   