
    i                     j    d Z ddlmZ ddlmZ ddlmZ  ed      e G d d	e                    Zd	gZy
)zOpenAI GPT-2 configuration    )strict   )PreTrainedConfig)auto_docstringzopenai-community/gpt2)
checkpointc                      e Zd ZU dZdZdgZdddddZd	Zee	d
<   dZ
ee	d<   dZee	d<   dZee	d<   dZee	d<   dZedz  e	d<   dZee	d<   dZeez  e	d<   dZeez  e	d<   dZeez  e	d<   dZee	d<   dZee	d<   dZee	d<   dZee	d<   dZedz  e	d<   dZee	d<   dZeez  e	d <   dZee	d!<   dZee	d"<   d#Zedz  e	d$<   d#Z ee!e   z  dz  e	d%<   dZ"edz  e	d&<   d'Z#ee	d(<   d'Z$ee	d)<   d'Z%ee	d*<   dZ&ee	d+<   y),
GPT2ConfigaH	  
    summary_type (`string`, *optional*, defaults to `"cls_index"`):
        Argument used when doing sequence summary, used in the models [`GPT2DoubleHeadsModel`].
        Has to be one of the following options:
            - `"last"`: Take the last token hidden state (like XLNet).
            - `"first"`: Take the first token hidden state (like BERT).
            - `"mean"`: Take the mean of all tokens hidden states.
            - `"cls_index"`: Supply a Tensor of classification token position (like GPT/GPT-2).
            - `"attn"`: Not implemented now, use multi-head attention.
    summary_use_proj (`bool`, *optional*, defaults to `True`):
        Argument used when doing sequence summary, used in the models [`GPT2DoubleHeadsModel`].
        Whether or not to add a projection after the vector extraction.
    summary_activation (`str`, *optional*):
        Argument used when doing sequence summary. Used in for the multiple choice head in
        [`GPT2DoubleHeadsModel`].
        Pass `"tanh"` for a tanh activation to the output, any other value will result in no activation.
    summary_proj_to_labels (`bool`, *optional*, defaults to `True`):
        Argument used when doing sequence summary, used in the models [`GPT2DoubleHeadsModel`].
        Whether the projection outputs should have `config.num_labels` or `config.hidden_size` classes.
    summary_first_dropout (`float`, *optional*, defaults to 0.1):
        Argument used when doing sequence summary, used in the models [`GPT2DoubleHeadsModel`].
        The dropout ratio to be used after the projection and activation.
    scale_attn_by_inverse_layer_idx (`bool`, *optional*, defaults to `False`):
        Whether to additionally scale attention weights by `1 / layer_idx + 1`.
    reorder_and_upcast_attn (`bool`, *optional*, defaults to `False`):
        Whether to scale keys (K) prior to computing attention (dot-product) and upcast attention
        dot-product/softmax to float() when training with mixed precision.

    Example:

    ```python
    >>> from transformers import GPT2Config, GPT2Model

    >>> # Initializing a GPT2 configuration
    >>> configuration = GPT2Config()

    >>> # Initializing a model (with random weights) from the configuration
    >>> model = GPT2Model(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```gpt2past_key_valuesn_embdn_positionsn_headn_layer)hidden_sizemax_position_embeddingsnum_attention_headsnum_hidden_layersiQ  
vocab_sizei   i      Nn_innergelu_newactivation_functiong?resid_pdrop
embd_pdrop
attn_pdropgh㈵>layer_norm_epsilong{Gz?initializer_range	cls_indexsummary_typeTsummary_use_projsummary_activationsummary_proj_to_labelssummary_first_dropoutscale_attn_weights	use_cacheiP  bos_token_ideos_token_idpad_token_idFscale_attn_by_inverse_layer_idxreorder_and_upcast_attnadd_cross_attentiontie_word_embeddings)'__name__
__module____qualname____doc__
model_typekeys_to_ignore_at_inferenceattribute_mapr   int__annotations__r   r   r   r   r   r   strr   floatr   r   r   r   r   r    boolr!   r"   r#   r$   r%   r&   r'   listr(   r)   r*   r+   r,        |/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/gpt2/configuration_gpt2.pyr	   r	      sj   )V J#4"5#0'&	M JKFCGSFCGS4Z))"K"!J!!J! $$#u##L##!d!%)d
)#'D'),53;,##It$L#*$+0L#S	/D(0#L#*#,1#T1$)T) %% $$r;   r	   N)	r0   huggingface_hub.dataclassesr   configuration_utilsr   utilsr   r	   __all__r:   r;   r<   <module>rA      sJ    ! . 3 # 23N%! N%  4N%b .r;   