
    ie                     j    d Z ddlmZ ddlmZ ddlmZ  ed      e G d d	e                    Zd	gZy
)zOpenAI GPT configuration    )strict   )PreTrainedConfig)auto_docstringzopenai-community/openai-gpt)
checkpointc                   x   e Zd ZU dZdZdddddZdZeed	<   d
Z	eed<   dZ
eed<   dZeed<   dZeed<   dZeed<   dZeez  ed<   dZeez  ed<   dZeez  ed<   dZeed<   dZeed<   dZeed<   dZeed<   dZedz  ed<   dZeed<   dZeez  ed<   dZedz  ed<   dZedz  ed <   dZeee   z  dz  ed!<   dZeed"<   y)#OpenAIGPTConfiga	  
    afn (`str` or `Callable`, *optional*, defaults to `"gelu"`):
        The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
        `"relu"`, `"silu"` and `"gelu_new"` are supported.
    layer_norm_epsilon (`float`, *optional*, defaults to 1e-05):
        The epsilon to use in the layer normalization layers
    summary_type (`str`, *optional*, defaults to `"cls_index"`):
        Argument used when doing sequence summary, used in the models [`OpenAIGPTDoubleHeadsModel`] and
        [`OpenAIGPTDoubleHeadsModel`].
        Has to be one of the following options:
            - `"last"`: Take the last token hidden state (like XLNet).
            - `"first"`: Take the first token hidden state (like BERT).
            - `"mean"`: Take the mean of all tokens hidden states.
            - `"cls_index"`: Supply a Tensor of classification token position (like GPT/GPT-2).
            - `"attn"`: Not implemented now, use multi-head attention.
    summary_use_proj (`bool`, *optional*, defaults to `True`):
        Argument used when doing sequence summary, used in the models [`OpenAIGPTDoubleHeadsModel`] and
        [`OpenAIGPTDoubleHeadsModel`].
        Whether or not to add a projection after the vector extraction.
    summary_activation (`str`, *optional*):
        Argument used when doing sequence summary, used in the models [`OpenAIGPTDoubleHeadsModel`] and
        [`OpenAIGPTDoubleHeadsModel`].
        Pass `"tanh"` for a tanh activation to the output, any other value will result in no activation.
    summary_proj_to_labels (`bool`, *optional*, defaults to `True`):
        Argument used when doing sequence summary, used in the models [`OpenAIGPTDoubleHeadsModel`] and
        [`OpenAIGPTDoubleHeadsModel`].
        Whether the projection outputs should have `config.num_labels` or `config.hidden_size` classes.
    summary_first_dropout (`float`, *optional*, defaults to 0.1):
        Argument used when doing sequence summary, used in the models [`OpenAIGPTDoubleHeadsModel`] and
        [`OpenAIGPTDoubleHeadsModel`].
        The dropout ratio to be used after the projection and activation.

    Examples:

    ```python
    >>> from transformers import OpenAIGPTConfig, OpenAIGPTModel

    >>> # Initializing a GPT configuration
    >>> configuration = OpenAIGPTConfig()

    >>> # Initializing a model (with random weights) from the configuration
    >>> model = OpenAIGPTModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```z
openai-gptn_positionsn_embdn_headn_layer)max_position_embeddingshidden_sizenum_attention_headsnum_hidden_layersi  
vocab_sizei   i      geluafng?resid_pdrop
embd_pdrop
attn_pdropgh㈵>layer_norm_epsilong{Gz?initializer_range	cls_indexsummary_typeTsummary_use_projNsummary_activationsummary_proj_to_labelssummary_first_dropoutpad_token_idbos_token_ideos_token_idtie_word_embeddings) __name__
__module____qualname____doc__
model_typeattribute_mapr   int__annotations__r
   r   r   r   r   strr   floatr   r   r   r   r   r   boolr   r   r    r!   r"   r#   listr$        /var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/openai/configuration_openai.pyr	   r	      s   -^ J#0'&	M JKFCGSFCC"K"!J!!J! $$#u##L##!d!%)d
)#'D'),53;,#L#*##L#*#+/L#S	/D(/ $$r2   r	   N)	r(   huggingface_hub.dataclassesr   configuration_utilsr   utilsr   r	   __all__r1   r2   r3   <module>r8      sK     . 3 # 89K%& K%  :K%\ 
r2   