
    i                     j    d Z ddlmZ ddlmZ ddlmZ  ed      e G d d	e                    Zd	gZy
)zVilT model configuration    )strict   )PreTrainedConfig)auto_docstringzdandelin/vilt-b32-mlm)
checkpointc                       e Zd ZU dZdZdZeed<   dZeed<   dZ	eed<   dZ
eed	<   d
Zeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeez  ed<   dZeez  ed<   dZeed<   dZeed<   dZeee   z  eeef   z  ed<   dZeee   z  eeef   z  ed<   dZeed<   d Zeed!<   d"Zeed#<   d Zeed$<   d"Zeed%<   d&Z ed&z  ed'<    fd(Z! xZ"S ))
ViltConfiga  
    modality_type_vocab_size (`int`, *optional*, defaults to 2):
        The vocabulary size of the modalities passed when calling [`ViltModel`]. This is used after concatenating the
        embeddings of the text and image modalities.
    max_image_length (`int`, *optional*, defaults to -1):
        The maximum number of patches to take as input for the Transformer encoder. If set to a positive integer,
        the encoder will sample `max_image_length` patches at maximum. If set to -1, will not be taken into
        account.
    num_images (`int`, *optional*, defaults to -1):
        The number of images to use for natural language visual reasoning. If set to a positive integer, will be
        used by [`ViltForImagesAndTextClassification`] for defining the classifier head.

    Example:

    ```python
    >>> from transformers import ViLTModel, ViLTConfig

    >>> # Initializing a ViLT dandelin/vilt-b32-mlm style configuration
    >>> configuration = ViLTConfig()

    >>> # Initializing a model from the dandelin/vilt-b32-mlm style configuration
    >>> model = ViLTModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```vilti:w  
vocab_size   type_vocab_sizemodality_type_vocab_size(   max_position_embeddingsi   hidden_size   num_hidden_layersnum_attention_headsi   intermediate_sizegelu
hidden_actg        hidden_dropout_probattention_probs_dropout_probg{Gz?initializer_rangeg-q=layer_norm_epsi  
image_size    
patch_sizer   num_channelsTqkv_biasmax_image_lengthtie_word_embeddings
num_imagesNpad_token_idc                 V    |j                  dd        d| _        t        |   di | y )Nr#   T )popr#   super__post_init__)selfkwargs	__class__s     |/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/vilt/configuration_vilt.pyr*   zViltConfig.__post_init__L   s*    

($/#' ''    )#__name__
__module____qualname____doc__
model_typer   int__annotations__r   r   r   r   r   r   r   r   strr   floatr   r   r   r   listtupler   r   r    boolr"   r#   r$   r%   r*   __classcell__)r-   s   @r.   r	   r	      s+   6 JJOS$%c%#%S%Ks!!!s!J'**03 %#+3#u#!NE!47Jd3i%S/1746Jd3i%S/16L#Hdc $$J#L#*#( (r/   r	   N)	r3   huggingface_hub.dataclassesr   configuration_utilsr   utilsr   r	   __all__r'   r/   r.   <module>rA      sG     . 3 # 237(! 7(  47(t .r/   