
    i{I                        d Z ddlmZ ddlmZ ddlmZ ddlmZm	Z	  e	j                  e      Z ed      e G d	 d
e                    Z ed      e G d de                    Z ed      e G d de                    Z ed      e G d de                    Z ed      e G d de                    Zg dZy)zFLAVA model configurations    )Any)strict   )PreTrainedConfig)auto_docstringloggingzfacebook/flava-full)
checkpointc                   0   e Zd ZU dZdZdZdZeed<   dZ	eed<   dZ
eed<   d	Zeed
<   dZeed<   dZeez  ed<   dZeez  ed<   dZeed<   dZeed<   dZeee   z  eeef   z  ed<   dZeee   z  eeef   z  ed<   dZeed<   dZeed<   dZeed<   dZeed<   y) FlavaImageConfigaT  
    mask_token (`bool`, *optional*, defaults to `True`):
        Whether to use a mask token or not. Used in MIM (Masked Image Modeling) loss for FLAVA.

    Example:

    ```python
    >>> from transformers import FlavaImageConfig, FlavaImageModel

    >>> # Initializing a FlavaImageModel with  style configuration
    >>> configuration = FlavaImageConfig()

    >>> # Initializing a FlavaImageModel model (with random weights) from the style configuration
    >>> model = FlavaImageModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```flava_image_modelimage_config   hidden_size   num_hidden_layersnum_attention_heads   intermediate_sizegelu
hidden_act        hidden_dropout_probattention_probs_dropout_prob{Gz?initializer_range-q=layer_norm_eps   
image_size   
patch_sizer   num_channelsTqkv_bias
mask_token    
vocab_sizeN)__name__
__module____qualname____doc__
model_typebase_config_keyr   int__annotations__r   r   r   r   strr   floatr   r   r   r   listtupler!   r"   r#   boolr$   r&        ~/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/flava/configuration_flava.pyr   r      s    & %J$OKs!!!s!J'**03 %#+3#u#!NE!47Jd3i%S/1746Jd3i%S/16L#HdJJr5   r   c                       e Zd ZU dZdZdZdZeed<   dZ	eed<   dZ
eed	<   d
Zeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeez  ed<   dZeez  ed<   dZeed<   dZeed<   dZedz  ed<   dZeed<   y)FlavaTextConfiga  
    Example:

    ```python
    >>> from transformers import FlavaTextConfig, FlavaTextModel

    >>> # Initializing a FlavaTextModel with  style configuration
    >>> configuration = FlavaTextConfig()

    >>> # Initializing a FlavaTextModel model (with random weights) from the style configuration
    >>> model = FlavaTextModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```flava_text_modeltext_configi:w  r&      type_vocab_sizei   max_position_embeddingsr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   Npad_token_idTr#   )r'   r(   r)   r*   r+   r,   r&   r-   r.   r<   r=   r   r   r   r   r   r/   r   r0   r   r   r   r>   r#   r3   r4   r5   r6   r8   r8   E   s      $J#OJOS#&S&Ks!!!s!J'**03 %#+3#u#!NE! L#* Hdr5   r8   c                       e Zd ZU dZdZdZdZeed<   dZ	eed<   dZ
eed	<   d
Zeed<   dZeed<   dZeez  ed<   dZeez  ed<   dZeed<   dZeed<   dZeed<   dZeed<   y)FlavaMultimodalConfiga{  
    use_cls_token (`bool`, *optional*, defaults to `True`):
        Whether to use an extra CLS token for multimodal settings. Usually needed by the FLAVA model.

    Example:

    ```python
    >>> from transformers import FlavaMultimodalConfig, FlavaMultimodalModel

    >>> # Initializing a FlavaMultimodalModel with  style configuration
    >>> configuration = FlavaMultimodalConfig()

    >>> # Initializing a FlavaMultimodalModel model (with random weights) from the style configuration
    >>> model = FlavaMultimodalModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```flava_multimodal_modelmultimodal_configr   r      r   r   r   r   r   r   r   r   r   r   r   r   r   r   Tr#   use_cls_tokenN)r'   r(   r)   r*   r+   r,   r   r-   r.   r   r   r   r   r/   r   r0   r   r   r   r#   r3   rD   r4   r5   r6   r@   r@   k   s    & *J)OKs!!!s!J'**03 %#+3#u#!NE!HdM4r5   r@   c                   t    e Zd ZU dZdZeed<   dZeed<   dZeed<   dZ	eed	<   d
Z
eed<   dZeed<   dZeed<   y)FlavaImageCodebookConfigas  
    num_groups (`int`, *optional*, defaults to 4):
        Number of groups to be created. This parameter as of now doesn't affect the model and is used for some
        internal calculation and estimations.
    num_blocks_per_group (`int`, *optional*, defaults to 2):
        Number of conv-based blocks per group.
    freeze (`bool`, defaults to `True`):
        Whether to freeze the weights of the model.

    Example:

    ```python
    >>> from transformers import FlavaImageCodebookConfig, FlavaImageCodebook

    >>> # Initializing a FlavaImageCodebook with style configuration
    >>> configuration = FlavaImageCodebookConfig()

    >>> # Initializing a FlavaImageCodebook model (with random weights) from the style configuration
    >>> model = FlavaImageCodebook(configuration)
    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```
       
num_groupsr   input_channelsr;   num_blocks_per_group   r   r%   r&   Tfreezer   r   N)r'   r(   r)   r*   rH   r-   r.   rI   rJ   r   r&   rL   r3   r   r0   r4   r5   r6   rF   rF      sQ    0 JNC !#!KJFD#u#r5   rF   c                       e Zd ZU dZdZeeeedZ	dZ
eeef   ez  dz  ed<   dZeeef   ez  dz  ed<   dZeeef   ez  dz  ed<   dZeeef   ez  dz  ed<   d	Zeed
<   dZeed<   d	Zeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZ eed<   dZ!eed<   dZ"eed<   dZ#eed<   dZ$eed<   dZ%eed<   dZ&eed <   dZ'eed!<    fd"Z( xZ)S )#FlavaConfiga 
  
    image_config (`dict`, *optional*):
        Dictionary of configuration options used to initialize [`FlavaImageConfig`].
    multimodal_config (`dict`, *optional*):
        Dictionary of configuration options used to initialize [`FlavaMultimodalConfig`].
    image_codebook_config (`dict`, *optional*):
        Dictionary of configuration options used to initialize [`FlavaCodebookConfig`].
    init_codebook (`bool`, *optional*, defaults to `True`):
        Whether to initialize the codebook
    logit_scale_init_value (`float`, *optional*, defaults to 2.6592):
        The initial value of the *logit_scale* parameter. Default is used as per the original FLAVA/CLIP
        implementation.
    ce_ignore_index (`int`, *optional*, defaults to -100):
        Cross entropy index to ignore.
    mim_weight (`float`, *optional*, defaults to 1.0):
        Weight to be assigned to MIM (Masked Image Modeling) unimodal loss
    mlm_weight (`float`, *optional*, defaults to 1.0):
        Weight to be assigned to MLM (Masked Language Modeling) unimodal loss
    global_contrastive_weight (`float`, *optional*, defaults to 1.0):
        Weight to be assigned to global contrastive cross-alignment loss.
    itm_weight (`float`, *optional*, defaults to 1.0):
        Weight to be assigned to image-text matching multimodal loss.
    mmm_image_weight (`float`, *optional*, defaults to 1.0):
        Weight to be assigned to MMM loss's image part.
    mmm_text_weight (`float`, *optional*, defaults to 1.0):
        Weight to be assigned to MMM loss's text part.
    global_backprop_contrastive (`bool`, *optional*, defaults to `True`):
        Whether to use global backpropgation through all workers in contrastive loss.
    skip_unmasked_multimodal_encoder (`bool`, *optional*, defaults to `True`):
        Whether to skip running unmasked multimodal encoder whose outputs are not used by FLAVA losses.
    return_loss (`bool`, *optional*, defaults to `True`):
        Whether to return loss or not

    Example:

    ```python
    >>> from transformers import FlavaConfig, FlavaModel, FlavaForPreTraining

    >>> # Initializing a FlavaConfig with style configuration
    >>> configuration = FlavaConfig()

    >>> # Initializing a FlavaModel and FlavaForPreTraining model (with random weights) from the style configuration
    >>> model = FlavaModel(configuration)
    >>> model_pre = FlavaForPreTraining(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    >>> configuration_pre = model_pre.config
    ```
    flava)r:   r   rB   image_codebook_configNr   r:   rB   rP   r   r   r   r   projection_dimTinit_codebookg/L
F@logit_scale_init_valuer   r   ice_ignore_indexg      ?
mim_weight
mlm_weightglobal_contrastive_weight
itm_weightmmm_image_weightmmm_text_weightglobal_backprop_contrastive skip_unmasked_multimodal_encoderreturn_losstie_word_embeddingsinitializer_factorc                    | j                   i }t        j                  d       nAt        | j                   t              r| j                   j                         }n| j                   }| j                  i }t        j                  d       nAt        | j                  t              r| j                  j                         }n| j                  }| j                  i }t        j                  d       nAt        | j                  t              r| j                  j                         }n| j                  }| j                  i }t        j                  d       nAt        | j                  t              r| j                  j                         }n| j                  }|j                  dd       }|j                  dd       }|j                  dd       }|j                  dd       }	|t	        di |j                         }
|
j                         D ]B  \  }}||v s|||   k7  s|d	k7  s||v r
d
| d| d}nd| d}t        j                  |       D |j                  |
       |t        di |j                         }d|v r3|d   j                         D ci c]  \  }}t        |      | c}}|d<   |j                         D ]B  \  }}||v s|||   k7  s|d	k7  s||v r
d
| d| d}nd| d}t        j                  |       D |j                  |       |t        di |j                         }|j                         D ]B  \  }}||v s|||   k7  s|d	k7  s||v r
d
| d| d}nd| d}t        j                  |       D |j                  |       |	t        di |	j                         }|j                         D ]B  \  }}||v s|||   k7  s|d	k7  s||	v r
d
| d| d}nd| d}t        j                  |       D |j                  |       t	        di || _         t        di || _        t        di || _        t        di || _
        t!        | D  di | y c c}}w )NzP`text_config` is `None`. Initializing the `FlavaTextConfig` with default values.zR`image_config` is `None`. initializing the `FlavaImageConfig` with default values.z\`multimodal_config` is `None`. Initializing the `FlavaMultimodalConfig` with default values.zc`image_codebook_config` is `None`. initializing the `FlavaImageCodebookConfig` with default values.text_config_dictimage_config_dictmultimodal_config_dictimage_codebook_config_dicttransformers_version`zp` is found in both `text_config_dict` and `text_config` but with different values. The value `text_config_dict["z"]` will be used instead.zk`text_config_dict` is provided which will be used to initialize `FlavaTextConfig`. The value `text_config["z"]` will be overridden.id2labelzs` is found in both `image_config_dict` and `image_config` but with different values. The value `image_config_dict["zn`image_config_dict` is provided which will be used to initialize `FlavaImageConfig`. The value `image_config["z` is found in both `multimodal_config_dict` and `multimodal_config` but with different values. The value `multimodal_config_dict["z}`multimodal_config_dict` is provided which will be used to initialize `FlavaMultimodalConfig`. The value `multimodal_config["z` is found in both `image_codebook_config_dict` and `image_codebook_config` but with different values. The value `image_codebook_config_dict["z`image_codebook_config_dict` is provided which will be used to initialize `FlavaImageCodebookConfig`. The value `image_codebook_config["r4   )r:   loggerinfo
isinstancer8   to_dictr   r   rB   r@   rP   rF   popitemsupdater/   super__post_init__)selfkwargsr:   r   rB   rP   ra   rb   rc   rd   _text_config_dictkeyvaluemessage_image_config_dict_multimodal_config_dict_image_codebook_config_dict	__class__s                    r6   rp   zFlavaConfig.__post_init__
  s   #KKKjk((/:**224K**K$LKKlm))+;<,,446L,,L!!) "KKvw..0EF $ 6 6 > > @ $ 6 6%%-$&!KKu 224LM$($>$>$F$F$H!$($>$>! "::&8$?"JJ':DA!',Dd!K%+ZZ0Ld%S"
 ' / C2B C K K M 0557 )
U+%%;s3C*COeHe..u %<<?5@Y[  336%7NP   KK()" 01(!1!F4E!F!N!N!P//6H6T6Z6Z6\2(2UCHeO2":.
 1668 )
U,&5L4E+E#QgJg//u %EEHEIbd  88;u<SU   KK()"  23!-&;&U>T&U&]&]&_# 6;;= )
U++9J39O0OTW[qTq44u %TTWSXXqs  VVYUZZqs   KK()" $$%<=%1*B*`E_*`*h*h*j' :??A )
U00!6s!;;55 88u %]]`\a b''  ]]`\aaxz   KK('), "(()DE +9[9,<|<!6!K9J!K%=%V@U%V"''k2s   Q))*r'   r(   r)   r*   r+   r8   r   r@   rF   sub_configsr   dictr/   r   r   r.   r:   rB   rP   r   r-   r   r0   rQ   rR   r3   rS   r   rT   rU   rV   rW   rX   rY   rZ   r[   r\   r]   r^   r_   rp   __classcell__)rz   s   @r6   rN   rN      sd   1f J&(2!9	K >BL$sCx.#33d:A<@Kc3h"22T9@BFtCH~(884?FFJ4S>,<<tCJK!NE!NCM4$*E*#u#OSJJ'*u*J!e! OU (,,-1$d1K $$ ##^( ^(r5   rN   )rN   rF   r   r@   r8   N)r*   typingr   huggingface_hub.dataclassesr   configuration_utilsr   utilsr   r   
get_loggerr'   rh   r   r8   r@   rF   rN   __all__r4   r5   r6   <module>r      s   !  . 3 , 
		H	% 01%' %  2%P 01!& !  2!H 01!, !  2!H 01$/ $  2$D 01q(" q(  2q(h vr5   