
    i)                        d Z ddlmZ ddlmZ ddlmZmZ  ej                  e	      Z
 ed      e G d d	e                    Z ed      e G d
 de                    Z ed      e G d de                    Zg dZy)zCLIP model configuration    )strict   )PreTrainedConfig)auto_docstringloggingzopenai/clip-vit-base-patch32)
checkpointc                   .   e Zd ZU dZdZdZdZeed<   dZ	eed<   dZ
eed	<   dZed
z  ed<   dZeed<   dZeed<   dZeed<   dZeed<   dZed
z  ed<   dZeez  d
z  ed<   dZeed<   dZed
z  ed<   dZed
z  ed<   dZed
z  ed<   d Zeee   z  d
z  ed!<   d" Zy
)#CLIPTextConfiga  
    Example:

    ```python
    >>> from transformers import CLIPTextConfig, CLIPTextModel

    >>> # Initializing a CLIPTextConfig with openai/clip-vit-base-patch32 style configuration
    >>> configuration = CLIPTextConfig()

    >>> # Initializing a CLIPTextModel (with random weights) from the openai/clip-vit-base-patch32 style configuration
    >>> model = CLIPTextModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```clip_text_modeltext_configi   
vocab_size   hidden_sizei   intermediate_sizeNprojection_dim   num_hidden_layers   num_attention_headsM   max_position_embeddings
quick_gelu
hidden_acth㈵>layer_norm_eps        attention_dropout{Gz?initializer_range      ?initializer_factor   pad_token_idi  bos_token_idi  eos_token_idc                     | j                   | j                  z  dk7  r&t        d| j                    d| j                   d      yzOPart of `@strict`-powered validation. Validates the architecture of the config.r   zThe hidden size (z6) is not a multiple of the number of attention heads (z).Nr   r   
ValueErrorselfs    |/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/clip/configuration_clip.pyvalidate_architecturez$CLIPTextConfig.validate_architectureB   S    d666!;#D$4$4#5 622327  <    )__name__
__module____qualname____doc__
model_typebase_config_keyr   int__annotations__r   r   r   r   r   r   r   strr   floatr   r   r!   r#   r$   r%   listr-    r/   r,   r
   r
      s      #J#OJK!s!!$NC$J$s  #%S%"J"#'NEDL',/sU{T)/#u#'**  !L#* $L#*$+0L#S	/D(0r/   r
   c                      e Zd ZU dZdZdZdZeed<   dZ	eed<   dZ
ed	z  ed
<   dZeed<   dZeed<   dZed	z  ed<   dZed	z  ed<   dZed	z  ed<   dZeed<   dZed	z  ed<   dZeez  d	z  ed<   dZeed<   dZed	z  ed<   d Zy	)CLIPVisionConfiga  
    Example:

    ```python
    >>> from transformers import CLIPVisionConfig, CLIPVisionModel

    >>> # Initializing a CLIPVisionConfig with openai/clip-vit-base-patch32 style configuration
    >>> configuration = CLIPVisionConfig()

    >>> # Initializing a CLIPVisionModel (with random weights) from the openai/clip-vit-base-patch32 style configuration
    >>> model = CLIPVisionModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```clip_vision_modelvision_configi   r   i   r   r   Nr   r   r   r   r   num_channels   
image_size    
patch_sizer   r   r   r   r   r   r   r   r    r!   c                     | j                   | j                  z  dk7  r&t        d| j                    d| j                   d      yr'   r(   r*   s    r,   r-   z&CLIPVisionConfig.validate_architectureo   r.   r/   )r0   r1   r2   r3   r4   r5   r   r6   r7   r   r   r   r   r@   rB   rD   r   r8   r   r9   r   r   r!   r-   r;   r/   r,   r=   r=   K   s      %J%OK!s!!$NC$J$s!! L#*  Jd
 Jd
"J"#'NEDL',/sU{T)/#u#'**r/   r=   c                        e Zd ZU dZdZeedZdZe	ez  dz  e
d<   dZe	ez  dz  e
d<   dZedz  e
d<   d	Zeez  dz  e
d
<   dZedz  e
d<    fdZ xZS )
CLIPConfiga  
    text_config (`dict`, *optional*):
        Dictionary of configuration options used to initialize [`CLIPTextConfig`].
    vision_config (`dict`, *optional*):
        Dictionary of configuration options used to initialize [`CLIPVisionConfig`].
    logit_scale_init_value (`float | int`, *optional*, defaults to 2.6592):
        The initial value of the *logit_scale* parameter. Default is used as per the original CLIP implementation.

    Example:

    ```python
    >>> from transformers import CLIPConfig, CLIPModel

    >>> # Initializing a CLIPConfig with openai/clip-vit-base-patch32 style configuration
    >>> configuration = CLIPConfig()

    >>> # Initializing a CLIPModel (with random weights) from the openai/clip-vit-base-patch32 style configuration
    >>> model = CLIPModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config

    >>> # We can also initialize a CLIPConfig from a CLIPTextConfig and a CLIPVisionConfig
    >>> from transformers import CLIPTextConfig, CLIPVisionConfig

    >>> # Initializing a CLIPText and CLIPVision configuration
    >>> config_text = CLIPTextConfig()
    >>> config_vision = CLIPVisionConfig()

    >>> config = CLIPConfig(text_config=config_text, vision_config=config_vision)
    ```clip)r   r?   Nr   r?   r   r   g/L
F@logit_scale_init_valuer    r!   c                    | j                   i }t        j                  d       nAt        | j                   t              r| j                   j                         }n| j                   }| j                  i }t        j                  d       nAt        | j                  t              r| j                  j                         }n| j                  }|j                  dd       }|j                  dd       }|t	        di |j                         }|j                         D ]B  \  }}||v s|||   k7  s|dk7  s||v r
d| d| d}	nd	| d
}	t        j                  |	       D |j                  |       |t        di |j                         }
d|
v r3|
d   j                         D ci c]  \  }}t        |      | c}}|
d<   |
j                         D ]B  \  }}||v s|||   k7  s|dk7  s||v r
d| d| d}	nd| d
}	t        j                  |	       D |j                  |
       t	        di || _         t        di || _        t        | 4  di | y c c}}w )NzO`text_config` is `None`. Initializing the `CLIPTextConfig` with default values.zS`vision_config` is `None`. initializing the `CLIPVisionConfig` with default values.text_config_dictvision_config_dicttransformers_version`zp` is found in both `text_config_dict` and `text_config` but with different values. The value `text_config_dict["z"]` will be used instead.zj`text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["z"]` will be overridden.id2labelzv` is found in both `vision_config_dict` and `vision_config` but with different values. The value `vision_config_dict["zp`vision_config_dict` is provided which will be used to initialize `CLIPVisionConfig`. The value `vision_config["r;   )r   loggerinfo
isinstancer
   to_dictr?   r=   popitemsupdater8   super__post_init__)r+   kwargsr   r?   rK   rL   _text_config_dictkeyvaluemessage_vision_config_dict	__class__s              r,   rX   zCLIPConfig.__post_init__   s   #KKKij((.9**224K**K%MKKmn**,<= ..668M ..M "::&8$?#ZZ(<dC' . B1A B J J L 0557 )
U+%%;s3C*COeHe..u %<<?5@Y[  336%7NP   KK()" 01)"2"H5G"H"P"P"R006I*6U6[6[6]3(2UCHeO3#J/
 2779 )
U-'E]35G,GCSiLi00u %FFIUJce  99<=TV   KK()"   !45 *8K8->>''93s   &I)r0   r1   r2   r3   r4   r
   r=   sub_configsr   dictr7   r?   r   r6   rI   r9   r!   rX   __classcell__)r_   s   @r,   rG   rG   x   s    @ J"0CSTK04K&-448M4**T18!$NC$J$17ECK$.7'**Q( Q(r/   rG   )rG   r
   r=   N)r3   huggingface_hub.dataclassesr   configuration_utilsr   utilsr   r   
get_loggerr0   rP   r
   r=   rG   __all__r;   r/   r,   <module>rh      s     . 3 , 
		H	% 9:-% -  ;-` 9:(' (  ;(V 9:{(! {(  ;{(| ?r/   