
    i(                        d Z ddlmZ ddlmZ ddlmZmZ  ej                  e	      Z
 ed      e G d d	e                    Z ed      e G d
 de                    Z ed      e G d de                    Zg dZy)zCLIPSeg model configuration    )strict   )PreTrainedConfig)auto_docstringloggingzCIDAS/clipseg-rd64)
checkpointc                      e Zd ZU dZdZdZdZeed<   dZ	eed<   dZ
eed	<   d
Zeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeez  ed<   dZeed<   dZeed<   dZedz  ed<   dZedz  ed<   dZeee   z  dz  ed <   y)!CLIPSegTextConfiga  
    Example:

    ```python
    >>> from transformers import CLIPSegTextConfig, CLIPSegTextModel

    >>> # Initializing a CLIPSegTextConfig with CIDAS/clipseg-rd64 style configuration
    >>> configuration = CLIPSegTextConfig()

    >>> # Initializing a CLIPSegTextModel (with random weights) from the CIDAS/clipseg-rd64 style configuration
    >>> model = CLIPSegTextModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```clipseg_text_modeltext_configi   
vocab_size   hidden_size   intermediate_size   num_hidden_layers   num_attention_headsM   max_position_embeddings
quick_gelu
hidden_acth㈵>layer_norm_eps        attention_dropout{Gz?initializer_range      ?initializer_factor   Npad_token_idi  bos_token_idi  eos_token_id)__name__
__module____qualname____doc__
model_typebase_config_keyr   int__annotations__r   r   r   r   r   r   strr   floatr   r   r!   r#   r$   r%   list     /var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/clipseg/configuration_clipseg.pyr
   r
      s      &J#OJK!s!s  #%S%"J" NE %(us{(#u# ## L#* $L#*$+0L#S	/D(0r2   r
   c                       e Zd ZU dZdZdZdZeed<   dZ	eed<   dZ
eed	<   dZeed
<   dZeed<   dZeee   z  eeef   z  ed<   dZeee   z  eeef   z  ed<   dZeed<   dZeed<   dZeez  ed<   dZeed<   dZeed<   y)CLIPSegVisionConfiga  
    Example:

    ```python
    >>> from transformers import CLIPSegVisionConfig, CLIPSegVisionModel

    >>> # Initializing a CLIPSegVisionConfig with CIDAS/clipseg-rd64 style configuration
    >>> configuration = CLIPSegVisionConfig()

    >>> # Initializing a CLIPSegVisionModel (with random weights) from the CIDAS/clipseg-rd64 style configuration
    >>> model = CLIPSegVisionModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```clipseg_vision_modelvision_configi   r   i   r   r   r   r   r   num_channels   
image_size    
patch_sizer   r   r   r   r   r   r   r   r    r!   N)r&   r'   r(   r)   r*   r+   r   r,   r-   r   r   r   r8   r:   r0   tupler<   r   r.   r   r/   r   r   r!   r1   r2   r3   r5   r5   ?   s      (J%OK!s!s!!L#47Jd3i%S/1746Jd3i%S/16"J" NE %(us{(#u# ##r2   r5   c                   4    e Zd ZU dZdZeedZdZe	ez  dz  e
d<   dZe	ez  dz  e
d<   dZedz  e
d<   d	Zeez  dz  e
d
<   dZedz  e
d<   dZee   eedf   z  e
d<   dZee
d<   dZee
d<   dZeez  e
d<   dZee
d<   dZee
d<   dZee
d<   dZee
d<    fdZ xZS )CLIPSegConfiga  
    extract_layers (`list[int]`, *optional*, defaults to `[3, 6, 9]`):
        Layers to extract when forwarding the query image through the frozen visual backbone of CLIP.
    reduce_dim (`int`, *optional*, defaults to 64):
        Dimensionality to reduce the CLIP vision embedding.
    conditional_layer (`int`, *optional*, defaults to 0):
        The layer to use of the Transformer encoder whose activations will be combined with the condition
        embeddings using FiLM (Feature-wise Linear Modulation). If 0, the last layer is used.
    use_complex_transposed_convolution (`bool`, *optional*, defaults to `False`):
        Whether to use a more complex transposed convolution in the decoder, enabling more fine-grained
        segmentation..

    Example:

    ```python
    >>> from transformers import CLIPSegConfig, CLIPSegModel

    >>> # Initializing a CLIPSegConfig with CIDAS/clipseg-rd64 style configuration
    >>> configuration = CLIPSegConfig()

    >>> # Initializing a CLIPSegModel (with random weights) from the CIDAS/clipseg-rd64 style configuration
    >>> model = CLIPSegModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config

    >>> # We can also initialize a CLIPSegConfig from a CLIPSegTextConfig and a CLIPSegVisionConfig

    >>> # Initializing a CLIPSegText and CLIPSegVision configuration
    >>> config_text = CLIPSegTextConfig()
    >>> config_vision = CLIPSegVisionConfig()

    >>> config = CLIPSegConfig(text_config=config_text, vision_config=config_vision)
    ```clipseg)r   r7   Nr   r7   r   projection_dimg/L
F@logit_scale_init_valuer    r!   )r      	   .extract_layers@   
reduce_dim   decoder_num_attention_headsr   decoder_attention_dropoutr   decoder_hidden_actr   decoder_intermediate_sizer   conditional_layerF"use_complex_transposed_convolutionc                    | j                   i }t        j                  d       nAt        | j                   t              r| j                   j                         }n| j                   }| j                  i }t        j                  d       nAt        | j                  t              r| j                  j                         }n| j                  }|j                  dd       }|j                  dd       }|t	        di |j                         }|j                         D ]B  \  }}||v s|||   k7  s|dk7  s||v r
d| d| d}	nd	| d
}	t        j                  |	       D |j                  |       |t        di |j                         }
d|
v r3|
d   j                         D ci c]  \  }}t        |      | c}}|
d<   |
j                         D ]B  \  }}||v s|||   k7  s|dk7  s||v r
d| d| d}	nd| d
}	t        j                  |	       D |j                  |
       t	        di || _         t        di || _        t        | 4  di | y c c}}w )NzR`text_config` is `None`. Initializing the `CLIPSegTextConfig` with default values.zV`vision_config` is `None`. initializing the `CLIPSegVisionConfig` with default values.text_config_dictvision_config_dicttransformers_version`zp` is found in both `text_config_dict` and `text_config` but with different values. The value `text_config_dict["z"]` will be used instead.zm`text_config_dict` is provided which will be used to initialize `CLIPSegTextConfig`. The value `text_config["z"]` will be overridden.id2labelzv` is found in both `vision_config_dict` and `vision_config` but with different values. The value `vision_config_dict["zs`vision_config_dict` is provided which will be used to initialize `CLIPSegVisionConfig`. The value `vision_config["r1   )r   loggerinfo
isinstancer
   to_dictr7   r5   popitemsupdater.   super__post_init__)selfkwargsr   r7   rP   rQ   _text_config_dictkeyvaluemessage_vision_config_dict	__class__s              r3   r]   zCLIPSegConfig.__post_init__   s   #KKKlm((*;<**224K**K%MKKpq**,?@ ..668M ..M "::&8$?#ZZ(<dC' 1 E4D E M M O 0557 )
U+%%;s3C*COeHe..u %<<?5@Y[  336%7NP   KK()" 01)"5"K8J"K"S"S"U006I*6U6[6[6]3(2UCHeO3#J/
 2779 )
U-'E]35G,GCSiLi00u %FFIUJce  99<=TV   KK()"   !45 -;{;0A=A''93s   &I)r&   r'   r(   r)   r*   r
   r5   sub_configsr   dictr-   r7   rA   r,   rB   r/   r!   rE   r0   r=   rG   rI   rJ   rK   r.   rL   rM   rN   boolr]   __classcell__)re   s   @r3   r?   r?   c   s    !F J"3FYZK37K))D077;M4--4;!$NC$J$17ECK$.7'**2;NDIc3h/;J'((-0us{0**%)s)s/4&4Q( Q(r2   r?   )r?   r
   r5   N)r)   huggingface_hub.dataclassesr   configuration_utilsr   utilsr   r   
get_loggerr&   rU   r
   r5   r?   __all__r1   r2   r3   <module>ro      s    " . 3 , 
		H	% /0!1( !1  1!1H /0$* $  1$D /0F($ F(  1F(R Hr2   