
    i                        d Z ddlmZ ddlmZ ddlmZ  ed      e G d d	e                    Z ed      e G d
 de                    Z ed      e G d de                    Z	 ed      e G d de                    Z
g dZy)zSAM model configuration    )strict   )PreTrainedConfig)auto_docstringzfacebook/sam-vit-huge)
checkpointc                        e Zd ZU dZdZdZeed<   dZee	e   z  e
eef   z  ed<   dZee	e   z  e
eef   z  ed<   dZeed	<   d
Zeed<   dZeed<   dZeed<    fdZ xZS )SamPromptEncoderConfigz
    mask_input_channels (`int`, *optional*, defaults to 16):
        The number of channels to be fed to the `MaskDecoder` module.
    num_point_embeddings (`int`, *optional*, defaults to 4):
        The number of point embeddings to be used.
    prompt_encoder_config   hidden_size   
image_size   
patch_sizemask_input_channels   num_point_embeddingsgelu
hidden_actư>layer_norm_epsc                 `    | j                   | j                  z  | _        t        |   di | y N )r   r   image_embedding_sizesuper__post_init__selfkwargs	__class__s     z/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/sam/configuration_sam.pyr   z$SamPromptEncoderConfig.__post_init__*   s(    $(OOt$F!''    )__name__
__module____qualname____doc__base_config_keyr   int__annotations__r   listtupler   r   r   r   strr   floatr   __classcell__r!   s   @r"   r	   r	      s     .OK48Jd3i%S/1846Jd3i%S/16!! !#!J NE ( (r#   r	   c                       e Zd ZU dZdZdZeed<   dZe	ed<   dZ
eed<   d	Zeed
<   dZeed<   d	Zeed<   dZeed<   dZeed<   dZeed<   dZeed<   y)SamMaskDecoderConfiga  
    mlp_dim (`int`, *optional*, defaults to 2048):
        Dimensionality of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
    attention_downsample_rate (`int`, *optional*, defaults to 2):
        The downsampling rate of the attention layer.
    num_multimask_outputs (`int`, *optional*, defaults to 3):
        The number of outputs from the `SamMaskDecoder` module. In the Segment Anything paper, this is set to 3.
    iou_head_depth (`int`, *optional*, defaults to 3):
        The number of layers in the IoU head module.
    iou_head_hidden_dim (`int`, *optional*, defaults to 256):
        The dimensionality of the hidden states in the IoU head module.
    mask_decoder_configr   r   relur   i   mlp_dim   num_hidden_layers   num_attention_headsattention_downsample_rater   num_multimask_outputsiou_head_depthiou_head_hidden_dimr   r   N)r$   r%   r&   r'   r(   r   r)   r*   r   r-   r5   r7   r9   r:   r;   r<   r=   r   r.   r   r#   r"   r2   r2   /   sw     ,OKJGSs  %&s&!"3"NC"" NE r#   r2   c                       e Zd ZU dZdZdZdZeed<   dZ	eed<   dZ
eed	<   dZeed
<   dZeed<   dZeee   z  eeef   z  ed<   dZeee   z  eeef   z  ed<   dZeed<   dZeed<   dZeez  ed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed <   d!Zee   eed"f   z  ed#<   d$Zeed%<   d&Zed&z  ed'<    fd(Z  xZ!S ))SamVisionConfiga  
    output_channels (`int`, *optional*, defaults to 256):
        Dimensionality of the output channels in the Patch Encoder.
    use_rel_pos (`bool`, *optional*, defaults to `True`):
        Whether to use relative position embedding.
    window_size (`int`, *optional*, defaults to 14):
        Window size for relative position.
    global_attn_indexes (`list[int]`, *optional*, defaults to `[2, 5, 8, 11]`):
        The indexes of the global attention layers.
    num_pos_feats (`int`, *optional*, defaults to 128):
        The dimensionality of the position embedding.
    mlp_dim (`int`, *optional*):
        The dimensionality of the MLP layer in the Transformer encoder. If `None`, defaults to `mlp_ratio *
        hidden_size`.

    Example:

    ```python
    >>> from transformers import (
    ...     SamVisionConfig,
    ...     SamVisionModel,
    ... )

    >>> # Initializing a SamVisionConfig with `"facebook/sam-vit-huge"` style configuration
    >>> configuration = SamVisionConfig()

    >>> # Initializing a SamVisionModel (with random weights) from the `"facebook/sam-vit-huge"` style configuration
    >>> model = SamVisionModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```vision_configsam_vision_modeli   r   r   output_channels   r7   r9   r   num_channelsr   r   r   r   r   r   r   r   g        attention_dropoutg|=initializer_rangeTqkv_biasg      @	mlp_ratiouse_abs_posuse_rel_pos   window_size)r6      r8      .global_attn_indexes   num_pos_featsNr5   c                     | j                   "t        | j                  | j                  z        n| j                   | _         | j                  dz  | _        t        |   di | y )Nr6   r   )r5   r)   r   rH   scaler   r   r   s     r"   r   zSamVisionConfig.__post_init__   sR    AEAUs4++dnn<=[_[g[g%%*
''r#   )"r$   r%   r&   r'   r(   
model_typer   r)   r*   rB   r7   r9   rD   r   r+   r,   r   r   r-   r   r.   rE   rF   rG   boolrH   rI   rJ   rL   rO   rQ   r5   r   r/   r0   s   @r"   r?   r?   M   s%   B &O#JKOSs!!L#48Jd3i%S/1846Jd3i%S/16J!NE!%(us{($u$HdIuKKK7DcU38_4DM3GS4Z( (r#   r?   c                        e Zd ZU dZdZeeedZdZ	e
ez  dz  ed<   dZe
ez  dz  ed<   dZe
ez  dz  ed<   dZeed	<   d
Zeed<    fdZ xZS )	SamConfiga;  
    prompt_encoder_config (Union[`dict`, `SamPromptEncoderConfig`], *optional*):
        Dictionary of configuration options used to initialize [`SamPromptEncoderConfig`].
    mask_decoder_config (Union[`dict`, `SamMaskDecoderConfig`], *optional*):
        Dictionary of configuration options used to initialize [`SamMaskDecoderConfig`].

    Example:

    ```python
    >>> from transformers import (
    ...     SamVisionConfig,
    ...     SamPromptEncoderConfig,
    ...     SamMaskDecoderConfig,
    ...     SamModel,
    ... )

    >>> # Initializing a SamConfig with `"facebook/sam-vit-huge"` style configuration
    >>> configuration = SamConfig()

    >>> # Initializing a SamModel (with random weights) from the `"facebook/sam-vit-huge"` style configuration
    >>> model = SamModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config

    >>> # We can also initialize a SamConfig from a SamVisionConfig, SamPromptEncoderConfig, and SamMaskDecoderConfig

    >>> # Initializing SAM vision, SAM Q-Former and language model configurations
    >>> vision_config = SamVisionConfig()
    >>> prompt_encoder_config = SamPromptEncoderConfig()
    >>> mask_decoder_config = SamMaskDecoderConfig()

    >>> config = SamConfig(vision_config, prompt_encoder_config, mask_decoder_config)
    ```sam)r
   r3   r@   Nr@   r
   r3   g{Gz?rF   Ttie_word_embeddingsc                    t        | j                  t              rt        di | j                  | _        n| j                  t               | _        t        | j                  t              rt        di | j                  | _        n| j                  t               | _        t        | j                  t              rt        di | j                  | _        n| j                  t               | _        t        | $  di | y r   )

isinstancer@   dictr?   r
   r	   r3   r2   r   r   r   s     r"   r   zSamConfig.__post_init__   s    d(($/!0!F43E3E!FD'!0!2Dd00$7)?)]$B\B\)]D&''/)?)AD&d..5';'Wd>V>V'WD$%%-';'=D$''r#   )r$   r%   r&   r'   rT   r	   r2   r?   sub_configsr@   r\   r   r*   r
   r3   rF   r.   rY   rU   r   r/   r0   s   @r"   rW   rW      s    !F J!73(K 59M4**T18<@4"22T9@:> 0047>#u# $$( (r#   rW   )rW   r2   r	   r?   N)r'   huggingface_hub.dataclassesr   configuration_utilsr   utilsr   r	   r2   r?   rW   __all__r   r#   r"   <module>rb      s     . 3 # 23(- (  4(. 23!+ !  4!8 23<(& <(  4<(~ 23A(  A(  4A(H ]r#   