
    iv&                         d dl mZ ddlmZ ddlmZ ddlmZmZ  ed      e G d	 d
e                    Z	 ed      e G d de                    Z
 ed      e G d de                    Z ed      e G d de                    Zg dZy)    )strict   )PreTrainedConfig)auto_docstring   )CONFIG_MAPPING
AutoConfigzyonigozlan/EdgeTAM-hf)
checkpointc                       e Zd ZU dZdZdZdeiZdZe	e
z  dz  ed<   dZee   dz  ed<   dZedz  ed<   dZeed	<   d
Zeed<   d
Zeed<   dZeed<   dZee   dz  ed<   dZeed<   dZeed<   dZeed<   dZeed<    fdZ xZS )EdgeTamVisionConfiga  
    backbone_channel_list (`List[int]`, *optional*, defaults to `[384, 192, 96, 48]`):
        The list of channel dimensions for the backbone.
    backbone_feature_sizes (`List[List[int]]`, *optional*, defaults to `[[256, 256], [128, 128], [64, 64]]`):
        The spatial sizes of the feature maps from the backbone.
    fpn_hidden_size (`int`, *optional*, defaults to 256):
        The hidden dimension of the FPN.
    fpn_kernel_size (`int`, *optional*, defaults to 1):
        The kernel size for the convolutions in the neck.
    fpn_stride (`int`, *optional*, defaults to 1):
        The stride for the convolutions in the neck.
    fpn_padding (`int`, *optional*, defaults to 0):
        The padding for the convolutions in the neck.
    fpn_top_down_levels (`List[int]`, *optional*, defaults to `[2, 3]`):
        The levels for the top-down FPN connections.
    num_feature_levels (`int`, *optional*, defaults to 3):
        The number of feature levels from the FPN to use.
    vision_configedgetam_vision_modelbackbone_configNbackbone_channel_listbackbone_feature_sizes   fpn_hidden_size   fpn_kernel_size
fpn_strider   fpn_paddingfpn_top_down_levelsr   num_feature_levelsgelu
hidden_actư>layer_norm_eps{Gz?initializer_rangec                 0   | j                   g dn| j                   | _         | j                  ddgddgddggn| j                  | _        | j                  ddgn| j                  | _        t        | j                  t
              rT| j                  j                  dd      | j                  d<   t        | j                  d      di | j                  | _        n.| j                  "t        j                  d	dd
g dd      | _        t        | ,  di | y )N)i     `   0   r      @   r   r   
model_typetimm_wrapperztimm/repvit_m1.dist_in1kT)r   r   r   r   )in_chansfeatures_onlyout_indices)
model_args )r   r   r   
isinstancer   dictgetr   r	   from_pretrainedsuper__post_init__selfkwargs	__class__s     /var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/edgetam/configuration_edgetam.pyr2   z!EdgeTamVisionConfig.__post_init__D   s   "&"<"<"D$JdJd 	" 372M2M2Uc3Z#sb"X.[_[v[v 	# .2-E-E-MAq6SWSkSk d**D1151E1E1I1I,Xf1gD  .#1$2F2F|2T#U#mX\XlXl#mD !!)#-#=#=*()DQ]^$D  	''    )__name__
__module____qualname____doc__base_config_keyr&   r	   sub_configsr   r.   r   __annotations__r   listintr   r   r   r   r   r   r   r   strr   floatr   r2   __classcell__r6   s   @r7   r   r      s    & &O'J:K 7;OT,,t3:.249t+2*.D4K.OSOSJK,0cT)0J NE #u#( (r8   r   c                       e Zd ZU dZdZdZeed<   dZee	e   z  e
eef   z  ed<   dZee	e   z  e
eef   z  ed<   dZeed	<   d
Zeed<   dZeed<   dZeed<   dZeed<   y)EdgeTamPromptEncoderConfigaY  
    mask_input_channels (`int`, *optional*, defaults to 16):
        The number of channels to be fed to the `MaskDecoder` module.
    num_point_embeddings (`int`, *optional*, defaults to 4):
        The number of point embeddings to be used.
    scale (`float`, *optional*, defaults to 1):
        The scale factor for the prompt encoder.
    prompt_encoder_configr   hidden_sizei   
image_size   
patch_sizemask_input_channels   num_point_embeddingsr   r   r   r   r   scaleN)r9   r:   r;   r<   r=   rI   rA   r?   rJ   r@   tuplerL   rM   rO   r   rB   r   rC   rP   r,   r8   r7   rG   rG   X   s     .OK48Jd3i%S/1846Jd3i%S/16!! !#!J NE E3Nr8   rG   c                       e Zd ZU dZdZdZeed<   dZe	ed<   dZ
eed<   d	Zeed
<   dZeed<   d	Zeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   y)EdgeTamMaskDecoderConfiga  
    mlp_dim (`int`, *optional*, defaults to 2048):
        The dimension of the MLP in the two-way transformer.
    attention_downsample_rate (`int`, *optional*, defaults to 2):
        The downsample rate for the attention layers.
    num_multimask_outputs (`int`, *optional*, defaults to 3):
        The number of multimask outputs.
    iou_head_depth (`int`, *optional*, defaults to 3):
        The depth of the IoU head.
    iou_head_hidden_dim (`int`, *optional*, defaults to 256):
        The hidden dimension of the IoU head.
    dynamic_multimask_via_stability (`bool`, *optional*, defaults to `True`):
        Whether to use dynamic multimask via stability.
    dynamic_multimask_stability_delta (`float`, *optional*, defaults to 0.05):
        The stability delta for the dynamic multimask.
    dynamic_multimask_stability_thresh (`float`, *optional*, defaults to 0.98):
        The stability threshold for the dynamic multimask.
    mask_decoder_configr   rI   r   r   i   mlp_dimr   num_hidden_layers   num_attention_headsattention_downsample_rater   num_multimask_outputsiou_head_depthiou_head_hidden_dimTdynamic_multimask_via_stabilityg?!dynamic_multimask_stability_deltag\(\?"dynamic_multimask_stability_threshN)r9   r:   r;   r<   r=   rI   rA   r?   r   rB   rU   rV   rX   rY   rZ   r[   r\   r]   boolr^   rC   r_   r,   r8   r7   rS   rS   p   s    & ,OKJGSs  %&s&!"3"NC"",0#T0/3%u304&4r8   rS   c                        e Zd ZU dZdZeeedZdZ	e
ez  dz  ed<   dZe
ez  dz  ed<   dZe
ez  dz  ed<   dZeed	<    fd
Z xZS )EdgeTamConfiga  
    prompt_encoder_config (Union[`dict`, `EdgeTamPromptEncoderConfig`], *optional*):
        Dictionary of configuration options used to initialize [`EdgeTamPromptEncoderConfig`].
    mask_decoder_config (Union[`dict`, `EdgeTamMaskDecoderConfig`], *optional*):
        Dictionary of configuration options used to initialize [`EdgeTamMaskDecoderConfig`].

     Example:

     ```python
     >>> from transformers import (
     ...     EdgeTamVisionConfig,
     ...     EdgeTamPromptEncoderConfig,
     ...     EdgeTamMaskDecoderConfig,
     ...     EdgeTamModel,
     ... )

     >>> # Initializing a EdgeTamConfig with `"facebook/edgetam.1_hiera_tiny"` style configuration
     >>> configuration = EdgeTamConfig()

     >>> # Initializing a EdgeTamModel (with random weights) from the `"facebook/edgetam.1_hiera_tiny"` style configuration
     >>> model = EdgeTamModel(configuration)

     >>> # Accessing the model configuration
     >>> configuration = model.config

     >>> # We can also initialize a EdgeTamConfig from a EdgeTamVisionConfig, EdgeTamPromptEncoderConfig, and EdgeTamMaskDecoderConfig
     >>> # Initializing EDGETAM vision encoder, memory attention, and memory encoder configurations
     >>> vision_config = EdgeTamVisionConfig()
     >>> prompt_encoder_config = EdgeTamPromptEncoderConfig()
     >>> mask_decoder_config = EdgeTamMaskDecoderConfig()

     >>> config = EdgeTamConfig(vision_config, prompt_encoder_config, mask_decoder_config)
     ```
    edgetam)r   rH   rT   Nr   rH   rT   r   r   c                 |   t        | j                  t              rT| j                  j                  dd      | j                  d<   t	        | j                  d      di | j                  | _        n| j                  t	        d          | _        t        | j
                  t              rt        di | j
                  | _        n| j
                  t               | _        t        | j                  t              rt        di | j                  | _        n| j                  t               | _        t        | (  di | y )Nr&   r   r,   )r-   r   r.   r/   r   rH   rG   rT   rS   r1   r2   r3   s     r7   r2   zEdgeTamConfig.__post_init__   s   d(($//3/A/A/E/ElTj/kD|,!/0B0B<0P!Q!gTXTfTf!gD'!/0F!G!IDd00$7)C)adF`F`)aD&''/)C)ED&d..5'?'[$BZBZ'[D$%%-'?'AD$''r8   )r9   r:   r;   r<   r&   r	   rG   rS   r>   r   r.   r   r?   rH   rT   r   rC   r2   rD   rE   s   @r7   rb   rb      sx    !F J#!;7K 59M4**T18<@4"22T9@:> 0047>#u#( (r8   rb   )rb   r   rG   rS   N)huggingface_hub.dataclassesr   configuration_utilsr   utilsr   autor   r	   r   rG   rS   rb   __all__r,   r8   r7   <module>rj      s   ( / 3 # - 238(* 8(  48(v 23!1   4, 23!5/ !5  4!5H 23A($ A(  4A(H mr8   