
    id                     f    d dl mZ ddlmZ ddlmZ  ed      e G d de                    ZdgZy	)
    )strict   )PreTrainedConfig)auto_docstringz'tue-mps/coco_panoptic_videomt_large_640)
checkpointc                      e Zd ZU dZdZdZeed<   dZeed<   dZ	eed<   d	Z
eed
<   dZeez  ed<   dZeed<   dZeed<   dZeee   z  eeef   z  ed<   dZeee   z  eeef   z  ed<   dZeed<   dZeed<   dZeed<   dZeez  ed<   dZeed<   dZeez  ed<   dZeed<   dZeed <   d!Zeed"<   d#Zeed$<   d%Zeed&<   d%Z eed'<   d(Z!eed)<   d*Z"eed+<   d,Z#eed-<   d.Z$eed/<   dZ%eed0<   y1)2VideomtConfiga  
    layerscale_value (`float`, *optional*, defaults to 1.0):
        Initial value for the LayerScale parameter.
    num_upscale_blocks (`int`, *optional*, defaults to 2):
        Number of upsampling blocks used in the decoder or segmentation head.
    use_swiglu_ffn (`bool`, *optional*, defaults to `False`):
        Whether to use the SwiGLU feedforward neural network.
    num_blocks (`int`, *optional*, defaults to 4):
        Number of feature blocks or stages in the architecture.
    no_object_weight (`float`, *optional*, defaults to 0.1):
        Loss weight for the 'no object' class in panoptic/instance segmentation.
    class_weight (`float`, *optional*, defaults to 2.0):
        Loss weight for classification targets.
    mask_weight (`float`, *optional*, defaults to 5.0):
        Loss weight for mask prediction.
    train_num_points (`int`, *optional*, defaults to 12544):
        Number of points to sample for mask loss computation during training.
    oversample_ratio (`float`, *optional*, defaults to 3.0):
        Oversampling ratio used in point sampling for mask training.
    importance_sample_ratio (`float`, *optional*, defaults to 0.75):
        Ratio of points to sample based on importance during training.
    num_queries (`int`, *optional*, defaults to 200):
        Number of object queries in the Transformer.
    num_register_tokens (`int`, *optional*, defaults to 4):
        Number of learnable register tokens added to the transformer input.

    Example:

    ```python
    >>> from transformers import VideomtConfig, VideomtForUniversalSegmentation

    >>> # Initialize configuration
    >>> config = VideomtConfig()

    >>> # Initialize model
    >>> model = VideomtForUniversalSegmentation(config)

    >>> # Access config
    >>> config = model.config
    ```videomti   hidden_size   num_hidden_layers   num_attention_headsgelu
hidden_actg        hidden_dropout_probg{Gz?initializer_rangegư>layer_norm_epsi  
image_size
patch_sizer   num_channels   	mlp_ratiog      ?layerscale_valuedrop_path_rate   num_upscale_blocksattention_dropoutFuse_swiglu_ffn
num_blocksg?no_object_weightg       @class_weightg      @mask_weightdice_weighti 1  train_num_pointsg      @oversample_ratiog      ?importance_sample_ratio   num_queriesnum_register_tokensN)&__name__
__module____qualname____doc__
model_typer   int__annotations__r   r   r   strr   floatr   r   r   listtupler   r   r   r   r   r   r   r   boolr    r!   r"   r#   r$   r%   r&   r'   r)   r*        /var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/videomt/configuration_videomt.pyr	   r	      sU   'R JKs!!J'**#u# NE 47Jd3i%S/1746Jd3i%S/16L#Is!e!"%NECK%%(us{( ND J!e!L%KK!c!!e!%)U)K  r8   r	   N)huggingface_hub.dataclassesr   configuration_utilsr   utilsr   r	   __all__r7   r8   r9   <module>r>      sI   * / 3 # DEE!$ E!  FE!P 
r8   