
    i                         d dl mZ ddlmZ ddlmZ ddlmZmZ  ed      e G d	 d
e                    Z	 ed      e G d de                    Z
d
dgZy)    )strict   )PreTrainedConfig)auto_docstring   )CONFIG_MAPPING
AutoConfigzOpenGVLab/InternVL3-1B-hf)
checkpointc                       e Zd ZU dZdZdZdZeed<   dZ	eed<   dZ
eed	<   d
Zeed<   d
Zeed<   dZeed<   dZeed<   dZeez  ed<   dZeez  ed<   dZeez  ed<   dZeed<   dZeed<   dZeed<   dZeee   z  eedf   z  ed<   dZeee   z  eedf   z  ed<   d Zeed!<   d
Zeed"<   d#Zeed$<   d%Zeed&<   d#Z eed'<    fd(Z! xZ"S ))InternVLVisionConfigae  
    projection_dropout (`float`, *optional*, defaults to 0.0):
        Dropout probability for the projection layer.
    norm_type (`str`, *optional*, defaults to `"layer_norm"`):
        The type of normalization to use in the encoder. Can be `"layer_norm"` or `"rms_norm"`.
    use_mask_token (`bool`, *optional*, defaults to `False`):
        Whether to use a mask token for masked image modeling
    use_mean_pooling (`bool`, *optional*, defaults to `True`):
        Whether to mean pool the final hidden states of the patches instead of using the final hidden state of the
        CLS token, before applying the classification head.

    Example:

    ```python
    >>> from transformers import InternVLVisionConfig, InternVLVisionModel

    >>> # Initializing a InternVLVisionModel OpenGVLab/InternVL3-1B-hf style configuration
    >>> configuration = InternVLVisionConfig()

    >>> # Initializing a model (with random weights) from the OpenGVLab/InternVL3-1B-hf configuration
    >>> model = InternVLVisionModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```internvl_visionvision_configi   hidden_size   num_hidden_layers   num_attention_headsFattention_biasuse_qk_normi   intermediate_sizegelu
hidden_actg        hidden_dropout_probattention_dropoutprojection_dropoutg{Gz?initializer_range
layer_norm	norm_typegư>layer_norm_eps)  r    .
image_size)   r"   
patch_sizer   num_channelsuse_mask_tokenT use_absolute_position_embeddingsg?layer_scale_init_valueuse_mean_poolingc                 H   t        | j                  t        t        f      r| j                  n| j                  | j                  f| _        t        | j                  t        t        f      r| j                  n| j                  | j                  f| _        t        |   di | y )N )
isinstancer!   listtupler#   super__post_init__selfkwargs	__class__s     /var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/internvl/configuration_internvl.pyr/   z"InternVLVisionConfig.__post_init__L   s    )$//D%=IDOOPTP_P_aeapapOq 	  *$//D%=IDOOPTP_P_aeapapOq 	 	''    )#__name__
__module____qualname____doc__
model_typebase_config_keyr   int__annotations__r   r   r   boolr   r   r   strr   floatr   r   r   r   r   r!   r,   r-   r#   r$   r%   r&   r'   r(   r/   __classcell__r3   s   @r4   r   r      s&   4 #J%OKs!! ND K!s!J'**%(us{(&))#u#!Is!!NE!4>Jd3i%S/1>4<Jd3i%S/1<L# ND -1$d1$'E'!d!( (r5   r   c                        e Zd ZU dZdZeedZdZe	e
z  dz  ed<   dZe	e
z  dz  ed<   dZeed<   d	Zeed
<   dZeed<   dZeed<   dZeee   z  ed<   dZeed<   dZeed<    fdZ xZS )InternVLConfiga>  
    downsample_ratio (`float`, *optional*, defaults to 0.5):
        Factor by which to downsample the image.

    Example:

    ```python
    >>> from transformers import InternVLForConditionalGeneration, InternVLConfig

    >>> # Initializing a InternVL style configuration
    >>> configuration = InternVLConfig()

    >>> # Initializing a model (with random weights) from the OpenGVLab/InternVL3-1B-hf configuration
    >>> model = InternVLForConditionalGeneration(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```internvl)text_configr   Nr   rF   isP image_token_id   image_seq_lengthg      ?downsample_ratior   projector_hidden_actvision_feature_layerdefaultvision_feature_select_strategyTtie_word_embeddingsc                    t        | j                  t              rt        di | j                  | _        n| j                  t               | _        t        | j                  t              rT| j                  j                  dd      | j                  d<   t        | j                  d      di | j                  | _        n| j                  t        d          | _        t        |    di | y )Nr:   qwen2r*   )	r+   r   dictr   rF   getr   r.   r/   r0   s     r4   r/   zInternVLConfig.__post_init__y   s    d(($/!5!K8J8J!KD'!5!7Dd&&--1-=-=-A-A,PW-XD\*-d.>.>|.LMaPTP`P`aD%-g68D''r5   )r6   r7   r8   r9   r:   r	   r   sub_configsr   rS   r   r=   rF   rG   r<   rI   rJ   r@   rK   r?   rM   r,   rO   rP   r>   r/   rA   rB   s   @r4   rD   rD   V   s    & J",?STK48M4**T1826K((4/6 NC c!e! &#&,.#S	/.*3"C3 $$( (r5   rD   N)huggingface_hub.dataclassesr   configuration_utilsr   utilsr   autor   r	   r   rD   __all__r*   r5   r4   <module>r[      sv     / 3 # - 67:(+ :(  8:(z 67-(% -(  8-(` "#3
4r5   