
    i                        d Z ddlmZ ddlmZ ddlmZmZ  ej                  e	      Z
 ed      e G d d	e                    Z ed      e G d
 de                    Z ed      e G d de                    ZdgZy)zMllama model configuration    )strict   )PreTrainedConfig)auto_docstringloggingzmeta-llama/Llama-3.2-11B-Vision)
checkpointc                   x    e Zd ZU dZdZdZddiZdZee	d<   dZ
ee	d	<   d
Zee	d<   dZee	d<   dZee	d<   dZee	d<   dZee	d<   dZee	d<   dZeee   z  eeef   z  e	d<   dZeee   z  eeef   z  e	d<   dZee	d<   dZee	d<   dZee   dz  e	d<   dZeee      dz  e	d<   d Zee	d!<    fd"Zd# Zed$efd%       Z xZ S )&MllamaVisionConfiga  
    num_global_layers (`int`, *optional*, defaults to 8):
        Number of global layers in the Transformer encoder. Vision model has a second transformer encoder, called global.
    vision_output_dim (`int`, *optional*, defaults to 7680):
        Dimensionality of the vision model output. Includes output of transformer
        encoder with intermediate layers and global transformer encoder.
    max_num_tiles (`int`, *optional*, defaults to 4):
        Maximum number of tiles for image splitting.
    intermediate_layers_indices (`list[int]`, *optional*, defaults to [3, 7, 15, 23, 30]):
        Indices of intermediate layers of transformer encoder from which to extract and output features.
        These output features are concatenated with final hidden state of transformer encoder.
    supported_aspect_ratios (`list[list[int]]`, *optional*):
        List of supported aspect ratios for image splitting. If not specified, the default supported aspect ratios
        are [[1, 1], [1, 2], [1, 3], [1, 4], [2, 1], [2, 2], [3, 1], [4, 1]] for `max_num_tiles=4`.

    Example:

    ```python
    >>> from transformers import MllamaVisionConfig, MllamaVisionModel

    >>> # Initializing a Llama config
    >>> config = MllamaVisionConfig()

    >>> # Initializing a vision model from the mllama-11b style configuration
    >>> model = MllamaVisionModel(config)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```mllama_vision_modelvision_confignum_attention_headsattention_headsi   hidden_sizegelu
hidden_act    num_hidden_layers   num_global_layers   r   num_channelsi   intermediate_sizei   vision_output_dimi  
image_size   
patch_sizeh㈵>norm_eps   max_num_tilesNintermediate_layers_indicessupported_aspect_ratios{Gz?initializer_rangec           	          | j                   ddgddgddgddgddgddgddgddgg| _         | j                  	g d| _        t        |   di | y )N      r   r   )r                )r"   r!   super__post_init__selfkwargs	__class__s     /var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/mllama/configuration_mllama.pyr.   z MllamaVisionConfig.__post_init__M   sv    ''/-.FQFQFQFQPQFUVXYTZ]^`a\befhidj+kD(++3/AD,''    c           
          | j                   ddgddgddgddgddgddgddgddggk(  r| j                  dk7  rt        d      yy)zOPart of `@strict`-powered validation. Validates the architecture of the config.r&   r'   r   r   z;max_num_tiles must be 4 for default supported aspect ratiosN)r"   r    
ValueErrorr0   s    r3   validate_architecturez(MllamaVisionConfig.validate_architectureU   sr     ((aVaVaVaVaQRVVWYZU[^_ab]cfgijek,ll""a'Z[[ ( mr4   returnc                 ,    t        | j                        S )N)lenr"   r7   s    r3   max_aspect_ratio_idz&MllamaVisionConfig.max_aspect_ratio_id]   s    4//00r4   )!__name__
__module____qualname____doc__
model_typebase_config_keyattribute_mapr   int__annotations__r   strr   r   r   r   r   r   r   listtupler   r   floatr    r!   r"   r$   r.   r8   propertyr<   __classcell__r2   s   @r3   r
   r
      s"   < 'J%O*,=>MKJssOSL#!s!!s!47Jd3i%S/1746Jd3i%S/16HeM348cT!186:T$s)_t3:#u#(\ 1S 1 1r4   r
   c                   \    e Zd ZU dZdZdZdZdZee	d<   dZ
ee	d<   d	Zee	d
<   dZee	d<   dZee	d<   dZee	d<   dZee	d<   dZedz  e	d<   dZee	d<   dZee	d<   dZee	d<   dZee	d<   dZee	d<   dZee   dz  e	d<   d Zeez  e	d!<   d"Zee	d#<   d$Zeee   z  dz  e	d%<   d&Zedz  e	d'<    fd(Z  xZ!S ))MllamaTextConfiga  
    cross_attention_layers (`list[int]`, *optional*):
        Indices of the cross attention layers. If not specified, will default to [3, 8, 13, 18, 23, 28, 33, 38].

    Example:

    ```python
    >>> from transformers import MllamaTextModel, MllamaTextConfig

    >>> # Initializing a Mllama text config
    >>> config = MllamaTextConfig()

    >>> # Initializing a model from the Mllama text configuration
    >>> model = MllamaTextModel(config)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```mllama_text_modeltext_configg    A  
vocab_sizei   r   silur   (   r   r   r   r   num_key_value_headsi 8  r   Nrope_parametersr   rms_norm_epsi   max_position_embeddingsr#   r$   T	use_cacheFtie_word_embeddingscross_attention_layersg        dropouti  bos_token_idi eos_token_idi pad_token_idc                 N    | j                   	g d| _         t        |   di | y )N)r   r         r*      !   &   r,   )r[   r-   r.   r/   s     r3   r.   zMllamaTextConfig.__post_init__   s'    &&.*HD'''r4   )"r=   r>   r?   r@   rA   rB   default_thetarR   rD   rE   r   r   rF   r   r   rU   r   rV   dictrW   rI   rX   r$   rY   boolrZ   r[   rG   r\   r]   r^   r_   r.   rK   rL   s   @r3   rN   rN   b   s   & %J#OMJKJs!!  #s##'OTD['L%#*S*#u#It %%/3DI,3GUS[L#+1L#S	/D(1%L#*%( (r4   rN   c                   |     e Zd ZU dZdZddiZeedZdZ	e
ez  dz  ed<   dZe
ez  dz  ed<   d	Zeed<    fd
Z xZS )MllamaConfiga  
    Example:

    ```python
    >>> from transformers import MllamaForConditionalGeneration, MllamaConfig, MllamaVisionConfig, MllamaTextConfig

    >>> # Initializing a CLIP-vision config
    >>> vision_config = MllamaVisionConfig()

    >>> # Initializing a Llama config
    >>> text_config = MllamaTextConfig()

    >>> # Initializing a mllama-11b style configuration
    >>> configuration = MllamaConfig(vision_config, text_config)

    >>> # Initializing a model from the mllama-11b style configuration
    >>> model = MllamaForConditionalGeneration(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```mllamaimage_token_idimage_token_index)rP   r   Nr   rP   rQ   c                    | j                   %t               | _         t        j                  d       n4t	        | j                   t
              rt        di | j                   | _         | j                  %t               | _        t        j                  d       n4t	        | j                  t
              rt        di | j                  | _        t        | $  di | y )Nz9vision_config is None, using default mllama vision configz5text_config is None, using default mllama text configr,   )
r   r
   loggerinfo
isinstancerg   rP   rN   r-   r.   r/   s     r3   r.   zMllamaConfig.__post_init__   s    %!3!5DKKST**D1!3!Id6H6H!ID#/1DKKOP(($//C$2B2BCD''r4   )r=   r>   r?   r@   rA   rC   rN   r
   sub_configsr   rg   r   rE   rP   rm   rD   r.   rK   rL   s   @r3   rj   rj      sh    , J-M #3EWXK48M4**T1826K((4/6#s#( (r4   rj   N)r@   huggingface_hub.dataclassesr   configuration_utilsr   utilsr   r   
get_loggerr=   ro   r
   rN   rj   __all__r,   r4   r3   <module>rx      s    ! . 3 , 
		H	% <=E1) E1  >E1P <=.(' .(  >.(b <=.(# .(  >.(b 
r4   