
    i}                         d dl mZ ddlmZ ddlmZ ddlmZ  ed      e G d d	e                    Z ed      e G d
 de                    Z	 ed      e G d de                    Z
g dZy)    )strict   )PreTrainedConfig)RopeParameters)auto_docstringzEmu3-community/Emu3-Chat-hf)
checkpointc                      e Zd ZU dZdZdZdZeed<   dZ	eed<   dZ
eed<   d	Zeed
<   dZeed<   dZeed<   dZeed<   dZeed<   dZee   eedf   z  ed<   dZeed<   dZee   eedf   z  ed<   dZeed<   dZeed<   dZeez  ed<   y)Emu3VQVAEConfigaz  
    embed_dim (`int`, *optional*, defaults to 4):
        Dimension of the quantized vector in codebook.
    out_channels (`int`, *optional*, defaults to 3):
        Output channel of decoder.
    temporal_downsample_factor (`int`, *optional*, defaults to 4):
        Temporal downsample factor.
    base_channels (`int`, *optional*, defaults to 256):
        Basic channel number of the intermediate blocks.
    channel_multiplier (`list[int]`, *optional*, defaults to `[1, 2, 2, 4]`):
        Channel scaling factor of the intermediate blocks.
    num_res_blocks (`int`, *optional*, defaults to 2):
        Residual block number in each stage.
    attn_resolutions (`list[int]`, *optional*, defaults to `[3]`):
        Stage indices to apply attention.

    ```python
    >>> from transformers import Emu3VQVAE, Emu3VQVAEConfig

    >>> # Initializing a video VQ model of Emu3 configuration
    >>> configuration = Emu3VQVAEConfig()

    >>> # Initializing a model from the Emu3 VQ model style configuration
    >>> model = Emu3VQVAE(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```
    
emu3_vqgan	vq_configi   codebook_size   	embed_dimlatent_channelsFdouble_latentr   in_channelsout_channelstemporal_downsample_factor   base_channels)      r   r   .channel_multiplierr   num_res_blocks)r   attn_resolutionsi   hidden_sizer   num_attention_headsg        attention_dropoutN)__name__
__module____qualname____doc__
model_typebase_config_keyr   int__annotations__r   r   r   boolr   r   r   r   r   listtupler   r   r   r   r   float     |/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/emu3/configuration_emu3.pyr
   r
      s    < J!OM3IsOSM4KL#&''M36BS	E#s(O3BNC48d3i%S/18K  %(us{(r,   r
   c                   D   e Zd ZU dZdZdZdgZdZdZe	e
d<   dZe	e
d	<   d
Ze	e
d<   dZe	e
d<   dZe	e
d<   dZe	dz  e
d<   dZee
d<   dZe	e
d<   dZee
d<   dZee
d<   dZe	e
d<   dZe	e
d<   dZe	ee	   z  dz  e
d<   dZeez  dz  e
d <   d!Zd!Zd"Z ee	z  e
d#<   d$Z!ee
d%<   d!Z"ee
d&<   y)'Emu3TextConfiga  
    Example:

    ```python
    >>> from transformers import Emu3Model, Emu3Config

    >>> # Initializing a Emu3-community/Emu3-Chat-hf style configuration
    >>> configuration = Emu3Config()

    >>> # Initializing a model from the Emu3-community/Emu3-Chat-hf style configuration
    >>> model = Emu3Model(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```emu3_text_modeltext_configpast_key_valuesg    .Ai. 
vocab_sizei   r   i 8  intermediate_size    num_hidden_layersr      Nnum_key_value_headssilu
hidden_acti $  max_position_embeddingsgh㈵>rms_norm_epsT	use_cachei[P pad_token_idi)Q bos_token_idi*Q eos_token_idrope_parametersFg?r   g{Gz?initializer_rangetie_word_embeddings)#r   r    r!   r"   r#   r$   keys_to_ignore_at_inferencedefault_thetar3   r%   r&   r   r4   r6   r   r8   r:   strr;   r<   r*   r=   r'   r>   r?   r@   r(   rA   r   dictmlp_biasattention_biasr   rB   rC   r+   r,   r-   r/   r/   L   s      #J#O#4"5MJK"s"s!!&'t'J#'S'L%ItL#L#+1L#S	/D(148O^d*T18HN%(us{(#u# %%r,   r/   c                        e Zd ZU dZdZdgZeedZdZ	e
ez  dz  ed<   dZe
ez  dz  ed<   dZe
eef   dz  ed<   d	Zeed
<    fdZ xZS )
Emu3Configz
    vocabulary_map (`dict`, *optional*):
        A dictionary containing the vocabulary map from the tokenizer. Used to obtain tokens from the image inputs.
    emu3r2   )r1   r   Nr   r1   vocabulary_mapFrC   c                    | j                   t               | _         n4t        | j                   t              rt        di | j                   | _         | j                  t               | _        n4t        | j                  t              rt        di | j                  | _        | j                  | j                  j                  d      nd | _        t        | (  di | y )Nz<image>r+   )r   r
   
isinstancerG   r1   r/   rM   getimage_token_idsuper__post_init__)selfkwargs	__class__s     r-   rS   zEmu3Config.__post_init__   s    >>!,.DN-,>t~~>DN#-/D(($/-A0@0@ADDHDWDWDcd1155i@im''r,   )r   r    r!   r"   r#   rD   r/   r
   sub_configsr   rG   r&   r1   rM   rF   r%   rC   r'   rS   __classcell__)rV   s   @r-   rK   rK   y   sv    
 J#4"5"0OK/3Ito%,304K&-4,0NDcNT)0 %%( (r,   rK   )rK   r/   r
   N)huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r
   r/   rK   __all__r+   r,   r-   <module>r^      s   " / 3 1 # 89/)& /)  :/)d 89(&% (&  :(&V 89(! (  :(< >r,   