
    i                         d Z ddlmZ ddlmZ ddlmZ  ed      e G d d	e                    Z ed      e G d
 de                    Z ed      e G d de                    Z	dgZ
y)zIdefics model configuration    )strict   )PreTrainedConfig)auto_docstringzHuggingFaceM4/idefics-9b)
checkpointc                       e Zd ZU dZddiZdZeed<   dZee	e   z  e
eef   z  ed<   dZeed<   d	Zee	e   z  e
eef   z  ed
<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeez  ed<   dZeed<   dZeed<   y)IdeficsVisionConfigidefics_visionhidden_size	embed_dimi      
image_sizei   intermediate_size   
patch_size    num_hidden_layers   num_attention_headsr   num_channelsgelu
hidden_actgh㈵>layer_norm_eps        attention_dropout{Gz?initializer_rangeg      ?initializer_factorN)__name__
__module____qualname__
model_typeattribute_mapr   int__annotations__r   listtupler   r   r   r   r   r   strr   floatr   r   r        /var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/idefics/configuration_idefics.pyr	   r	      s     "J"K0MIs47Jd3i%S/17!s!46Jd3i%S/16s!!L#J NE %(us{(#u# ##r+   r	   c                   j    e Zd ZU dZdZdZeed<   dZe	ed<   dZ
e	ed<   d	Ze	ed
<   dZe	ed<   dZeed<   y)IdeficsPerceiverConfigaB  
    use_resampler (`bool`, *optional*, defaults to `False`):
        Whether or not to use the resampler
    resampler_n_latents (`int`, *optional*, defaults to 64):
        Number of latent embeddings to resample ("compress") the input sequence to (usually < 128).
    resampler_depth (`int`, *optional*, defaults to 6):
        Depth of the Perceiver Resampler (Transformer w/ cross attention). Should be shallow (< 3).
    resampler_n_heads (`int`, *optional*, defaults to 16):
        Number of heads in each Transformer block (for multi-headed self-attention).
    resampler_head_dim (`int`, *optional*, defaults to 96):
        Dimensionality of each head projection in the Transformer block.
    qk_layer_norms_perceiver (`bool`, *optional*, defaults to `False`):
        Whether or not to use qk layer norms in perceiver
    idefics_percieverFuse_resampler@   resampler_n_latents   resampler_depthr   resampler_n_heads`   resampler_head_dimqk_layer_norms_perceiverN)r   r    r!   __doc__r"   r0   boolr%   r2   r$   r4   r5   r7   r8   r*   r+   r,   r.   r.   /   sN     %JM4!!OSs  %*d*r+   r.   c                       e Zd ZU dZdZeedZdZe	e
d<   dZe	e
d<   dZe	e
d	<   d
Ze	e
d<   dZe	e
d<   dZe	e
d<   dZee	z  e
d<   dZee
d<   dZee
d<   dZee
d<   dZee
d<   dZee
d<   dZee
d<   dZee
d<   dZe	dz  e
d<   d Ze	dz  e
d!<   d"Ze	ee	   z  dz  e
d#<   d$Zee
d%<   d Z e	e
d&<   d$Z!ee
d'<   dZ"ee
d(<   d)Z#ee$z  e
d*<   d$Z%ee
d+<   dZ&ee
d,<   d)Z'ee$z  e
d-<   d$Z(ee
d.<   dZ)e*e+z  dz  e
d/<   dZ,e*e+z  dz  e
d0<    fd1Z- xZ.S )2IdeficsConfigah  
    additional_vocab_size (`int`, *optional*, defaults to 0):
        Additional vocabulary size of the model, typically for the special "<img>" token. Additional vocab tokens
        are always trainable whereas regular vocab tokens can be frozen or not.
    alpha_initializer (`str`, *optional*, defaults to `"zeros"`):
        Initialization type for the alphas.
    alphas_initializer_range (`float`, *optional*, defaults to 0.0):
        The standard deviation of the truncated_normal_initializer for initializing the alphas in the Gated Cross
        Attention.
    alpha_type (`str`, *optional*, defaults to `"float"`):
        Whether the gating alphas should be vectors or single floats.
    cross_layer_interval (`int`, *optional*, default to 1):
        Interval for cross attention (from text to image) layers.
    qk_layer_norms (`bool`, *optional*, defaults to `False`):
        Whether to add layer norm after q and k
    freeze_text_layers (`bool`, *optional*, defaults to `True`):
        Whether to freeze text layers
    freeze_text_module_exceptions (`bool`, *optional*, defaults to `[]`):
        Exceptions to freezing text layers when `freeze_text_layers` is `True`
    freeze_lm_head (`bool`, *optional*, defaults to `False`):
        Whether to freeze lm head
    freeze_vision_layers (`bool`, *optional*, defaults to `True`):
        Whether to freeze vision layers
    freeze_vision_module_exceptions (`bool`, *optional*, defaults to `[]`):
        Exceptions to freezing vision layers when `freeze_vision_layers` is `True`
    use_resampler (`bool`, *optional*, defaults to `False`):
        Whether to use the Resampler
    perceiver_config (`IdeficsPerceiverConfig`,  *optional*):
        Custom perceiver config or dict

    Example:

    ```python
    >>> from transformers import IdeficsModel, IdeficsConfig

    >>> # Initializing a Idefics idefics-9b style configuration
    >>> configuration = IdeficsConfig()

    >>> # Initializing a model from the idefics-9b style configuration
    >>> model = IdeficsModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```idefics)perceiver_configvision_configi }  
vocab_sizer   additional_vocab_sizei   r   i +  r   r   r   r   r   dropoutsilur   r   r   zerosalpha_initializeralphas_initializer_ranger)   
alpha_typegư>rms_norm_epsT	use_cacheNpad_token_id   bos_token_id   eos_token_idFtie_word_embeddingscross_layer_intervalqk_layer_normsfreeze_text_layersr*   freeze_text_module_exceptionsfreeze_lm_headfreeze_vision_layersfreeze_vision_module_exceptionsr0   r?   r>   c                 d   | j                   t               | _         n4t        | j                   t              rt        di | j                   | _         | j                  t               | _        n4t        | j                  t              rt        di | j                  | _        t        |   di | y )Nr*   )r>   r.   
isinstancedictr?   r	   super__post_init__)selfkwargs	__class__s     r,   r[   zIdeficsConfig.__post_init__   s      ($:$<D!--t4$:$ST=R=R$SD!%!4!6D**D1!4!Jt7I7I!JD''r+   )/r   r    r!   r9   r"   r.   r	   sub_configsr@   r$   r%   rA   r   r   r   r   rB   r)   r   r(   r   rE   rF   rG   rH   rI   r:   rJ   rL   rN   r&   rO   rP   rQ   rR   rS   r'   rT   rU   rV   r0   r?   rY   r   r>   r[   __classcell__)r^   s   @r,   r<   r<   K   s   +Z J'=PcdKJ!"3"K"s"s!!GUS[J#u#$s$&)e)JL%It L#*  L#* +,L#S	/D(, %% !#! ND ##24!4%<4 ND !%$%46#TE\6M448M4**T187;d--4;( (r+   r<   N)r9   huggingface_hub.dataclassesr   configuration_utilsr   utilsr   r	   r.   r<   __all__r*   r+   r,   <module>re      s   & " . 3 # 56$* $  7$$ 56+- +  7+4 56Y($ Y(  7Y(x 
r+   