
    i                         d dl mZ ddlmZ ddlmZ ddlmZ ddlm	Z	 ddl
mZmZ  ed	
      e G d de                    Z ed	
      e G d de                    ZddgZy)    )strict   )PreTrainedConfig)RopeParameters)auto_docstring)interval   )CONFIG_MAPPING
AutoConfigzrhymes-ai/Aria)
checkpointc                   (    e Zd ZU dZdZdgZddddddddZdgdgfd	d
gd	gfd	gd	gfdZdZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	dz  e
d<   dZee
d<   dZe	e
d<     edd      d      Zee
d<   d Zee
d!<   d"Zee
d#<   d$Ze	dz  e
d%<   d&Ze	dz  e
d'<   d$Ze	ee	   z  dz  e
d(<   d&Ze	dz  e
d)<   d*Zee
d+<   dZe e!z  dz  e
d,<   d*Z"ee
d-<   dZ#e	ez  dz  e
d.<   d*Z$ee
d/<   dZ%e	dz  e
d0<   d1Z&d2Z'e	e
d3<   d$Z(e	e
d4<   d$Z)e	e
d5<    fd6Z*d7 Z+ xZ,S )8AriaTextConfigaA  
    moe_num_experts (`int`, *optional*, defaults to 8):
        The number of experts in the MoE layer.
    moe_topk (`int`, *optional*, defaults to 2):
        The number of top experts to route to for each token.
    moe_num_shared_experts (`int`, *optional*, defaults to 2):
        The number of shared experts.
    	aria_textpast_key_valuescolwiserowwise)zlayers.*.self_attn.q_projzlayers.*.self_attn.k_projzlayers.*.self_attn.v_projzlayers.*.self_attn.o_projz%layers.*.mlp.shared_experts.gate_projz#layers.*.mlp.shared_experts.up_projz%layers.*.mlp.shared_experts.down_proj	input_idsinputs_embedshidden_statesattention_mask)embed_tokenslayersnormi }  
vocab_sizei   hidden_sizeintermediate_size    num_hidden_layersnum_attention_headsNnum_key_value_headssilu
hidden_acti   max_position_embeddingsg        g      ?)minmax{Gz?)defaultinitializer_rangegư>rms_norm_epsT	use_cacher	   pad_token_id   bos_token_ideos_token_idpretraining_tpFtie_word_embeddingsrope_parametersattention_biasattention_dropoutmlp_biashead_dimtext_config   moe_num_expertsmoe_topkmoe_num_shared_expertsc                     | j                   | j                  | j                  z  | _         | j                  | j                  | _        t	        |   di | y )N )r5   r   r   r    super__post_init__)selfkwargs	__class__s     |/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/aria/configuration_aria.pyr>   zAriaTextConfig.__post_init__U   sO    ==  ,,0H0HHDM##+'+'?'?D$''    c                     | j                   | j                  z  dk7  r&t        d| j                    d| j                   d      y)zOPart of `@strict`-powered validation. Validates the architecture of the config.r   zThe hidden size (z6) is not a multiple of the number of attention heads (z).N)r   r   
ValueError)r?   s    rB   validate_architecturez$AriaTextConfig.validate_architecture]   sS    d666!;#D$4$4#5 622327  <rC   )-__name__
__module____qualname____doc__
model_typekeys_to_ignore_at_inferencebase_model_tp_planbase_model_pp_planr   int__annotations__r   r   r   r   r    r"   strr#   r   r(   floatr)   r*   boolr+   r-   r.   listr/   r0   r1   r   dictr2   r3   r4   r5   base_config_keyr8   r9   r:   r>   rF   __classcell__rA   s   @rB   r   r      s    J#4"5%.%.%.%.1:/81: &(9:#%568IJ!"_$56 JK!s!s!!&*t*J#'S'9xCS9$GuGL%It L#*  L#* +,L#S	/D(,!"NC$J" %%48O^d*T18 ND ,/sU{T)/HdHcDj#OOSHc"#C#(rC   r   c                        e Zd ZU dZdZddiZeedZdZ	e
ez  dz  ed<   dZe
ez  dz  ed<   d	Zeee   z  ed
<   dZe
dz  ed<   dZeed<   dZeed<   dZeed<    fdZ xZS )
AriaConfigzq
    projector_patch_to_query_dict (`dict`, *optional*):
        Mapping of patch sizes to query dimensions.
    ariaimage_token_idimage_token_index)r6   vision_configNr^   r6   vision_feature_layerprojector_patch_to_query_dict	   r&   r(   Fr0   c                    | j                   
ddd| _         | j                   j                         D ci c]  \  }}t        |      t        |       c}}| _         t        | j                   j	                               | _        t        | j                  t              r:d| j                  d<   t        | j                  d      di | j                  | _        n| j                  t        d          | _        t        | j                  t              r)d| j                  v rt        di | j                  | _
        n| j                  t               | _
        t        | 4  di | y c c}}w )N      )i  i$  idefics3_visionrK   r<   )ra   itemsrO   r%   values'max_value_projector_patch_to_query_dict
isinstancer^   rU   r
   r6   r   r=   r>   )r?   r@   kvrA   s       rB   r>   zAriaConfig.__post_init__|   s7    --52D. JNIkIkIqIqIs-tAc!fc!fn-t*7:4;];];d;d;f7g4d(($//@D|,!/0B0B<0P!Q!gTXTfTf!gD'!/0A!B!DDd&&-,$BRBR2R-A0@0@AD%-/D'' .us   E%)rG   rH   rI   rJ   rK   attribute_mapr   r   sub_configsr^   rU   r   rP   r6   r`   rO   rT   ra   r]   r(   rR   r0   rS   r>   rW   rX   s   @rB   rZ   rZ   f   s    
 J-M #1:NK48M4**T1804K&-4,.#S	/.15!4$;5s#u# %%( (rC   rZ   N)huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   utils.type_validatorsr   autor
   r   r   rZ   __all__r<   rC   rB   <module>rv      s   ( / 3 1 # - - +,D% D  -DN +,*(! *(  -*(Z )
*rC   