
    i                     r    d dl mZ ddlmZ ddlmZ ddlmZ  ed      e G d d	e                    Zd	gZ	y
)    )strict   )PreTrainedConfig)RopeParameters)auto_docstringzMiniMaxAI/MiniMax-Text-01-hf)
checkpointc                       e Zd ZU dZdZdgZdZdddddddd	Zd
gdgfddgdgfdgdgfdZddiZ	dZ
eed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZedz  ed<   dZeed<   d Zeed!<   d"Zeed#<   d$Zeed%<   d&Zeed'<   dZedz  ed(<   d)Zedz  ed*<   d+Zeee   z  dz  ed,<   d-Zeed.<   dZ edz  ed/<   d0Z!eez  ed1<   d+Z"eed2<   dZ#eed<   d-Z$eed3<   d4Z%eed5<   d0Z&eed6<   dZ'e(e)z  dz  ed7<   dZ*ee   dz  ed8<   d9Z+eed:<   d)Z,eez  ed;<   d)Z-eez  ed<<   d)Z.eez  ed=<   d)Z/eez  ed><   d)Z0eez  ed?<   d)Z1eez  ed@<    fdAZ2 xZ3S )BMiniMaxConfiga  
    block_size (`int`, *optional*, defaults to 256):
        The length of each attention block, determining how queries, keys, and values
        are grouped and processed for intra- and inter-block attention.
    full_attn_alpha_factor (`float`, *optional*, defaults to 1):
        Weight for residual value in residual connection after normal attention.
    full_attn_beta_factor (`float`, *optional*, defaults to 1):
        Weight for hidden state value in residual connection after normal attention.
    linear_attn_alpha_factor (`float`, *optional*, defaults to 1):
        Weight for residual value in residual connection after lightning attention.
    linear_attn_beta_factor (`float`, *optional*, defaults to 1):
        Weight for hidden state value in residual connection after lightning attention.
    mlp_alpha_factor (`float`, *optional*, defaults to 1):
        Weight for residual value in residual connection after MLP.
    mlp_beta_factor (`float`, *optional*, defaults to 1):
        Weight for hidden state value in residual connection after MLP.

    ```python
    >>> from transformers import MiniMaxModel, MiniMaxConfig

    >>> # Initializing a MiniMax style configuration
    >>> configuration = MiniMaxConfig()

    >>> # Initializing a model from the MiniMax style configuration
    >>> model = MiniMaxModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```minimaxpast_key_valuesg    .Acolwiserowwisepacked_colwisemoe_tp_experts)zlayers.*.self_attn.q_projzlayers.*.self_attn.k_projzlayers.*.self_attn.v_projzlayers.*.self_attn.o_projz!layers.*.mlp.experts.gate_up_projzlayers.*.mlp.experts.down_projzlayers.*.mlp.experts	input_idsinputs_embedshidden_statesattention_mask)embed_tokenslayersnormnum_expertsnum_local_expertsi }  
vocab_sizei   hidden_sizei 8  intermediate_size    num_hidden_layersnum_attention_heads   num_key_value_headsNhead_dimsilu
hidden_acti   max_position_embeddingsg{Gz?initializer_rangegh㈵>rms_norm_epsT	use_cachepad_token_id   bos_token_id   eos_token_idFtie_word_embeddingssliding_windowg        attention_dropoutnum_experts_per_tokoutput_router_logitsgMbP?router_aux_loss_coefrouter_jitter_noiserope_parameterslayer_types   
block_sizefull_attn_alpha_factorfull_attn_beta_factorlinear_attn_alpha_factorlinear_attn_beta_factormlp_alpha_factormlp_beta_factorc                     | j                   | j                  | _         | j                  ;t        | j                        D cg c]  }t        |dz   dz        rdnd c}| _        t        |   di | y c c}w )Nr*   r,   full_attentionlinear_attention )r!   r   r6   ranger   boolsuper__post_init__)selfkwargsi	__class__s      /var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/minimax/configuration_minimax.pyrF   zMiniMaxConfig.__post_init__q   sz    ##+'+'?'?D$#W\]a]s]sWt RSD!a%1$5 ;MM D 	''	 s   A6)4__name__
__module____qualname____doc__
model_typekeys_to_ignore_at_inferencedefault_thetabase_model_tp_planbase_model_pp_planattribute_mapr   int__annotations__r   r   r   r   r!   r"   r$   strr%   r&   floatr'   r(   rD   r)   r+   r-   listr.   r/   r0   r1   r   r2   r3   r4   r5   r   dictr6   r8   r9   r:   r;   r<   r=   r>   rF   __classcell__)rJ   s   @rK   r
   r
      s%   < J#4"5M%.%.%.%.-=*3 0 &(9:#%568IJ!"_$56
 #$78MJK"s"s!!  HcDjJ#,S,#u#L%It#L#*# L#* +,L#S	/D(, %%!%NC$J%%(us{(  s!&$&"'%'!$$48O^d*T18$(KcT!(J*+C%K+)*3;*,-cEk-+,S5[,$%cEk%#$OS5[$	( 	(    r
   N)
huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r
   __all__rB   r]   rK   <module>rc      sK   * / 3 1 # 9:\($ \(  ;\(~ 
r]   