
    i                     r    d dl mZ ddlmZ ddlmZ ddlmZ  ed      e G d d	e                    Zd	gZ	y
)    )strict   )PreTrainedConfig)RopeParameters)auto_docstringzrednote-hilab/dots.llm1.base)
checkpointc                       e Zd ZU dZdZdgZdddddddddddddddd	Zd
gdgfddgdgfdgdgfdZddiZdZ	e
ed<   dZe
ed<   dZe
ed<   dZe
ed<   dZe
ed<   dZe
ed<   dZe
dz  ed<   dZe
dz  ed<   dZe
dz  ed<   d Ze
dz  ed!<   d Ze
dz  ed"<   dZe
dz  ed#<   d$Ze
dz  ed%<   d&Zedz  ed'<   d(Zeed)<   d*Ze
ed+<   d,Zeed-<   d.Zeed/<   d0Z eed1<   d&Z!eed2<   dZ"e#e$z  dz  ed3<   d&Z%eed4<   d5Z&ee
z  dz  ed6<   d7Z'eed8<   d9Z(e
dz  ed:<   dZ)e
dz  ed;<   dZ*e+e   dz  ed<<   dZ,e
dz  ed=<   dZ-e
dz  ed><   dZ.e
e+e
   z  dz  ed?<    fd@Z/ xZ0S )ADots1Configa  
    n_group (`int`, *optional*, defaults to 1):
        Number of groups for routed experts.
    first_k_dense_replace (`int`, *optional*, defaults to 0):
        Number of dense layers at the beginning of the model before the first MoE layer.

    Examples:

    ```python
    >>> from transformers import Dots1Model, Dots1Config
    >>> # Initializing a Dots1 style configuration
    >>> configuration = Dots1Config()
    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```
    dots1past_key_valuescolwiserowwisereplicated_with_grad_allreducepacked_colwisemoe_tp_experts)zlayers.*.self_attn.q_projzlayers.*.self_attn.k_projzlayers.*.self_attn.v_projzlayers.*.self_attn.o_projzlayers.*.self_attn.q_normzlayers.*.self_attn.k_normz!layers.*.mlp.experts.gate_up_projzlayers.*.mlp.experts.down_projzlayers.*.mlp.expertsz%layers.*.mlp.shared_experts.gate_projz#layers.*.mlp.shared_experts.up_projz%layers.*.mlp.shared_experts.down_projzlayers.*.mlp.gate_projzlayers.*.mlp.up_projzlayers.*.mlp.down_proj	input_idsinputs_embedshidden_statesattention_mask)embed_tokenslayersnormnum_local_expertsn_routed_expertsi R 
vocab_sizei   hidden_sizei*  intermediate_sizei  moe_intermediate_size>   num_hidden_layers    num_attention_headsNnum_key_value_headsn_shared_experts   n_group
topk_groupnum_experts_per_tokr   first_k_dense_replaceFnorm_topk_probsilu
hidden_acti   max_position_embeddingsg{Gz?initializer_rangegư>rms_norm_epsT	use_cachetie_word_embeddingsrope_parametersattention_biasg        attention_dropoutg      ?routed_scaling_factori   sliding_windowmax_window_layerslayer_typespad_token_idbos_token_ideos_token_idc                 
   | j                   | j                  | _         | j                  Et        | j                        D cg c]!  }| j
                  || j                  k\  rdnd# c}| _        t        |    di | y c c}w )Nsliding_attentionfull_attention )	r#   r"   r8   ranger    r6   r7   super__post_init__)selfkwargsi	__class__s      ~/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/dots1/configuration_dots1.pyrB   zDots1Config.__post_init__l   s    ##+'+'?'?D$#
 t556	   &&2qD<R<R7R $%& D 	'' s   &B )1__name__
__module____qualname____doc__
model_typekeys_to_ignore_at_inferencebase_model_tp_planbase_model_pp_planattribute_mapr   int__annotations__r   r   r   r    r"   r#   r$   r   r&   r'   r(   r)   r*   boolr,   strr-   r.   floatr/   r0   r1   r2   r   dictr3   r4   r5   r6   r7   r8   listr9   r:   r;   rB   __classcell__)rF   s   @rG   r
   r
      s4   " J#4"5 &/%.%.%.%E%E-=*3 01:/81:"+ )"+& &(9:#%568IJ!"_$56 	/M JK"s"!%3%s!!&(t(#'cDj'#'cDj'GS4ZJd
&*t*()3:)"'ND4K'J#'S'#u#L%It %%48O^d*T18 ND ,/us{T)/#&5&!%NC$J%$&sTz&$(KcT!(#L#*##L#*#+/L#S	/D(/( (    r
   N)
huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r
   __all__r?   rY   rG   <module>r_      sJ   ( / 3 1 # 9:[(" [(  ;[(| /rY   