
    i                     r    d dl mZ ddlmZ ddlmZ ddlmZ  ed      e G d d	e                    Zd	gZ	y
)    )strict   )PreTrainedConfig)RopeParameters)auto_docstringzSmallDoge/Doge-320M)
checkpointc                   :    e Zd ZU dZdZdgZddddddddddddZd	gd
gfddgdgfdgdgfdZdZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZee	z  e
d<   dZee
d<   dZee
d<   dZee
d<   dZee
d<   d Zee
d!<   dZe	e
d"<   d#Zeez  d#z  e
d$<   d%Ze	e
d&<   d#Ze	d#z  e
d'<   d Zee
d(<   dZed#z  e
d)<   d Zee
d*<   d#Z e	d#z  e
d+<   dZ!e	e
d,<   d Z"ee
d-<   d.Z#e	e
d/<   d0Z$e	e
d1<   d Z%ee
d2<   d Z&ee
d3<   d4Z'ee
d5<   d#Z(e	d#z  e
d6<   d#Z)e	d#z  e
d7<   d#Z*e	e+e	   z  d#z  e
d8<    fd9Z, xZ-S ):
DogeConfiga  
    keep_window_size (`int`, *optional*, defaults to 2048):
        The window size of tokens that are not dynamically masked, and dynamic masking is only performed when the sequence length exceeds this value.
    is_moe (`bool`, *optional*, defaults to `False`):
        Whether to use the Cross Domain Mixture of Experts, if `True`, the MoE will inherit the MLP to initialize.

    ```python
    >>> from transformers import DogeConfig, DogeModel

    >>> # Initializing a Doge-320M style configuration
    >>> configuration = DogeConfig()

    >>> # Initializing a model from the Doge-320M style configuration
    >>> model = DogeModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```dogepast_key_valuescolwiserowwisecolwise_gather_outputrowwise_split_input)zlayers.*.self_attn.q_projzlayers.*.self_attn.k_projzlayers.*.self_attn.v_projzlayers.*.self_attn.dt_projzlayers.*.self_attn.o_projzlayers.*.mlp.gate_projzlayers.*.mlp.up_projzlayers.*.mlp.down_projzlayers.*.mlp.router_gatezlayers.*.mlp.down_embedzlayers.*.mlp.up_embed	input_idsinputs_embedshidden_statesattention_mask)embed_tokenslayersnormi   
vocab_sizei   hidden_sizei   intermediate_size    num_hidden_layersg        hidden_dropoutsilu
hidden_actg{Gz?initializer_rangegư>rms_norm_epsT	use_cacheFtie_word_embeddingsmax_position_embeddingsNrope_parameters   num_attention_headsnum_key_value_headsattention_biasattention_dropoutmlp_biassliding_windowkeep_window_sizeis_moei @  num_experts@   num_experts_per_toknorm_topk_proboutput_router_logitsgMbP?router_aux_loss_coefpad_token_idbos_token_ideos_token_idc                 ^    | j                   | j                  | _         t        |   di | y )N )r(   r'   super__post_init__)selfkwargs	__class__s     |/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/doge/configuration_doge.pyr;   zDogeConfig.__post_init__f   s-    ##+'+'?'?D$''    ).__name__
__module____qualname____doc__
model_typekeys_to_ignore_at_inferencebase_model_tp_planbase_model_pp_planr   int__annotations__r   r   r   r   floatr   strr    r!   r"   boolr#   r$   r%   r   dictr'   r(   r)   r*   r+   r,   r-   r.   r/   r1   r2   r3   r4   r5   r6   r7   listr;   __classcell__)r>   s   @r?   r
   r
      s   & J#4"5 &/%.%.&/%."+ )"+$;#8!6 &(9:#%568IJ!"_$56 JK!s!s"%NECK%J#u#L%It %%#'S'48O^d*T18  &*t* ND &)ut|)Hd!%NC$J% c FDK!! ND !&$&"'%'#L#*##L#*#+/L#S	/D(/( (r@   r
   N)
huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r
   __all__r9   r@   r?   <module>rV      sJ   , / 3 1 # 01L(! L(  2L(^ .r@   