
    i(                     r    d dl mZ ddlmZ ddlmZ ddlmZ  ed      e G d d	e                    Zd	gZ	y
)    )strict   )PreTrainedConfig)RopeParameters)auto_docstringztencent/Youtu-LLM-2B)
checkpointc                       e Zd ZU dZdZdgZddddZdgdgfd	d
gd	gfd	gd	gfdZi ZdZ	e
ed<   dZe
ed<   dZe
ed<   dZe
ed<   dZe
ed<   dZe
ed<   dZe
ed<   dZe
dz  ed<   dZe
ed<   dZe
dz  ed<   dZe
ed <   d!Zeed"<   d#Ze
ed$<   dZedz  ed%<   d&Zeed'<   d(Zeed)<   dZe
dz  ed*<   d+Ze
dz  ed,<   d-Z e
e!e
   z  dz  ed.<   d(Z"eed/<   dZ#e$e%z  dz  ed0<   d(Z&edz  ed1<   d2Z'eed3<   d4Z(ee
z  dz  ed5<   dZ)edz  ed6<    fd7Z* xZ+S )8YoutuConfiga   
    rope_interleave (`bool`, *optional*, defaults to `True`):
        Whether to interleave the rotary position embeddings.
    embedding_initializer_range (`float`, *optional*):
        The standard deviation of the truncated_normal_initializer for initializing all embedding matrices.

    ```python
    >>> from transformers import YoutuModel, YoutuConfig
    >>> # Initializing a Youtu-LLM-2B style configuration
    >>> configuration = YoutuConfig()
    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```youtupast_key_valuescolwiserowwise)zlayers.*.mlp.gate_projzlayers.*.mlp.up_projzlayers.*.mlp.down_proj	input_idsinputs_embedshidden_statesattention_mask)embed_tokenslayersnormi  
vocab_sizei   hidden_sizei   intermediate_size    num_hidden_layers   num_attention_headsnum_key_value_headsi   kv_lora_ranki   Nq_lora_rank@   qk_rope_head_dim   
v_head_dimqk_nope_head_dimsilu
hidden_acti   max_position_embeddingsinitializer_rangegư>rms_norm_epsT	use_cachepad_token_idi  bos_token_idi eos_token_idtie_word_embeddingsrope_parametersrope_interleaveFattention_biasg        attention_dropoutembedding_initializer_rangec                 z   | j                   1| j                  dk7  rdd| j                  z  dz  z  | _         nd| _         | j                  xs d| j                   z  | _        | j                  | j                  | _        | j
                  | j                  z   | _        | j                  | _        t        | (  di | y )Nr   g       @g      @g      ?g{Gz? )r(   r   r3   r   r   r$   r!   qk_head_dimhead_dimsuper__post_init__)selfkwargs	__class__s     ~/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/youtu/configuration_youtu.pyr9   zYoutuConfig.__post_init__[   s    !!)1$),d6F6F0F3/N)N&)-&+/+K+K+ksUYUkUkOk(##+'+'?'?D$0043H3HH--''    ),__name__
__module____qualname____doc__
model_typekeys_to_ignore_at_inferencebase_model_tp_planbase_model_pp_planattribute_mapr   int__annotations__r   r   r   r   r   r   r   r!   r#   r$   r&   strr'   r(   floatr)   r*   boolr+   r,   r-   listr.   r/   r   dictr0   r1   r2   r3   r9   __classcell__)r<   s   @r=   r
   r
   "   s    J#4"5"+ )"+ &(9:#%568IJ!"_$56
 MJK!s!s!!!!L#"Kt"c Jd
 cJ#)S)&*ut|*L%It#L#*#%L#*%+1L#S	/D(1 $$48O^d*T18#'OTD[' ND ,/us{T)/044( (r>   r
   N)
huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r
   __all__r5   r>   r=   <module>rU      sJ   6 / 3 1 # 12D(" D(  3D(N /r>   