
    i                     r    d dl mZ ddlmZ ddlmZ ddlmZ  ed      e G d d	e                    Zd	gZ	y
)    )strict   )PreTrainedConfig)RopeParameters)auto_docstringzHuggingFaceTB/SmolLM3-3B)
checkpointc                       e Zd ZU dZdZdgZdZddddddddZdgd	gfd
dgd
gfd
gd
gfdZdZ	e
ed<   dZe
ed<   dZe
ed<   dZe
ed<   dZe
ed<   dZe
dz  ed<   dZeed<   dZe
ed<   dZeed<   d Zeed!<   d"Zeed#<   d$Ze
dz  ed%<   d&Ze
dz  ed'<   d(Ze
ee
   z  dz  ed)<   dZeez  dz  ed*<   d+Z eed,<   dZ!e
dz  ed-<   dZ"ee
   dz  ed.<   dZ#e
ed/<   dZ$ee   dz  ed0<   d+Z%eed1<   d2Z&ee
z  ed3<   d+Z'eed4<   d"Z(eed5<    fd6Z) xZ*S )7SmolLM3Configa>  
    no_rope_layers (`List[int]`, *optional*):
        List with at least the same length as the number of layers in the model.
        A `1` at an index position indicates that the corresponding layer will use RoPE,
        while a `0` indicates that it's a NoPE layer.
    no_rope_layer_interval (`int`, *optional*, defaults to 4):
        If `no_rope_layers` is `None`, it will be created using a NoPE layer every
        `no_rope_layer_interval` layers.

    ```python
    >>> from transformers import SmolLM3Model, SmolLM3Config

    >>> # Initializing a SmolLM3 style configuration
    >>> configuration = SmolLM3Config()

    >>> # Initializing a model from the SmolLM3 style configuration
    >>> model = SmolLM3Model(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```smollm3past_key_valuesg    >Acolwiserowwise)zlayers.*.self_attn.q_projzlayers.*.self_attn.k_projzlayers.*.self_attn.v_projzlayers.*.self_attn.o_projzlayers.*.mlp.gate_projzlayers.*.mlp.up_projzlayers.*.mlp.down_proj	input_idsinputs_embedshidden_statesattention_mask)embed_tokenslayersnormi  
vocab_sizei   hidden_sizei +  intermediate_size$   num_hidden_layers   num_attention_heads   Nnum_key_value_headssilu
hidden_acti   max_position_embeddingsg{Gz?initializer_rangegư>rms_norm_epsT	use_cachei pad_token_idi  bos_token_idi eos_token_idrope_parametersFuse_sliding_windowsliding_windowno_rope_layersno_rope_layer_intervallayer_typesattention_biasg        attention_dropoutmlp_biastie_word_embeddingsc                 "   | j                   | j                  | _         | j                  Dt        | j                        D cg c]   }t        |dz   | j                  z  dk7        " c}| _        | j                  g | _        t        | j                        D ]b  }| j                  |   }| j                  r*| j                  |s| j                  j                  d       H| j                  j                  d       d t        | 0  di | y c c}w )N   r   sliding_attentionfull_attention )r   r   r+   ranger   intr,   r-   r)   r*   appendsuper__post_init__)selfkwargs	layer_idxhas_rope	__class__s       /var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/smollm3/configuration_smollm3.pyr;   zSmolLM3Config.__post_init__a   s    ##+'+'?'?D$&Y^_c_u_uYv#LUY]d&A&AAQFG#D #!D"4#9#9: >	..y9**t/B/B/NW_$$++,?@$$++,<=> 	''#s   %D)+__name__
__module____qualname____doc__
model_typekeys_to_ignore_at_inferencedefault_thetabase_model_tp_planbase_model_pp_planr   r8   __annotations__r   r   r   r   r   r    strr!   r"   floatr#   r$   boolr%   r&   r'   listr(   r   dictr)   r*   r+   r,   r-   r.   r/   r0   r1   r;   __classcell__)r@   s   @rA   r
   r
      s   , J#4"5M &/%.%.%."+ )"+ &(9:#%568IJ!"_$56 JK"s"s!!&'t'J#(S(#u#L%It%L#*%%L#*%+1L#S	/D(148O^d*T18$$!%NC$J%'+NDI$+"#C#$(KcT!( ND %(us{(Hd $$( (    r
   N)
huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r
   __all__r6   rR   rA   <module>rX      sK   * / 3 1 # 56U($ U(  7U(p 
rR   