
    i(                     v    d Z ddlmZ ddlmZ ddlmZ ddlmZ  ed      e G d	 d
e                    Z	d
gZ
y)zQwen3 model configuration    )strict   )PreTrainedConfig)RopeParameters)auto_docstringzQwen/Qwen3-8B)
checkpointc            
           e Zd ZU dZdZdgZdddddddddd	Zdgd	gfd
dgd
gfd
gd
gfdZdZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	dz  e
d<   dZe	e
d<   dZee
d<   dZe	e
d<   dZee
d<   d Zee
d!<   d"Zee
d#<   d$Zee
d%<   dZeez  dz  e
d&<   d$Zee
d'<   d$Zee
d(<   dZe	dz  e
d)<   d*Z e	e
d+<   dZ!e"e   dz  e
d,<   d-Z#ee	z  e
d.<   dZ$e	dz  e
d/<   dZ%e	dz  e
d0<   dZ&e	e"e	   z  dz  e
d1<    fd2Z' xZ(S )3Qwen3Configap  
    ```python
    >>> from transformers import Qwen3Model, Qwen3Config

    >>> # Initializing a Qwen3 style configuration
    >>> configuration = Qwen3Config()

    >>> # Initializing a model from the Qwen3-8B style configuration
    >>> model = Qwen3Model(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```
    qwen3past_key_valuescolwisereplicated_with_grad_allreducerowwise)	zlayers.*.self_attn.q_projzlayers.*.self_attn.k_projzlayers.*.self_attn.v_projzlayers.*.self_attn.q_normzlayers.*.self_attn.k_normzlayers.*.self_attn.o_projzlayers.*.mlp.gate_projzlayers.*.mlp.up_projzlayers.*.mlp.down_proj	input_idsinputs_embedshidden_statesattention_mask)embed_tokenslayersnormiQ 
vocab_sizei   hidden_sizei V  intermediate_size    num_hidden_layersnum_attention_headsNnum_key_value_heads   head_dimsilu
hidden_acti   max_position_embeddingsg{Gz?initializer_rangegư>rms_norm_epsT	use_cacheFtie_word_embeddingsrope_parametersattention_biasuse_sliding_windowsliding_window   max_window_layerslayer_typesg        attention_dropoutpad_token_idbos_token_ideos_token_idc                 H   | j                   r| j                  nd | _        | j                  | j                  | _        | j                  Et        | j                        D cg c]!  }| j                  || j                  k\  rdnd# c}| _        t        | $  di | y c c}w )Nsliding_attentionfull_attention )
r)   r*   r   r   r-   ranger   r,   super__post_init__)selfkwargsi	__class__s      ~/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/qwen3/configuration_qwen3.pyr8   zQwen3Config.__post_init__V   s    595L5Ld11RV##+'+'?'?D$#
 t556	   &&2qD<R<R7R $%& D 	'' s   !&B))__name__
__module____qualname____doc__
model_typekeys_to_ignore_at_inferencebase_model_tp_planbase_model_pp_planr   int__annotations__r   r   r   r   r   r   r!   strr"   r#   floatr$   r%   boolr&   r'   r   dictr(   r)   r*   r,   r-   listr.   r/   r0   r1   r8   __classcell__)r<   s   @r=   r
   r
      s    J#4"5 &/%.%.%E%E%."+ )"+
 &(9:#%568IJ!"_$56 JK"s"s!!&(t(HcJ#(S(#u#L%It %%48O^d*T18 ND $$!%NC$J%s$(KcT!(%(us{(#L#*##L#*#+/L#S	/D(/( (    r
   N)rA   huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r
   __all__r5   rN   r=   <module>rT      sL      . 3 1 # ?+I(" I(  ,I(X /rN   