
    iO                        d Z ddlmZ ddlZddlmZ ddlmZ ddlm	Z	 ddl
mZ dd	lmZ dd
lmZmZ ddlmZ ddlmZ ddlmZmZmZmZmZmZmZmZ ddlmZ  ej@                  e!      Z"dZ# G d de      Z$ G d de      Z% G d de      Z& G d de      Z' G d de      Z( G d de      Z) G d de      Z* G d  d!e      Z+g d"Z,y)#zPyTorch Qwen3 model.    )CallableN   )Cache)FlashAttentionKwargs)CausalLMOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)TransformersKwargslogging   )GemmaMLP)LlamaAttention)Qwen2ForCausalLMQwen2ForQuestionAnsweringQwen2ForSequenceClassificationQwen2ForTokenClassificationQwen2RMSNormQwen2RotaryEmbeddingapply_rotary_pos_embeager_attention_forward   )Qwen3ConfigzQwen/Qwen3-8Bc                       e Zd Zy)Qwen3RMSNormN__name__
__module____qualname__     x/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/qwen3/modular_qwen3.pyr   r   0       r    r   c                       e Zd Zy)Qwen3MLPNr   r   r    r!   r$   r$   4   r"   r    r$   c                       e Zd Zy)Qwen3RotaryEmbeddingNr   r   r    r!   r&   r&   8   r"   r    r&   c                        e Zd Zdedef fdZ	 ddej                  deej                  ej                  f   dej                  dz  de	dz  d	e
e   d
eej                  ej                  dz  f   fdZ xZS )Qwen3Attentionconfig	layer_idxc                 R   t        |d      r|j                  |   nd | _        t        |   ||       t        | j                  |j                        | _        t        | j                  |j                        | _	        | j                  dk(  r|j                  | _
        y d | _
        y )Nlayer_types)epssliding_attention)hasattrr,   
layer_typesuper__init__r   head_dimrms_norm_epsq_normk_normsliding_window)selfr)   r*   	__class__s      r!   r2   zQwen3Attention.__init__=   s    ;B6=;Y&,,Y7_c+"4==f6I6IJ"4==f6I6IJ7;J]7]f33cgr    Nhidden_statesposition_embeddingsattention_maskpast_key_valueskwargsreturnc                 \   |j                   d d }g |d| j                  }| j                  | j                  |      j	                  |            j                  dd      }| j                  | j                  |      j	                  |            j                  dd      }	| j                  |      j	                  |      j                  dd      }
|\  }}t        ||	||      \  }}	| |j                  |	|
| j                        \  }	}
t        j                  | j                  j                  t               } || ||	|
|f| j"                  sdn| j$                  | j&                  | j(                  d|\  }} |j*                  g |d j-                         }| j/                  |      }||fS )Nr   r   g        )dropoutscalingr7   )shaper3   r5   q_projview	transposer6   k_projv_projr   updater*   r   get_interfacer)   _attn_implementationr   trainingattention_dropoutrC   r7   reshape
contiguouso_proj)r8   r:   r;   r<   r=   r>   input_shapehidden_shapequery_states
key_statesvalue_statescossinattention_interfaceattn_outputattn_weightss                   r!   forwardzQwen3Attention.forwardD   s    $))#2.88b8$--8{{4;;}#=#B#B<#PQ[[\]_`a[[]!;!@!@!NOYYZ[]^_
{{=166|DNNqRST&S#7jRUWZ#[ j&'6'='=j,X\XfXf'g$J(?(M(MKK,,.E)
 %8
%
  $}}C$2H2HLL..
%
 
%
!\ *k));;;;FFHkk+.L((r    )N)r   r   r   r   intr2   torchTensortupler   r	   r   r\   __classcell__r9   s   @r!   r(   r(   <   s    h{ hs h )-')||') #5<<#=>') t+	')
 ') -.') 
u||U\\D00	1')r    r(   c                   .     e Zd Zdee   def fdZ xZS )Qwen3ForCausalLMsuper_kwargsr?   c                 "    t        |   di |S )a^  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
            config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
            (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.

        Example:

        ```python
        >>> from transformers import AutoTokenizer, Qwen3ForCausalLM

        >>> model = Qwen3ForCausalLM.from_pretrained("Qwen/Qwen3-8B")
        >>> tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-8B")

        >>> prompt = "Hey, are you conscious? Can you talk to me?"
        >>> inputs = tokenizer(prompt, return_tensors="pt")

        >>> # Generate
        >>> generate_ids = model.generate(inputs.input_ids, max_length=30)
        >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        "Hey, are you conscious? Can you talk to me?\nI'm not conscious, but I can talk to you."
        ```r   )r1   r\   )r8   re   r9   s     r!   r\   zQwen3ForCausalLM.forwardo   s    4 w...r    )r   r   r   r	   r
   r   r\   ra   rb   s   @r!   rd   rd   n   s%    /12/ 
 / /r    rd   c                       e Zd Zy)Qwen3ForSequenceClassificationNr   r   r    r!   rh   rh      r"   r    rh   c                       e Zd Zy)Qwen3ForTokenClassificationNr   r   r    r!   rj   rj      r"   r    rj   c                       e Zd Zy)Qwen3ForQuestionAnsweringNr   r   r    r!   rl   rl      r"   r    rl   )rd   rl   Qwen3PreTrainedModel
Qwen3Modelrh   rj   )-__doc__collections.abcr   r^   cache_utilsr   modeling_flash_attention_utilsr   modeling_outputsr   modeling_utilsr   processing_utilsr	   utilsr
   r   gemma.modeling_gemmar   llama.modeling_llamar   qwen2.modeling_qwen2r   r   r   r   r   r   r   r   configuration_qwen3r   
get_loggerr   logger_CHECKPOINT_FOR_DOCr   r$   r&   r(   rd   rh   rj   rl   __all__r   r    r!   <module>r      s     $    B 6 5 & 0 +	 	 	 - 
		H	%% 	< 		x 		/ 	/)^ /)d/' /<	%C 		"= 		 9 	r    