
    i                        d dl mZ d dlZd dlmZ ddlmZmZ ddlmZm	Z	 ddl
mZ ddlmZ dd	lmZ dd
lmZ ddlmZmZmZ ddlmZ ddlmZ ddlmZ ddlmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z& ddl'm(Z( ddl)m*Z*  ejV                  e,      Z- G d de#      Z. G d de      Z/ G d de      Z0 G d de      Z1 G d de$      Z2 G d de(      Z3 G d  d!e      Z4 G d" d#e!      Z5 G d$ d%e"      Z6 G d& d'e       Z7g d(Z8y))    )CallableN)nn   )CacheDynamicCache)create_causal_mask!create_sliding_window_causal_mask)FlashAttentionKwargs)BaseModelOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)TransformersKwargsauto_docstringlogging)merge_with_config_defaults)capture_outputs   )Gemma2RotaryEmbedding)
LlamaAttentionLlamaDecoderLayerLlamaForCausalLMLlamaForQuestionAnsweringLlamaForSequenceClassificationLlamaForTokenClassificationLlamaMLPLlamaPreTrainedModelapply_rotary_pos_embeager_attention_forward)MistralModel   )Qwen2Configc                        e Zd Z fdZ xZS )Qwen2MLPc                 J   t         |   |       t        j                  | j                  | j
                  d      | _        t        j                  | j                  | j
                  d      | _        t        j                  | j
                  | j                  d      | _        y )NFbias)	super__init__r   Linearhidden_sizeintermediate_size	gate_projup_proj	down_projselfconfig	__class__s     x/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/qwen2/modular_qwen2.pyr(   zQwen2MLP.__init__&   ss     4#3#3T5K5KRWXyy!1!143I3IPUV4#9#94;K;KRWX    )__name__
__module____qualname__r(   __classcell__r2   s   @r3   r#   r#   %   s    Y Yr4   r#   c                       e Zd Zy)Qwen2RotaryEmbeddingNr5   r6   r7    r4   r3   r;   r;   -       r4   r;   c                        e Zd Zdedef fdZ	 ddej                  deej                  ej                  f   dej                  dz  de	dz  d	e
e   d
eej                  ej                  dz  f   fdZ xZS )Qwen2Attentionr1   	layer_idxc                    t        |d      r|j                  |   nd | _        t        |   ||       t        j                  |j                  |j                  | j                  z  d      | _
        t        j                  |j                  |j                  | j                  z  d      | _        t        j                  |j                  |j                  | j                  z  d      | _        t        j                  |j                  | j                  z  |j                  d      | _        | j                  dk(  r|j                  | _        y d | _        y )Nlayer_typesTr%   Fsliding_attention)hasattrrC   
layer_typer'   r(   r   r)   r*   num_attention_headshead_dimq_projnum_key_value_headsk_projv_projo_projsliding_window)r0   r1   rA   r2   s      r3   r(   zQwen2Attention.__init__2   s   ;B6=;Y&,,Y7_c+ii 2 2F4N4NQUQ^Q^4^eijii 2 2F4N4NQUQ^Q^4^eijii 2 2F4N4NQUQ^Q^4^eijii : :T]] JFL^L^ejk7;J]7]f33cgr4   Nhidden_statesposition_embeddingsattention_maskpast_key_valueskwargsreturnc                     |j                   d d }g |d| j                  }| j                  |      j                  |      j	                  dd      }| j                  |      j                  |      j	                  dd      }	| j                  |      j                  |      j	                  dd      }
|\  }}t        ||	||      \  }}	| |j                  |	|
| j                        \  }	}
t        j                  | j                  j                  t              } || ||	|
|f| j                  sdn| j                   | j"                  | j$                  d|\  }} |j&                  g |d j)                         }| j+                  |      }||fS )Nr    r   g        )dropoutscalingrN   )shaperH   rI   view	transposerK   rL   r   updaterA   r   get_interfacer1   _attn_implementationr   trainingattention_dropoutrX   rN   reshape
contiguousrM   )r0   rO   rP   rQ   rR   rS   input_shapehidden_shapequery_states
key_statesvalue_statescossinattention_interfaceattn_outputattn_weightss                   r3   forwardzQwen2Attention.forward;   s    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST&S#7jRUWZ#[ j&'6'='=j,X\XfXf'g$J(?(M(MKK,,.E)
 %8
%
  $}}C$2H2HLL..
%
 
%
!\ *k));;;;FFHkk+.L((r4   )N)r5   r6   r7   r!   intr(   torchTensortupler   r   r
   rm   r8   r9   s   @r3   r@   r@   1   s    h{ hs h )-')||') #5<<#=>') t+	')
 ') -.') 
u||U\\D00	1')r4   r@   c                       e Zd Zy)Qwen2DecoderLayerNr<   r=   r4   r3   rs   rs   e   r>   r4   rs   c                       e Zd Zy)Qwen2PreTrainedModelNr<   r=   r4   r3   ru   ru   i   r>   r4   ru   c                        e Zd Zdef fdZeee	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  dedz  dej                  dz  d	edz  d
ee   defd                     Z xZS )
Qwen2Modelr1   c                 ^    t         |   |       d| j                  j                  v | _        y )NrD   )r'   r(   r1   rC   has_sliding_layersr/   s     r3   r(   zQwen2Model.__init__n   s'     "59P9P"Pr4   N	input_idsrQ   position_idsrR   inputs_embeds	use_cacherS   rT   c           
         |d u |d uz  rt        d      || j                  |      }|r|t        | j                        }|V||j	                         nd}t        j                  |j                  d   |j                        |z   }|j                  d      }t        |x}	t              s9| j                  ||||d}
dt        di |
i}	| j                  rt        di |
|	d<   |}| j                  ||      }t!        | j"                  d | j                  j$                         D ].  \  }} ||f|	| j                  j&                  |      ||||d	|}0 | j)                  |      }t+        ||r|
      S d 
      S )Nz:You must specify exactly one of input_ids or inputs_embeds)r1   r   r    )device)r1   r|   rQ   rR   r{   full_attentionrD   )rQ   rP   r{   rR   r}   )last_hidden_staterR   r=   )
ValueErrorembed_tokensr   r1   get_seq_lengthro   arangerY   r   	unsqueeze
isinstancedictr   ry   r	   
rotary_emb	enumeratelayersnum_hidden_layersrC   normr   )r0   rz   rQ   r{   rR   r|   r}   rS   past_seen_tokenscausal_mask_mappingmask_kwargsrO   rP   idecoder_layers                  r3   rm   zQwen2Model.forwardr   s    -t";<YZZ  --i8M0*$++>OCRC^==?de <<(;(;A(>}G[G[\_ooL'11!4L ?-F ++!."0#2 ,K !"4"C{"C# &&;\;k_j;k#$78%"oom\J )$++6U8U8U*V W 		A})24;;3J3J13MN$7) /# M		 		-0&+/8O
 	
>B
 	
r4   )NNNNNN)r5   r6   r7   r!   r(   r   r   r   ro   
LongTensorrp   r   FloatTensorboolr   r   r   rm   r8   r9   s   @r3   rw   rw   m   s    Q{ Q   .2.204(,26!%<
##d*<
 t+<
 &&-	<

 <
 ((4/<
 $;<
 +,<
 
!<
    <
r4   rw   c                       e Zd Zy)Qwen2ForCausalLMNr<   r=   r4   r3   r   r      r>   r4   r   c                       e Zd Zy)Qwen2ForSequenceClassificationNr<   r=   r4   r3   r   r      r>   r4   r   c                       e Zd Zy)Qwen2ForTokenClassificationNr<   r=   r4   r3   r   r      r>   r4   r   c                       e Zd Zy)Qwen2ForQuestionAnsweringNr<   r=   r4   r3   r   r      r>   r4   r   )ru   rw   r   Qwen2RMSNormr   r   r   )9collections.abcr   ro   r   cache_utilsr   r   masking_utilsr   r	   modeling_flash_attention_utilsr
   modeling_outputsr   modeling_utilsr   processing_utilsr   utilsr   r   r   utils.genericr   utils.output_capturingr   gemma2.modeling_gemma2r   llama.modeling_llamar   r   r   r   r   r   r   r   r   r   mistral.modeling_mistralr   configuration_qwen2r!   
get_loggerr5   loggerr#   r;   r@   rs   ru   rw   r   r   r   r   __all__r=   r4   r3   <module>r      s    $   . R B 6 & @ @ 7 5 :   4 , 
		H	%Yx Y	0 	1)^ 1)h	) 		/ 	D
 D
N	' 		%C 		"= 		 9 	r4   