
    i                        d dl mZ d dlZddlmZ ddlmZ ddlmZm	Z	m
Z
 ddlmZ ddlmZ dd	lmZmZ d
dlmZmZmZmZmZmZmZ  ej4                  e      Zdej:                  dededej:                  fdZ  G d de      Z! G d de      Z"e G d de             Z#e G d de             Z$e G d de             Z% G d de
e#      Z& G d de	e#      Z' G d d ee#      Z(g d!Z)y)"    )CallableN   )Cache)FlashAttentionKwargs)GenericForQuestionAnswering GenericForSequenceClassificationGenericForTokenClassification)ALL_ATTENTION_FUNCTIONS)Unpack)auto_docstringlogging   )MistralAttentionMistralDecoderLayerMistralForCausalLMMistralModelMistralPreTrainedModelapply_rotary_pos_embeager_attention_forwardpositions_idsbetamax_position_embeddingsreturnc           	          d|t        j                  dt        j                  | |z        z         z  z   }|d d d d d d f   S )N   )torchlogfloor)r   r   r   scalings       /var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/ministral3/modular_ministral3.pyget_llama_4_attn_scaler!      sB    $1u{{=CZ3Z'[#[\\\G1dAt#$$    c                       e Zd Z	 d
dej                  deej                  ej                  f   dej                  dz  dej                  dedz  dee   deej                  ej                  dz  f   fd	Z	y)Ministral3AttentionNhidden_statesposition_embeddingsattention_maskposition_idspast_key_valueskwargsr   c           
         |j                   d d }g |d| j                  }| j                  |      j                  |      j	                  dd      }	| j                  |      j                  |      j	                  dd      }
| j                  |      j                  |      j	                  dd      }|\  }}t        |	|
||      \  }	}
|	t        || j                  j                  j                  d      | j                  j                  j                  d            j                  |	j                        z  }	| |j                  |
|| j                        \  }
}t!        j"                  | j                  j$                  t&              } || |	|
||f| j(                  sdn| j*                  | j,                  t/        | j                  dd       d|\  }} |j0                  g |d j3                         }| j5                  |      }||fS )	Nr   r   llama_4_scaling_beta original_max_position_embeddingsg        sliding_window)dropoutr   r/   )shapehead_dimq_projview	transposek_projv_projr   r!   configrope_parametersgettodtypeupdate	layer_idxr
   get_interface_attn_implementationr   trainingattention_dropoutr   getattrreshape
contiguouso_proj)selfr%   r&   r'   r(   r)   r*   input_shapehidden_shapequery_states
key_statesvalue_statescossinattention_interfaceattn_outputattn_weightss                    r    forwardzMinistral3Attention.forward#   s    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST&S#7jRUWZ#[ j#&<KK''++,BCKK''++,NO'
 "\
 	! &'6'='=j,X\XfXf'g$J(?(M(MKK,,.E)
 %8
%
  $}}C$2H2HLL"4;;0@$G
%
 
%
!\ *k));;;;FFHkk+.L((r"   )N)
__name__
__module____qualname__r   Tensortupler   r   r   rR    r"   r    r$   r$   "   s     )--)||-) #5<<#=>-) t+	-)
 ll-) -) -.-) 
u||U\\D00	1-)r"   r$   c                       e Zd Zy)Ministral3DecoderLayerNrS   rT   rU   rX   r"   r    rZ   rZ   S       r"   rZ   c                       e Zd Zy)Ministral3PreTrainedModelNr[   rX   r"   r    r^   r^   W       r"   r^   c                       e Zd Zy)Ministral3ModelNr[   rX   r"   r    ra   ra   \   r_   r"   ra   c                       e Zd Zy)Ministral3ForCausalLMNr[   rX   r"   r    rc   rc   a   r_   r"   rc   c                       e Zd Zy) Ministral3ForTokenClassificationNr[   rX   r"   r    re   re   f   r\   r"   re   c                       e Zd Zy)#Ministral3ForSequenceClassificationNr[   rX   r"   r    rg   rg   j   r\   r"   rg   c                       e Zd Zy)Ministral3ForQuestionAnsweringNr[   rX   r"   r    ri   ri   n   r\   r"   ri   )rc   ri   ra   r^   rg   re   )*collections.abcr   r   cache_utilsr   modeling_flash_attention_utilsr   modeling_layersr   r   r	   modeling_utilsr
   processing_utilsr   utilsr   r   mistral.modeling_mistralr   r   r   r   r   r   r   
get_loggerrS   loggerrV   floatintr!   r$   rZ   r^   ra   rc   re   rg   ri   __all__rX   r"   r    <module>rw      s   $    B 
 6 & ,   
		H	%%%,, %e %^a %fkfrfr %
.)* .)b	0 	 	 6 	 	 	l 	 	 	. 	 		'DF_ 		*JLe 		%@B[ 	r"   