
    i                        d dl Z d dlmZ d dl mZ ddlmZmZ ddlmZ ddl	m
Z
mZ ddlmZ dd	lmZ dd
lmZmZ ddlmZ ddlmZ ddlmZ ddlmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$  ed      e G d de                    Z% G d de       Z& G d de      Z' G d de#      Z( G d de      Z) G d de"      Z* G d de$      Z+ G d  d!e!      Z, G d" d#e      Z- G d$ d%e      Z. G d& d'e      Z/ G d( d)e      Z0g d*Z1y)+    N)strict)nn   )CacheDynamicCache)PreTrainedConfig)create_causal_mask!create_sliding_window_causal_mask)BaseModelOutputWithPast)Unpack)TransformersKwargsauto_docstring)merge_with_config_defaults)capture_outputs   )MistralConfig)Qwen2AttentionQwen2DecoderLayerQwen2ForCausalLMQwen2ForQuestionAnsweringQwen2ForSequenceClassificationQwen2ForTokenClassificationQwen2MLP
Qwen2ModelQwen2PreTrainedModelQwen2RMSNormQwen2RotaryEmbeddingz$mistralai/Ministral-8B-Instruct-2410)
checkpointc                   6    e Zd ZU dZdZdZee   dz  ed<   d Z	y)MinistralConfiga  
    Example:

    ```python
    >>> from transformers import MinistralModel, MinistralConfig

    >>> # Initializing a Ministral 8B style configuration
    >>> configuration = MinistralConfig()

    >>> # Initializing a model from the Ministral 8B style configuration
    >>> model = MinistralModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```	ministralNlayer_typesc                     | j                   | j                  | _         | j                  #| j                  dndg| j                  z  | _        t        j                  | fi | y )Nsliding_attentionfull_attention)num_key_value_headsnum_attention_headsr"   sliding_windownum_hidden_layersr   __post_init__)selfkwargss     /var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/ministral/modular_ministral.pyr*   zMinistralConfig.__post_init__C   sg    ##+'+'?'?D$#'+':':'F#L\ && 'D 	&&t6v6    )
__name__
__module____qualname____doc__
model_typer"   liststr__annotations__r*    r.   r-   r    r    ,   s&      J$(KcT!(	7r.   r    c                       e Zd Zy)MinistralMLPNr/   r0   r1   r7   r.   r-   r9   r9   O       r.   r9   c                   $     e Zd Zdef fdZ xZS )MinistralAttention	layer_idxc                    t         |   ||       t        j                  |j                  |j
                  | j                  z  d      | _        t        j                  |j                  |j                  | j                  z  d      | _	        t        j                  |j                  |j                  | j                  z  d      | _
        y )NF)bias)super__init__r   Linearhidden_sizer'   head_dimq_projr&   k_projv_proj)r+   configr>   	__class__s      r-   rB   zMinistralAttention.__init__T   s    +ii 2 2F4N4NQUQ^Q^4^ejkii 2 2F4N4NQUQ^Q^4^ejkii 2 2F4N4NQUQ^Q^4^ejkr.   )r/   r0   r1   intrB   __classcell__rJ   s   @r-   r=   r=   S   s    l# l lr.   r=   c                       e Zd Zy)MinistralRMSNormNr:   r7   r.   r-   rO   rO   \   r;   r.   rO   c                       e Zd Zy)MinistralDecoderLayerNr:   r7   r.   r-   rQ   rQ   `   r;   r.   rQ   c                       e Zd Zy)MinistralPreTrainedModelNr:   r7   r.   r-   rS   rS   d   r;   r.   rS   c                       e Zd Zy)MinistralRotaryEmbeddingNr:   r7   r.   r-   rU   rU   h   r;   r.   rU   c                        e Zd Zdef fdZeee	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  dedz  dej                  dz  d	edz  d
ee   defd                     Z xZS )MinistralModelrI   c                 (    t         |   |       | `y )N)rA   rB   has_sliding_layers)r+   rI   rJ   s     r-   rB   zMinistralModel.__init__m   s     #r.   N	input_idsattention_maskposition_idspast_key_valuesinputs_embeds	use_cacher,   returnc           
         |d u |d uz  rt        d      || j                  |      }|r|t        | j                        }|V||j	                         nd}t        j                  |j                  d   |j                        |z   }|j                  d      }t        |x}	t              s)| j                  ||||d}
t        d
i |
t        d
i |
d}	|}| j                  ||      }t        | j                   d | j                  j"                         D ].  \  }} ||f|	| j                  j$                  |      ||||d|}0 | j'                  |      }t)        ||r|	      S d 	      S )Nz:You must specify exactly one of input_ids or inputs_embeds)rI   r      )device)rI   r^   r[   r]   r\   )r%   r$   )r[   r\   r]   r_   position_embeddings)last_hidden_stater]   r7   )
ValueErrorembed_tokensr   rI   get_seq_lengthtorcharangeshaperc   	unsqueeze
isinstancedictr	   r
   
rotary_emb	enumeratelayersr)   r"   normr   )r+   rZ   r[   r\   r]   r^   r_   r,   past_seen_tokenscausal_mask_mappingmask_kwargshidden_statesrd   idecoder_layers                  r-   forwardzMinistralModel.forwardq   s    -t";<YZZ  --i8M0*$++>OCRC^==?de <<(;(;A(>}G[G[\_ooL'11!4L ?-F ++!."0#2 ,K #5"C{"C%F%U%U#
 &"oom\J )$++6U8U8U*V W 		A})24;;3J3J13MN) /#$7 M		 		-0&+/8O
 	
>B
 	
r.   )NNNNNN)r/   r0   r1   r    rB   r   r   r   ri   
LongTensorTensorr   FloatTensorboolr   r   r   ry   rL   rM   s   @r-   rW   rW   l   s    $ $   .2.204(,26!%:
##d*:
 t+:
 &&-	:

 :
 ((4/:
 $;:
 +,:
 
!:
    :
r.   rW   c                       e Zd Zy)MinistralForCausalLMNr:   r7   r.   r-   r   r      r;   r.   r   c                       e Zd Zy)"MinistralForSequenceClassificationNr:   r7   r.   r-   r   r      r;   r.   r   c                       e Zd Zy)MinistralForTokenClassificationNr:   r7   r.   r-   r   r      r;   r.   r   c                       e Zd Zy)MinistralForQuestionAnsweringNr:   r7   r.   r-   r   r      r;   r.   r   )r    rS   rW   r   r   r   r   )2ri   huggingface_hub.dataclassesr   r   cache_utilsr   r   configuration_utilsr   masking_utilsr	   r
   modeling_outputsr   processing_utilsr   utilsr   r   utils.genericr   utils.output_capturingr   mistral.configuration_mistralr   qwen2.modeling_qwen2r   r   r   r   r   r   r   r   r   r   r   r    r9   r=   rO   rQ   rS   rU   rW   r   r   r   r   __all__r7   r.   r-   <module>r      s     .  . 3 R 7 & 7 7 5 9    AB7m 7  C7B	8 	l l	| 		- 		3 		3 	B
Z B
J	+ 		)G 		&A 		$= 	r.   