
    iF                         d Z ddlmZ ddlmZ ddlmZ ddlmZm	Z	  e	j                  e      Z ed      e G d	 d
e                    Zd
gZy)zMinistral model configuration    )strict   )PreTrainedConfig)RopeParameters)auto_docstringloggingz"mistralai/Ministral-3-8B-Base-2512)
checkpointc                       e Zd ZU dZdZdgZddddddddZdgdgfd	d
gd	gfd	gd	gfdZddhZdZ	e
ed<   dZe
ed<   dZe
ed<   dZe
ed<   dZe
ed<   dZe
ed<   dZe
ed<   dZeed<   dZe
ed<   dZeed <   d!Zeed"<   d#Zeed$<   d%Ze
d&z  ed'<   d(Ze
d&z  ed)<   d*Ze
ee
   z  d&z  ed+<   d,Zeed-<   d&Ze e!z  d&z  ed.<   d&Z"e
d&z  ed/<   d0Z#ee
z  ed1<    fd2Z$ xZ%S )3Ministral3Configa'  
    Example:

    ```python
    >>> from transformers import Ministral3Config, Ministral3ForCausalLM, Mistral3Config, Mistral3ForConditionalGeneration, PixtralVisionConfig

    >>> # Initializing a Pixtral-vision config
    >>> vision_config = PixtralVisionConfig()

    >>> # Initializing a Ministral3 config
    >>> text_config = Ministral3Config()

    >>> # Initializing a Mistral3 configuration
    >>> configuration = Mistral3Config(vision_config, text_config)

    >>> # Initializing a model from the Ministral3 configuration
    >>> text_model = Ministral3ForCausalLM(text_config)

    >>> # Initializing a model from the Mistral3 configuration
    >>> model = Mistral3ForConditionalGeneration(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```
ministral3past_key_valuescolwiserowwise)zlayers.*.self_attn.q_projzlayers.*.self_attn.k_projzlayers.*.self_attn.v_projzlayers.*.self_attn.o_projzlayers.*.mlp.gate_projzlayers.*.mlp.up_projzlayers.*.mlp.down_proj	input_idsinputs_embedshidden_statesattention_mask)embed_tokenslayersnormllama_4_scaling_betamax_position_embeddingsi   
vocab_sizei   hidden_sizei 8  intermediate_size"   num_hidden_layers    num_attention_heads   num_key_value_heads   head_dimsilu
hidden_acti   g{Gz?initializer_rangegh㈵>rms_norm_epsT	use_cache   Npad_token_id   bos_token_id   eos_token_idFtie_word_embeddingsrope_parameterssliding_windowg        attention_dropoutc                 L   | j                   dddd| j                  dddddd
| _         | j                  | j                  n| j                  | j                  z  | _        | j
                  | j                  | _        d	|v rt        j                  d
       t        | $  di | y )Nyarng    .Ag      0@i @  g      @@g      ?g?)
type
rope_thetafactor original_max_position_embeddingsr   	beta_fast	beta_slowmscale_all_dimmscaler   layer_typeszDetected Mistral model with layer_types. Consider using AutoModel or Ministral classes instead to enable alternating attention compatibility. )
r0   r   r#   r   r   r!   loggerwarning_oncesuper__post_init__)selfkwargs	__class__s     /var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/ministral3/configuration_ministral3.pyrB   zMinistral3Config.__post_init__]   s    ''49+/+G+G! "%(+$D  *.)BHXHX\`\t\tHt##+'+'?'?D$F" ` 	''    )&__name__
__module____qualname____doc__
model_typekeys_to_ignore_at_inferencebase_model_tp_planbase_model_pp_planignore_keys_at_rope_validationr   int__annotations__r   r   r   r   r!   r#   r%   strr   r&   floatr'   r(   boolr*   r,   r.   listr/   r0   r   dictr1   r2   rB   __classcell__)rE   s   @rF   r   r      sr   2 J#4"5 &/%.%.%."+ )"+ &(9:#%568IJ!"_$56
 '=>W%X"JK"s"s!!  HcJ#)S)#u#L%It!L#*! L#* +,L#S	/D(, %%48O^d*T18!%NC$J%%(us{(( (rG   r   N)rK   huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r   
get_loggerrH   r?   r   __all__r>   rG   rF   <module>r_      sc    $ . 3 1 , 
		H	% ?@Z(' Z(  AZ(z 
rG   