
    iI                         d Z ddlmZ ddlmZ ddlmZ ddlmZm	Z	  e	j                  e      Z ed      e G d	 d
e                    Zd
gZy)zMistral model configuration    )strict   )PreTrainedConfig)RopeParameters)auto_docstringloggingzmistralai/Mistral-7B-v0.1)
checkpointc                       e Zd ZU dZdZdgZddddddddZdgdgfd	d
gd	gfd	gd	gfdZdZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	dz  e
d<   dZee
d<   dZe	e
d<   dZee
d<   dZee
d <   d!Zee
d"<   dZe	dz  e
d#<   d$Ze	dz  e
d%<   d&Ze	ee	   z  dz  e
d'<   d(Zee
d)<   dZee z  dz  e
d*<   dZ!e	dz  e
d+<   d,Z"ee	z  e
d-<    fd.Z# xZ$S )/MistralConfiga  
    Example:

    ```python
    >>> from transformers import MistralModel, MistralConfig

    >>> # Initializing a Mistral 7B style configuration
    >>> configuration = MistralConfig()

    >>> # Initializing a model from the Mistral 7B style configuration
    >>> model = MistralModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```mistralpast_key_valuescolwiserowwise)zlayers.*.self_attn.q_projzlayers.*.self_attn.k_projzlayers.*.self_attn.v_projzlayers.*.self_attn.o_projzlayers.*.mlp.gate_projzlayers.*.mlp.up_projzlayers.*.mlp.down_proj	input_idsinputs_embedshidden_statesattention_mask)embed_tokenslayersnormi }  
vocab_sizei   hidden_sizei 8  intermediate_size    num_hidden_layersnum_attention_heads   num_key_value_headsNhead_dimsilu
hidden_acti   max_position_embeddingsg{Gz?initializer_rangegư>rms_norm_epsT	use_cachepad_token_id   bos_token_id   eos_token_idFtie_word_embeddingsrope_parameterssliding_windowg        attention_dropoutc                     | j                   | j                   n| j                  | j                  z  | _         | j                  | j                  | _        d|v rt        j                  d       t        |   di |S )Nlayer_typeszDetected Mistral model with layer_types. Consider using AutoModel or Ministral classes instead to enable alternating attention compatibility. )r   r   r   r   loggerwarning_oncesuper__post_init__)selfkwargs	__class__s     /var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/mistral/configuration_mistral.pyr5   zMistralConfig.__post_init__S   sv    )-)BHXHX\`\t\tHt##+'+'?'?D$F" ` w$.v..    )%__name__
__module____qualname____doc__
model_typekeys_to_ignore_at_inferencebase_model_tp_planbase_model_pp_planr   int__annotations__r   r   r   r   r   r   r!   strr"   r#   floatr$   r%   boolr&   r(   r*   listr+   r,   r   dictr-   r.   r5   __classcell__)r8   s   @r9   r   r      sj     J#4"5 &/%.%.%."+ )"+ &(9:#%568IJ!"_$56 JK"s"s!!  HcDjJ#,S,#u#L%It#L#*# L#* +,L#S	/D(, %%48O^d*T18!%NC$J%%(us{(	/ 	/r:   r   N)r>   huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r   
get_loggerr;   r2   r   __all__r1   r:   r9   <module>rQ      sb    " . 3 1 , 
		H	% 67@/$ @/  8@/F 
r:   