
    i                     v    d Z ddlmZ ddlmZ ddlmZ ddlmZ  ed      e G d	 d
e                    Z	d
gZ
y)zMixtral model configuration    )strict   )PreTrainedConfig)RopeParameters)auto_docstringzmistralai/Mixtral-8x7B-v0.1)
checkpointc                        e Zd ZU dZdZdgZdZdddddddd	Zd
gdgfddgdgfdgdgfdZddiZ	dZ
eed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZedz  ed<   dZeed<   d Zeed!<   d"Zeed#<   d$Zeed%<   d&Zeed'<   dZedz  ed(<   d)Zedz  ed*<   d+Zeee   z  dz  ed,<   d-Zeed.<   dZ edz  ed/<   d0Z!eez  ed1<   d+Z"eed2<   dZ#eed<   d-Z$eed3<   d4Z%eed5<   d0Z&eed6<   dZ'e(e)z  dz  ed7<    fd8Z* xZ+S )9MixtralConfiga  
    Example:

    ```python
    >>> from transformers import MixtralModel, MixtralConfig

    >>> # Initializing a Mixtral 7B style configuration
    >>> configuration = MixtralConfig()

    >>> # Initializing a model from the Mixtral 7B style configuration
    >>> model = MixtralModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```mixtralpast_key_valuesg    .Acolwiserowwisepacked_colwisemoe_tp_experts)zlayers.*.self_attn.q_projzlayers.*.self_attn.k_projzlayers.*.self_attn.v_projzlayers.*.self_attn.o_projz!layers.*.mlp.experts.gate_up_projzlayers.*.mlp.experts.down_projzlayers.*.mlp.experts	input_idsinputs_embedshidden_statesattention_mask)embed_tokenslayersnormnum_expertsnum_local_expertsi }  
vocab_sizei   hidden_sizei 8  intermediate_size    num_hidden_layersnum_attention_heads   num_key_value_headsNhead_dimsilu
hidden_acti   max_position_embeddingsg{Gz?initializer_rangegh㈵>rms_norm_epsT	use_cachepad_token_id   bos_token_id   eos_token_idFtie_word_embeddingssliding_windowg        attention_dropoutnum_experts_per_tokoutput_router_logitsgMbP?router_aux_loss_coefrouter_jitter_noiserope_parametersc                 ^    | j                   | j                  | _         t        |   di | y )N )r!   r   super__post_init__)selfkwargs	__class__s     /var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/mixtral/configuration_mixtral.pyr9   zMixtralConfig.__post_init__V   s-    ##+'+'?'?D$''    ),__name__
__module____qualname____doc__
model_typekeys_to_ignore_at_inferencedefault_thetabase_model_tp_planbase_model_pp_planattribute_mapr   int__annotations__r   r   r   r   r!   r"   r$   strr%   r&   floatr'   r(   boolr)   r+   r-   listr.   r/   r0   r1   r   r2   r3   r4   r5   r   dictr9   __classcell__)r<   s   @r=   r
   r
      s     J#4"5M%.%.%.%.-=*3 0 &(9:#%568IJ!"_$56
 #$78MJK"s"s!!  HcDjJ#,S,#u#L%It#L#*# L#* +,L#S	/D(, %%!%NC$J%%(us{(  s!&$&"'%'!$$48O^d*T18( (r>   r
   N)rB   huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r
   __all__r7   r>   r=   <module>rV      sN    " . 3 1 # 89A($ A(  :A(H 
r>   