
    iu                     v    d Z ddlmZ ddlmZ ddlmZ ddlmZ  ed      e G d	 d
e                    Z	d
gZ
y)zJetMoe model configuration    )strict   )PreTrainedConfig)RopeParameters)auto_docstringzjetmoe/jetmoe-8b)
checkpointc                       e Zd ZU dZdZdgZddiZdZee	d<   dZ
ee	d	<   d
Zee	d<   dZee	d<   dZee	d<   dZee	d<   dZee	d<   dZee	d<   dZee	d<   dZee	d<   dZee	d<   dZee	d<   dZee	d<   dZed z  e	d!<   dZeee   z  d z  e	d"<   d Zed z  e	d#<   dZee	d$<   d Zee z  d z  e	d%<   d&Z!ee	d'<   dZ"ee	d(<   d)Z#eez  e	d*<    fd+Z$d, Z% xZ&S )-JetMoeConfigaj  
    kv_channels (`int`, *optional*, defaults to 128):
        Defines the number of channels for the key and value tensors.
    num_local_experts (`int`, *optional*, defaults to 8):
        Defines the number of experts in the MoE and MoA.

    ```python
    >>> from transformers import JetMoeModel, JetMoeConfig

    >>> # Initializing a JetMoe 4B style configuration
    >>> configuration = JetMoeConfig()

    >>> # Initializing a model from the JetMoe 4B style configuration
    >>> model = JetMoeModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```
    jetmoepast_key_valueshead_dimkv_channelsi }  
vocab_sizei   hidden_size   num_hidden_layers   num_key_value_heads   i   intermediate_sizei   max_position_embeddingssiluactivation_function   num_local_experts   num_experts_per_tokFoutput_router_logitsg{Gz?aux_loss_coefT	use_cache   Nbos_token_ideos_token_idpad_token_idtie_word_embeddingsrope_parametersgư>rms_norm_epsinitializer_rangeg        attention_dropoutc                 `    | j                   | j                  z  | _        t        |   di | y )N )r   r   num_attention_headssuper__post_init__)selfkwargs	__class__s     /var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/jetmoe/configuration_jetmoe.pyr.   zJetMoeConfig.__post_init__H   s,    #'#;#;d>V>V#V ''    c                 L    | j                   | j                  kD  rt        d      y)zOPart of `@strict`-powered validation. Validates the architecture of the config.zG`num_experts_per_tok` must be less than or equal to `num_local_experts`N)r   r   
ValueError)r/   s    r2   validate_architecturez"JetMoeConfig.validate_architectureL   s'    ##d&<&<<fgg =r3   )'__name__
__module____qualname____doc__
model_typekeys_to_ignore_at_inferenceattribute_mapr   int__annotations__r   r   r   r   r   r   r   strr   r   r   boolr   floatr    r"   r#   listr$   r%   r&   r   dictr'   r(   r)   r.   r6   __classcell__)r1   s   @r2   r
   r
      s-   ( J#4"5/MJKs!!K!s!#'S'%%s  !&$&M5It L#* +,L#S	/D(,#L#*# $$48O^d*T18L%#u#%(us{((hr3   r
   N)r:   huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r
   __all__r+   r3   r2   <module>rK      sN    ! . 3 1 # -.6h# 6h  /6hr 
r3   