
    i                     v    d Z ddlmZ ddlmZ ddlmZ ddlmZ  ed      e G d	 d
e                    Z	d
gZ
y)z$GraniteMoeShared model configuration    )strict   )PreTrainedConfig)RopeParameters)auto_docstringz!ibm-granite/granite-speech-3.2-8b)
checkpointc                   ,    e Zd ZU dZdZdgZdZeed<   dZ	eed<   dZ
eed	<   d
Zeed<   d
Zeed<   dZedz  ed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZedz  ed<   dZedz  ed<   dZeee   z  dz  ed<   dZeed<   dZeez  dz  ed <   dZeed!<   d"Zeez  dz  ed#<   d$Z eez  dz  ed%<   d$Z!eez  dz  ed&<   d$Z"eez  dz  ed'<   d$Z#eez  dz  ed(<   d)Z$edz  ed*<   dZ%edz  ed+<   dZ&edz  ed,<   d-Z'edz  ed.<   d/Z(eed0<    fd1Z) xZ*S )2GraniteMoeSharedConfiga  
    embedding_multiplier (`float`, *optional*, defaults to 1.0):
        embedding multiplier
    logits_scaling (`float`, *optional*, defaults to 1.0):
        divisor for output logits
    residual_multiplier (`float`, *optional*, defaults to 1.0):
        residual multiplier
    attention_multiplier (`float`, *optional*, defaults to 1.0):
        attention multiplier
    shared_intermediate_size (`int`, *optional*, defaults to 1024):
        intermediate size for shared experts.

    ```python
    >>> from transformers import GraniteMoeSharedModel, GraniteMoeSharedConfig

    >>> # Initializing a GraniteMoeShared granitemoe-3b style configuration
    >>> configuration = GraniteMoeSharedConfig()

    >>> # Initializing a model from the granitemoe-7b style configuration
    >>> model = GraniteMoeSharedModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```
    granitemoesharedpast_key_valuesi }  
vocab_sizei   hidden_sizei +  intermediate_size    num_hidden_layersnum_attention_headsNnum_key_value_headssilu
hidden_acti   max_position_embeddingsg{Gz?initializer_rangegư>rms_norm_epsT	use_cachepad_token_id   bos_token_id   eos_token_idFtie_word_embeddingsrope_parametersattention_biasg        attention_dropoutg      ?embedding_multiplierlogits_scalingresidual_multiplierattention_multiplier   num_local_expertsnum_experts_per_tokoutput_router_logitsgMbP?router_aux_loss_coefr   shared_intermediate_sizec                 ^    | j                   | j                  | _         t        |   di | y )N )r   r   super__post_init__)selfkwargs	__class__s     /var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/granitemoeshared/configuration_granitemoeshared.pyr0   z$GraniteMoeSharedConfig.__post_init__X   s-    ##+'+'?'?D$''    )+__name__
__module____qualname____doc__
model_typekeys_to_ignore_at_inferencer   int__annotations__r   r   r   r   r   r   strr   r   floatr   r   boolr   r   r   listr   r    r   dictr!   r"   r#   r$   r%   r&   r(   r)   r*   r+   r,   r0   __classcell__)r3   s   @r4   r
   r
      s   4 $J#4"5JK"s"s!!&*t*J#'S'#u#L%It#L#*# L#* +,L#S	/D(, %%48O^d*T18 ND ,/us{T)//2%#+,2),NECK$&,.1t+1/2%#+,2$%sTz%&'t'(-$+-).%$,.$%c%( (r5   r
   N)r9   huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r
   __all__r.   r5   r4   <module>rI      sL   & + . 3 1 # >?>(- >(  @>(B $
$r5   