
    i                     v    d Z ddlmZ ddlmZ ddlmZ ddlmZ  ed      e G d	 d
e                    Z	d
gZ
y)z$GraniteMoeHybrid model configuration    )strict   )PreTrainedConfig)RopeParameters)auto_docstringz!ibm-granite/granite-speech-3.2-8b)
checkpointc                       e Zd ZU dZdZddiZdgZdZee	d<   dZ
ee	d	<   d
Zee	d<   dZee	d<   dZee	d<   dZedz  e	d<   dZee	d<   dZee	d<   dZee	d<   dZee	d<   dZee	d<   dZedz  e	d<   dZedz  e	d<   dZeee   z  dz  e	d<   d Zee	d!<   dZeez  dz  e	d"<   d Zee	d#<   d$Z eez  dz  e	d%<   d&Z!eez  dz  e	d'<   d&Z"eez  dz  e	d(<   d&Z#eez  dz  e	d)<   d&Z$eez  dz  e	d*<   d+Z%edz  e	d,<   dZ&edz  e	d-<   d Z'edz  e	d.<   d/Z(edz  e	d0<   d1Z)ee	d2<   dZ*edz  e	d3<   dZ+ee   dz  e	d<   d4Z,edz  e	d5<   dZ-edz  e	d6<   d7Z.edz  e	d8<   d9Z/eez  dz  e	d:<   d;Z0edz  e	d<<   dZ1edz  e	d=<   d7Z2edz  e	d><   dZ3edz  e	d?<   d Z4edz  e	d@<   d/Z5edz  e	dA<   dBZ6edz  e	dC<   d$ edD      fZ7eeef   e8eef   z  dz  e	dE<    fdFZ9dG Z: xZ;S )HGraniteMoeHybridConfiga  
    embedding_multiplier (`float`, *optional*, defaults to 1.0):
        embedding multiplier.
    logits_scaling (`float`, *optional*, defaults to 1.0):
        divisor for output logits.
    residual_multiplier (`float`, *optional*, defaults to 1.0):
        residual multiplier.
    attention_multiplier (`float`, *optional*, defaults to 1.0):
        attention multiplier.
    shared_intermediate_size (`int`, *optional*, defaults to 1024):
        intermediate size for shared experts.
    position_embedding_type (`str`, *optional*):
        Positional embedding type to be used; defaults to None. Allowed options: `[None, "rope"]`

    Example:

    ```python
    >>> from transformers import GraniteMoeHybridModel, GraniteMoeHybridConfig

    >>> # Initializing a GraniteMoeHybrid config
    >>> configuration = GraniteMoeHybridConfig()

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```granitemoehybridlayers_block_typelayer_typespast_key_valuesi }  
vocab_sizei   hidden_sizei +  intermediate_size    num_hidden_layersnum_attention_headsNnum_key_value_headssilu
hidden_acti   max_position_embeddingsg{Gz?initializer_rangegư>rms_norm_epsT	use_cachepad_token_id   bos_token_id   eos_token_idFtie_word_embeddingsrope_parametersattention_biasg        attention_dropoutg      ?embedding_multiplierlogits_scalingresidual_multiplierattention_multiplier   num_local_expertsnum_experts_per_tokoutput_router_logitsgMbP?router_aux_loss_coefi   shared_intermediate_sizeposition_embedding_type   mamba_n_headsmamba_n_groups   mamba_d_stateautomamba_d_head   mamba_d_convmamba_expandmamba_chunk_sizemamba_conv_biasmamba_proj_biastime_step_ming?time_step_maxinftime_step_limitc                 h   | j                   | j                  | _         | j                  | j                  z  }| j                  dk(  r|| j
                  z  | _        | j                  t        | j                        nd | _        | j                  dg| j                  z  | _        t        | ,  di | y )Nr5   mamba )r   r   r9   r   r6   r1   r@   tupler   r   super__post_init__)selfkwargsmamba_intermediate	__class__s      /var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/granitemoehybrid/configuration_granitemoehybrid.pyrF   z$GraniteMoeHybridConfig.__post_init__c   s    ##+'+'?'?D$!..1A1AA& 2d6H6H HD>B>R>R>^uT%9%9:dh# 'y4+A+AAD''    c                     | j                   | j                  z  }|| j                  z  dk7  rt        d      | j                  | j                  z  |k7  rt        d      y)zOPart of `@strict`-powered validation. Validates the architecture of the config.r   z4mamba_n_heads must divide mamba_expand * hidden_sizezPThe dimensions for the Mamba head state do not match the model intermediate_sizeN)r9   r   r1   
ValueErrorr6   )rG   rI   s     rK   validate_architecturez,GraniteMoeHybridConfig.validate_architectureq   sf     "..1A1AA 2 22a7STTt1115GGopp HrL   )<__name__
__module____qualname____doc__
model_typeattribute_mapkeys_to_ignore_at_inferencer   int__annotations__r   r   r   r   r   r   strr   r   floatr   r   boolr   r   r    listr!   r"   r   dictr#   r$   r%   r&   r'   r(   r*   r+   r,   r-   r.   r/   r   r1   r2   r4   r6   r8   r9   r:   r;   r<   r=   r>   r@   rD   rF   rO   __classcell__)rJ   s   @rK   r
   r
      s   4 $J(-8M#4"5JK"s"s!!&*t*J#'S'#u#L%It#L#*# L#* +,L#S	/D(, %%48O^d*T18 ND ,/us{T)//2#+,2),NC%K$&,.1ut+1/2#+,2$%sTz%&'t'(-$+-).%$,.$(c(*.S4Z.$(KcT!( #M3:#!"NC$J" #M3:#%+L#)d"+ L#*  L#* #&cDj&#'OTD['#(OTD[("'M54<'"%M54<%HKUSX\GZOT%,'%u*==DZ(qrL   r
   N)rS   huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r
   __all__rC   rL   rK   <module>rd      sR    + . 3 1 # >?_q- _q  @_qD $
$rL   