
    i!                     r    d dl mZ ddlmZ ddlmZ ddlmZ  ed      e G d d	e                    Zd	gZ	y
)    )strict   )PreTrainedConfig)RopeParameters)auto_docstringzUsefulSensors/moonshine-tiny)
checkpointc                       e Zd ZU dZdZdgZdddddZd	Zee	d
<   dZ
ee	d<   dZee	d<   dZee	d<   dZee	d<   dZee	d<   dZee	d<   dZedz  e	d<   dZedz  e	d<   dZedz  e	d<   dZee	d<   dZee	d<   dZee	d<   dZee	d<   dZee	d<   dZee	d <   dZeez  dz  e	d!<   dZee	d"<   d#Z ee	d$<   d%Z!eez  e	d&<   dZ"edz  e	d'<   d(Z#ee$e   z  dz  e	d)<   dZ%edz  e	d*<   dZ&ee	d+<    fd,Z' xZ(S )-MoonshineConfiga	  
    encoder_num_key_value_heads (`int`, *optional*):
        This is the number of key_value heads that should be used to implement Grouped Query Attention. If
        `encoder_num_key_value_heads=encoder_num_attention_heads`, the model will use Multi Head Attention (MHA), if
        `encoder_num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
        converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
        by meanpooling all the original heads within that group. For more details, check out [this
        paper](https://huggingface.co/papers/2305.13245). If it is not specified, will default to
        `num_attention_heads`.
    decoder_num_key_value_heads (`int`, *optional*):
        This is the number of key_value heads that should be used to implement Grouped Query Attention. If
        `decoder_num_key_value_heads=decoder_num_attention_heads`, the model will use Multi Head Attention (MHA), if
        `decoder_num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
        converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
        by meanpooling all the original heads within that group. For more details, check out [this
        paper](https://huggingface.co/papers/2305.13245). If it is not specified, will default to
        `decoder_num_attention_heads`.
    pad_head_dim_to_multiple_of (`int`, *optional*):
        Pad head dimension in encoder and decoder to the next multiple of this value. Necessary for using certain
        optimized attention implementations.
    encoder_hidden_act (`str` or `function`, *optional*, defaults to `"gelu"`):
        The non-linear activation function (function or string) in the encoder.
    decoder_hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
        The non-linear activation function (function or string) in the decoder.

    Example:

    ```python
    >>> from transformers import MoonshineModel, MoonshineConfig

    >>> # Initializing a Moonshine style configuration
    >>> configuration = MoonshineConfig().from_pretrained("UsefulSensors/moonshine-tiny")

    >>> # Initializing a model from the configuration
    >>> model = MoonshineModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```	moonshinepast_key_valuesdecoder_num_key_value_headsdecoder_num_attention_headsdecoder_num_hidden_layersdecoder_hidden_act)num_key_value_headsnum_attention_headsnum_hidden_layers
hidden_acti   
vocab_sizei   hidden_sizei  intermediate_size   encoder_num_hidden_layers   encoder_num_attention_headsNencoder_num_key_value_headspad_head_dim_to_multiple_ofgeluencoder_hidden_actsilui   max_position_embeddingsg{Gz?initializer_range   decoder_start_token_idT	use_cacherope_parametersis_encoder_decoderFattention_biasg        attention_dropoutbos_token_id   eos_token_idpad_token_idtie_word_embeddingsc                     | j                   | j                  | _         | j                  | j                  | _        |j	                  dd       t        |   di | y )Npartial_rotary_factorg? )r   r   r   r   
setdefaultsuper__post_init__)selfkwargs	__class__s     /var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/moonshine/configuration_moonshine.pyr4   zMoonshineConfig.__post_init__i   sX    ++3/3/O/OD,++3/3/O/OD,137''    ))__name__
__module____qualname____doc__
model_typekeys_to_ignore_at_inferenceattribute_mapr   int__annotations__r   r   r   r   r   r   r   r   r   r   strr   r!   r"   floatr$   r%   boolr&   r   dictr'   r(   r)   r*   r,   listr-   r.   r4   __classcell__)r7   s   @r8   r
   r
      sg   &P J#4"5<<8*	M JK!s!%&s&%&s&'(('((.2t2.2t2.2t2$$$$#&S&#u#"#C#It48O^d*T18## ND %(us{( L#* +,L#S	/D(,#L#*# $$( (r9   r
   N)
huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r
   __all__r1   r9   r8   <module>rN      sK   * / 3 1 # 9:S(& S(  ;S(l 
r9   