
    i                     r    d Z ddlZddlmZ ddlmZ ddlmZ  ed      e G d	 d
e                    Zd
gZ	y)zMAMBA2 configuration    N)strict   )PreTrainedConfig)auto_docstringzstate-spaces/mamba2-2.8b)
checkpointc                       e Zd ZU dZdZdZeed<   dZeed<   dZ	eed<   d	Z
eed
<   dZeed<   dZeed<   dZeed<   dZedz  ed<   dZedz  ed<   dZeee   z  dz  ed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed <   d!Zeed"<   dZeed#<   d$Zeez  ed%<   d&Zeed'<   d!Zeed(<   d)Z eed*<   d+ ed,      fZ!ee   e"ed-f   z  ed.<   dZ#eed/<   dZ$eed0<   dZ%eed1<   d2Z&eed3<   dZ'eed4<    fd5Z(d6 Z)e*d7        Z+ xZ,S )8Mamba2Configa  
    layer_norm_epsilon (`float`, *optional*, defaults to 1e-05):
        The epsilon to use in the layer normalization layers..
    expand (`int`, *optional*, defaults to 2):
        Expanding factor used to determine the intermediate size.
    n_groups (`int`, *optional*, defaults to 8):
        Number of groups for the evolution matrices of mamba 2.
    use_bias (`bool`, *optional*, defaults to `False`):
        Whether or not to use bias in ["in_proj", "out_proj"] of the mixer block
    use_conv_bias (`bool`, *optional*, defaults to `True`):
        Whether or not to use bias in the convolution layer of the mixer block.
    residual_in_fp32 (`bool`, *optional*, defaults to `True`):
        Whether or not residuals should be in `float32`. If set to `False` residuals will keep the same `dtype` as the rest of the model
    rescale_prenorm_residual (`bool`, *optional*, defaults to `False`):
        Whether or not to rescale `out_proj` weights when initializing.
    chunk_size (`int`, *optional*, defaults to 256):
        Size of the chunks that will comprise the sequence.

    Example:

    ```python
    >>> from transformers import Mamba2Config, Mamba2Model

    >>> # Initializing a Mamba2 configuration
    >>> configuration = Mamba2Config()

    >>> # Initializing a model (with random weights) from the configuration
    >>> model = Mamba2Model(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```mamba2   	num_heads@   head_dimi   
vocab_sizei   hidden_size
state_sizenum_hidden_layersgh㈵>layer_norm_epsilon   Npad_token_idr   bos_token_id   eos_token_idexpand   conv_kernel   n_groupsFuse_biasTuse_conv_biassilu
hidden_actg?initializer_rangeresidual_in_fp32autotime_step_rankgMbP?time_step_mintime_step_maxg-C6?time_step_floorg        inf.time_step_limitrescale_prenorm_residual	use_cacherms_norm   
chunk_sizetie_word_embeddingsc                     | j                   dk(  r"t        j                  | j                  dz        n| j                   | _         t	        |   di | y )Nr$       )r%   mathceilr   super__post_init__)selfkwargs	__class__s     /var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/mamba2/configuration_mamba2.pyr7   zMamba2Config.__post_init__[   sJ    040C0Cv0MDIId&&+,SWSfSf 	 	''    c                     | j                   | j                  z  | j                  | j                  z  k7  r@t	        d| j                   | j                  z   d| j                  | j                  z   d      y)zOPart of `@strict`-powered validation. Validates the architecture of the config.z2Inconsistent configuration: hidden_size * expand (z#) must equal num_heads * head_dim (z).N)r   r   r   r   
ValueErrorr8   s    r;   validate_architecturez"Mamba2Config.validate_architecturea   sr    t{{*0NO$$t{{23 4NNT]]2327  Pr<   c                 "    dg| j                   z  S )Nmamba)r   r?   s    r;   layer_typeszMamba2Config.layer_typesj   s    y41111r<   )-__name__
__module____qualname____doc__
model_typer   int__annotations__r   r   r   r   r   r   floatr   r   r   listr   r   r   r   boolr   r!   strr"   r#   r%   r&   r'   r(   r*   tupler+   r,   r-   r/   r0   r7   r@   propertyrC   __classcell__)r:   s   @r;   r	   r	      s   B JIsHcJKJs $$ L#*  L#* +,L#S	/D(,FCOKHcHdM4J"u"!d! &NC#I& M5 M5!OU!8;U5\7JOT%[5#44J%*d*ItHdJ %%( 2 2r<   r	   )
rG   r4   huggingface_hub.dataclassesr   configuration_utilsr   utilsr   r	   __all__r3   r<   r;   <module>rV      sN      . 3 # 56R2# R2  7R2j 
r<   