
    i                     r    d Z ddlZddlmZ ddlmZ ddlmZ  ed      e G d	 d
e                    Zd
gZ	y)zMAMBA configuration    N)strict   )PreTrainedConfig)auto_docstringzstate-spaces/mamba-2.8b)
checkpointc                       e Zd ZU dZdZdZeed<   dZeed<   dZ	eed<   d	Z
eed
<   dZeed<   dZedz  ed<   dZedz  ed<   dZeee   z  dz  ed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeez  ed <   d!Zeed"<   d#Zeed$<   dZeed%<   d&Zeed'<   d(Zeed)<   dZ eed*<   dZ!eed+<   dZ"eed,<   dZ#eed-<   dZ$eed.<    fd/Z%e&d0        Z' xZ(S )1MambaConfiga:  
    layer_norm_epsilon (`float`, *optional*, defaults to 1e-05):
        The epsilon to use in the layer normalization layers.
    expand (`int`, *optional*, defaults to 2):
        Expanding factor used to determine the intermediate size.
    use_bias (`bool`, *optional*, defaults to `False`):
        Whether or not to use bias in ["in_proj", "out_proj"] of the mixer block
    use_conv_bias (`bool`, *optional*, defaults to `True`):
        Whether or not to use bias in the convolution layer of the mixer block.
    residual_in_fp32 (`bool`, *optional*, defaults to `True`):
        Whether or not residuals should be in `float32`. If set to `False` residuals will keep the same `dtype` as the rest of the model
    rescale_prenorm_residual (`bool`, *optional*, defaults to `False`):
        Whether or not to rescale `out_proj` weights when initializing.
    use_mambapy (`bool`, *optional*, defaults to `False`):
        Determines the fallback strategy during training if the CUDA-based official implementation of Mamba is not available. If `True`,
        the mamba.py implementation is used. If `False`, the naive and slower implementation is used. Consider switching to the naive
        version if memory is limited.
    use_associative_scan (`bool`, *optional*, defaults to `True`):
        Whether to use PyTorch's `torch._higher_order_ops.associative_scan` for the parallel scan instead of the naive
        sequential implementation. The associative scan is only active during `torch.compile` tracing and
        requires torch >= 2.9.0. Both paths are tested to produce numerically identical results (see
        `test_associative_scan_matches_sequential`). Set to `False` to fall back to the sequential loop.

    Example:

    ```python
    >>> from transformers import MambaConfig, MambaModel

    >>> # Initializing a Mamba configuration
    >>> configuration = MambaConfig()

    >>> # Initializing a model (with random weights) from the configuration
    >>> model = MambaModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```mambaih  
vocab_sizei   hidden_size   
state_size    num_hidden_layersgh㈵>layer_norm_epsilonr   Npad_token_idbos_token_ideos_token_id   expand   conv_kernelFuse_biasTuse_conv_biassilu
hidden_actg?initializer_rangeresidual_in_fp32autotime_step_rankg      ?time_step_scalegMbP?time_step_mintime_step_maxrandomtime_step_init_schemeg-C6?time_step_floorrescale_prenorm_residual	use_cacheuse_mambapyuse_associative_scantie_word_embeddingsc                     t        | j                  | j                  z        | _        | j                  dk(  r"t        j                  | j                  dz        n| j                  | _        t        |    di | y )Nr   r    )	intr   r   intermediate_sizer    mathceilsuper__post_init__)selfkwargs	__class__s     ~/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/mamba/configuration_mamba.pyr3   zMambaConfig.__post_init__^   sf    !$T[[43C3C%C!D040C0Cv0MDIId&&+,SWSfSf 	 	''    c                 "    dg| j                   z  S )Nr
   )r   )r4   s    r7   layer_typeszMambaConfig.layer_typese   s    y41111r8   ))__name__
__module____qualname____doc__
model_typer   r.   __annotations__r   r   r   r   floatr   r   r   listr   r   r   boolr   r   strr   r   r    r!   r"   r#   r%   r&   r'   r(   r)   r*   r+   r3   propertyr:   __classcell__)r6   s   @r7   r	   r	      sM   $L JJKJs $$ L#*  L#* +,L#S	/D(,FCOKHdM4J"u"!d! &NC#I& OU  M5 M5!)3)!OU!%*d*ItK!%$% $$( 2 2r8   r	   )
r>   r0   huggingface_hub.dataclassesr   configuration_utilsr   utilsr   r	   __all__r-   r8   r7   <module>rK      sM      . 3 # 45M2" M2  6M2` /r8   