
    i                     x    d Z ddlmZ ddlmZ ddlmZ ddlmZ e edd	       G d
 de                    Z	dgZ
y)zBamba model configuration    )strict   )PreTrainedConfig)RopeParameters)auto_docstringz
    The BambaModel is a hybrid [mamba2](https://github.com/state-spaces/mamba) architecture with SwiGLU.
    The checkpoints are  jointly trained by IBM, Princeton, and UIUC.
    zibm-fms/Bamba-9.8b-2.2T-hf)custom_intro
checkpointc                       e Zd ZU dZdZddiZdgZdZee	d<   dZ
ee	d	<   d
Zee	d<   dZee	d<   dZee	d<   dZee	d<   dZedz  e	d<   dZee	d<   dZee	d<   dZee	d<   dZee	d<   dZedz  e	d<   dZedz  e	d<   dZedz  e	d <   d!Zeee   z  dz  e	d"<   d#Zee	d$<   d%Zeez  dz  e	d&<   dZee   dz  e	d'<   d(Zedz  e	d)<   d*Z eez  dz  e	d+<   dZ!edz  e	d,<   d-Z"edz  e	d.<   d/Z#edz  e	d0<   d!Z$edz  e	d1<   d-Z%edz  e	d2<   dZ&edz  e	d3<   dZ'edz  e	d4<   d5Z(edz  e	d6<   d7Z)edz  e	d8<   d% ed9      fZ*ee   e+eef   z  dz  e	d:<   d%Z,edz  e	d;<   dZ-e.e/z  dz  e	d<<   dZ0ee	d=<   dZ1ee	d><    fd?Z2e3d@        Z4dA Z5 xZ6S )BBambaConfigaN  
    num_logits_to_keep (`int` or `None`, *optional*, defaults to 1):
        Number of prompt logits to calculate during generation. If `None`, all logits will be calculated. If an
        integer value, only last `num_logits_to_keep` logits will be calculated. Default is 1 because only the
        logits of the last prompt token are needed for generation. For long sequences, the logits for the entire
        sequence may use a lot of memory so, setting `num_logits_to_keep=1` will reduce memory footprint
        significantly.
    attn_layer_indices (`list`, *optional*):
        Specifies the layer indices that will have full attention. Must contain values at most num_hidden_layers.
    z_loss_coefficient (`float`, *optional*, defaults to 0.0):
        Coefficient for auxiliary z-loss used to control logit growth during training
    bambalayer_typeslayers_block_typepast_key_valuesi  
vocab_sizeFtie_word_embeddingsi   hidden_sizei 8  intermediate_size    num_hidden_layersnum_attention_heads   Nnum_key_value_headssilu
hidden_actg{Gz?initializer_rangegh㈵>rms_norm_epsT	use_cache   num_logits_to_keepr   pad_token_idbos_token_id   eos_token_idi   max_position_embeddingsg        attention_dropoutattn_layer_indices   mamba_n_headsautomamba_d_headmamba_n_groups   mamba_d_state   mamba_d_convmamba_expandmamba_chunk_sizemamba_conv_biasmamba_proj_biasgMbP?time_step_ming?time_step_maxinftime_step_limitz_loss_coefficientrope_parametersattention_biasmlp_biasc                 ,   | j                   | j                  | _         | j                  dk(  r+| j                  | j                  z  | j
                  z  | _        | j                  t        | j                        nd | _        d|d<   t        | $  di | y )Nr)   g      ?partial_rotary_factor )
r   r   r*   r0   r   r(   r7   tuplesuper__post_init__)selfkwargs	__class__s     ~/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/bamba/configuration_bamba.pyrA   zBambaConfig.__post_init__T   s    ##+'+'?'?D$ & $ 1 1D4D4D DHZHZ ZD>B>R>R>^uT%9%9:dh*-&'''    c                     t        | j                        D cg c]   }| j                  r|| j                  v rdnd" c}S c c}w )N	attentionmamba)ranger   r&   )rB   is     rE   r   zBambaConfig.layers_block_typeb   sJ     4112
 !33T=T=T8TK[bb
 	
 
s   %A c                     | j                   | j                  z  }|| j                  z  dk7  rt        d      | j                  | j                  z  |k7  rt        d      y)zOPart of `@strict`-powered validation. Validates the architecture of the config.r   z4mamba_n_heads must divide mamba_expand * hidden_sizezPThe dimensions for the Mamba head state do not match the model intermediate_sizeN)r0   r   r(   
ValueErrorr*   )rB   mamba_intermediates     rE   validate_architecturez!BambaConfig.validate_architecturei   sd    !..1A1AA 2 22a7STTt1115GGopp HrF   )7__name__
__module____qualname____doc__
model_typeattribute_mapkeys_to_ignore_at_inferencer   int__annotations__r   boolr   r   r   r   r   r   strr   floatr   r   r   r    r!   r#   listr$   r%   r&   r(   r*   r+   r-   r/   r0   r1   r2   r3   r4   r5   r7   r?   r8   r9   r   dictr:   r;   rA   propertyr   rO   __classcell__)rD   s   @rE   r   r      s0    J"$78M#4"5J %%K"s"s!!&'t'J#u#L%It%&d
& L#*  L#* +,L#S	/D(,#)S),/us{T)/+/S	D(/ #M3:#%+L#)d"+!"NC$J" #M3:# L#*  L#* #&cDj&#'OTD['#(OTD[("'M54<'"%M54<%ADeEl@SOT%[5#66=S'**48O^d*T18 ND Hd( 
 
qrF   r   N)rS   huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r   __all__r>   rF   rE   <module>re      sY      . 3 1 #  ,Qq" Qq Qqh /rF   