
    iP"                     
   d Z ddlmZ ddlmZ ddlmZmZ  e       rddlm	Z	m
Z
mZmZmZmZmZmZ dZn0ddlmZ ed	   Z	ed
   Z
ed   Zed   Zed   Zed   ZdededefdZdZ ed      e G d de                    ZdgZy)zxLSTM configuration.    )strict   )PreTrainedConfig)auto_docstringis_xlstm_available)BackendModeTypeChunkwiseKernelType	DtypeTypeSequenceKernelTypeStepKernelTypeWeightModeTyperound_up_to_next_multiple_ofxLSTMLargeConfigT)Literal)traintrain_with_padding	inference)chunkwise--native_autogradzparallel--native_autograd)float32bfloat16float16native_sequence__nativenative)singlefusedxmultiple_ofreturnc                 0    t        | |z   dz
  |z  |z        S )z0Rounds up x to the next multiple of multiple_of.   )int)r   r   s     ~/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/xlstm/configuration_xlstm.pyr   r   1   s     Q_q([8KGHH    FzNX-AI/xLSTM-7b)
checkpointc                   X    e Zd ZU dZdZdZeed<   dZeed<   dZ	edz  ed<   d	Z
eed
<   dZedz  ed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   d Zeed!<   d"Zeed#<   dZeed$<   d%Z e!ed&<   dZ"eed'<   d(Z#e!ed)<   d*Z$eed+<   d"Z%eed,<   d-Z&eed.<   d/Z'eed0<   d1Z(e)ed2<   dZ*eed3<   d4Z+edz  ed5<   d6Z,edz  ed7<   d8Z-ee.e   z  dz  ed9<   d:Z/eed;<    fd<Z0e1d=        Z2e1d>        Z3e1d?        Z4e1d@        Z5dA Z6 xZ7S )BxLSTMConfigaN  
    num_blocks (int, optional, *optional*, defaults to 32):
        Number of blocks of the xLSTM model, use num_hidden_layers if None.
    num_heads (int, optional, *optional*, defaults to 8):
        Number of heads for the xLSTM Layer/Cell.
    use_bias (bool, optional, *optional*, defaults to `False`):
        Whether to use biases in the xLSTM model.
    norm_reduction_force_float32 (bool, optional, *optional*, defaults to `True`):
        Whether to force the float32 norm reduction op to be done in fp32 precision.
    add_out_norm (bool, optional, *optional*, defaults to `True`):
        Whether to add an output norm after the blocks before the LMHead.
    qk_dim_factor (float, optional, *optional*, defaults to 0.5):
        Scale factor for the query and key dimension.
    v_dim_factor (float, optional, *optional*, defaults to 1.0):
        Scale factor for the value dimension.
    chunkwise_kernel (ChunkwiseKernelType, optional, *optional*, defaults to `"chunkwise--native_autograd"`):
        Kernel type for chunkwise processing mode.
    sequence_kernel (SequenceKernelType, optional, *optional*, defaults to `"native_sequence__native"`):
        Kernel type for sequence processing mode.
    step_kernel (StepKernelType, optional, *optional*, defaults to `"native"`):
        Kernel type for step processing mode.
    mode (BackendModeType, optional, *optional*, defaults to `"inference"`):
        Operation mode (inference is needed for generation).
    chunk_size (int, optional, *optional*, defaults to 64):
        Internal chunk size.
    return_last_states (bool, optional, *optional*, defaults to `True`):
        If to return the last states / cache internally. Needed as True for generation.
    autocast_kernel_dtype (DtypeType, optional, *optional*, defaults to `"bfloat16"`):
        Kernel dtype for the states.
    inference_state_dtype (DtypeType, optional, *optional*, defaults to `"float32"`):
        Kernel dtype for states in inference.
    ffn_proj_factor (float, optional, *optional*, defaults to 2.667):
        Size factor of the post-up projection gated Feed Forward network.
    ffn_round_up_to_multiple_of (int, optional, *optional*, defaults to 64):
        Size factor round value of the post-up projection gated Feed Forward network.
    gate_soft_cap (float, optional, *optional*, defaults to 15.0):
        Gate soft cap scale.
    output_logit_soft_cap (float, optional, *optional*, defaults to 30.0):
        Output logit soft cap scale.
    weight_mode (`Literal`, *optional*, defaults to `"single"`):
        Whether parallel linear layers are separated or fused (single).
    max_inference_chunksize (int, optional, *optional*, defaults to 16384):
        Limit the chunk size for inference to save memory.

    Example:

    ```python
    >>> from transformers import xLSTMConfig, xLSTMModel

    >>> # Initializing a xLSTM configuration
    >>> configuration = xLSTMConfig()

    >>> # Initializing a model (with random weights) from the configuration
    >>> model = xLSTMModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```xlstmi  
vocab_sizei   hidden_sizeNembedding_dim    num_hidden_layers
num_blocks   	num_headsFuse_biasTnorm_reduction_force_float32tie_word_embeddingsadd_out_normgư>norm_epsg      ?qk_dim_factorg      ?v_dim_factorr   chunkwise_kernelr   sequence_kernelr   step_kernelr   mode@   
chunk_sizereturn_last_statesr   autocast_kernel_dtypeepsr   inference_state_dtypegtV@ffn_proj_factorffn_round_up_to_multiple_ofg      .@gate_soft_capg      >@output_logit_soft_capr   weight_mode	use_cacher    pad_token_idr   bos_token_id   eos_token_idi @  max_inference_chunksizec                 l   | j                   | j                   n| j                  | _         | j                  | j                  n| j                   | _        | j                  | j                  n| j                  | _        | j                  | j                  n| j                  | _        t	        |   di | y )N )r)   r*   r,   r-   super__post_init__)selfkwargs	__class__s     r"   rO   zxLSTMConfig.__post_init__   s    /3/?/?/K4++QUQcQc373E3E3QT//W[WgWg;?;Q;Q;]!7!7cgcrcr-1__-H$//dNdNd''r#   c                 J    t        | j                  | j                  z  d      S Nr;   )r   )r   r)   r5   rP   s    r"   qk_dimzxLSTMConfig.qk_dim   s&    +t111
 	
r#   c                 J    t        | j                  | j                  z  d      S rT   )r   r)   r6   rU   s    r"   v_dimzxLSTMConfig.v_dim   s&    +t000
 	
r#   c                 4    | j                   | j                  z  S N)rV   r/   rU   s    r"   qk_head_dimzxLSTMConfig.qk_head_dim   s    {{dnn,,r#   c                 4    | j                   | j                  z  S rZ   )rX   r/   rU   s    r"   
v_head_dimzxLSTMConfig.v_head_dim   s    zzT^^++r#   c                    t         rAt        di d| j                  d| j                  d| j                  d| j
                  d| j                  d| j                  d| j                  d| j                  d	| j                  d
| j                  d| j                  d| j                  d| j                  d| j                  d| j                   d| j"                  d| j$                  d| j&                  d| j(                  d| j*                  d| j,                  d| j.                  d| j0                  d| j2                  S | S )Nr(   r*   r-   r/   r0   r3   r4   r1   r5   r6   r7   r8   r9   r:   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rM   )external_xlstmr   r(   r)   r,   r/   r0   r3   r4   r1   r5   r6   r7   r8   r9   r:   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rU   s    r"   to_xlstm_block_configz!xLSTMConfig.to_xlstm_block_config   s   # ??"..  11 ..	
  "..  .2-N-N #00 ".. "&!6!6 !% 4 4 !,,  YY!"  ??#$ $(#:#:%& '+&@&@'( HH)* '+&@&@+. !% 4 4/0 -1,L,L14 #0056 '+&@&@78 !,,9 > Kr#   )8__name__
__module____qualname____doc__
model_typer(   r!   __annotations__r)   r*   r,   r-   r/   r0   boolr1   r2   r3   r4   floatr5   r6   r7   r	   r8   r   r9   r   r:   r   r<   r=   r>   r
   r?   r@   rA   rB   rC   rD   rE   r   rF   rG   rH   rJ   listrK   rO   propertyrV   rX   r[   r]   r`   __classcell__)rR   s   @r"   r&   r&   8   s   9v JJK $M3:$s!Jd
!IsHd)- $- %%L$HeM5L%,H)H*CO'C"*K*'D/'J##'191C'090"OU"'))M5#'5'"*K*It L#*  L#* +,L#S	/D(,#(S(( 
 
 
 
 - - , ,!r#   r&   N)rd   huggingface_hub.dataclassesr   configuration_utilsr   utilsr   r   xlstm.xlstm_large.modelr   r	   r
   r   r   r   r   r   r_   typingr   r!   r&   __all__rM   r#   r"   <module>rr      s     . 3 7 	 	 	 NHIO!	% 89I !:;X&N./NI I# I# I N +,]" ]  -]@ /r#   