
    i                     r    d dl mZ ddlmZ ddlmZ ddlmZ  ed      e G d d	e                    Zd	gZ	y
)    )strict   )PreTrainedConfig)RopeParameters)auto_docstringzLGAI-EXAONE/EXAONE-4.0-32B)
checkpointc            
           e Zd ZU dZdZdgZdddddddddd	Zdgd	gfd
dgd
gfd
gd
gfdZdZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZee
d<   dZe	e
d<   dZee
d<   dZee
d<   dZee
d <   d!Ze	d"z  e
d#<   d$Ze	ee	   z  d"z  e
d%<   d"Ze	d"z  e
d&<   d'Zee
d(<   d"Zeez  d"z  e
d)<   d*Z ee	z  e
d+<   dZ!e	d"z  e
d,<   d-Z"ee	z  d"z  e
d.<   d"Z#ee   d"z  e
d/<    fd0Z$ xZ%S )1Exaone4Configa~  
    sliding_window_pattern (`str`, *optional*):
        The pattern to use for sliding window attention. Can be one of:
            - `None`: No sliding window attention is used
            - `int`: Every `sliding_window` layers, use global attention, else use local attention.
            - `str`: A sequence of "L" (local attention) and "G" (global attention) characters that defines the
              attention pattern. The pattern starts from layer 0 and repeats every `sliding_window` layers. The
              final layer always uses global attention regardless of the pattern.
        For instance, sliding_window_pattern="LLLG" same as sliding_window=4, which means:
            - Layer 0, 1, 2: local attention,
            - Layer 3: global attention,
            ...(repeated)

    Example:

    ```python
    >>> from transformers import Exaone4Model, Exaone4Config

    >>> # Initializing a EXAONE configuration
    >>> configuration = Exaone4Config()

    >>> # Initializing a model from configuration
    >>> model = Exaone4Model(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```exaone4past_key_valuescolwisereplicated_with_grad_allreducerowwise)	zlayers.*.self_attn.q_projzlayers.*.self_attn.k_projzlayers.*.self_attn.v_projzlayers.*.self_attn.q_normzlayers.*.self_attn.k_normzlayers.*.self_attn.o_projzlayers.*.mlp.gate_projzlayers.*.mlp.up_projzlayers.*.mlp.down_proj	input_idsinputs_embedshidden_statesattention_mask)embed_tokenslayersnormi  
vocab_sizei   hidden_sizei @  intermediate_size    num_hidden_layersnum_attention_headsnum_key_value_headssilu
hidden_acti   max_position_embeddingsg{Gz?initializer_rangegh㈵>rms_norm_epsT	use_cacher   Nbos_token_id   eos_token_idpad_token_idFtie_word_embeddingsrope_parametersg        attention_dropoutsliding_window   sliding_window_patternlayer_typesc                    | j                   d| _        | j                  Nt        | j                        D cg c]*  }|dz   | j                  z  dk7  r|| j                  k  rdnd, c}| _        t        |   di | y c c}w )Nr      sliding_attentionfull_attention )r+   r-   r.   ranger   super__post_init__)selfkwargsi	__class__s      /var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/exaone4/configuration_exaone4.pyr6   zExaone4Config.__post_init__d   s    &*+D'#
 t556	   Ut::;q@QI_I_E_ $%& D 	'' s   /A?)&__name__
__module____qualname____doc__
model_typekeys_to_ignore_at_inferencebase_model_tp_planbase_model_pp_planr   int__annotations__r   r   r   r   r   r   strr    r!   floatr"   r#   boolr$   r&   listr'   r(   r)   r   dictr*   r+   r-   r.   r6   __classcell__)r:   s   @r;   r
   r
      s   8 J#4"5 &/%.%.%E%E%."+ )"+
 &(9:#%568IJ!"_$56 JK"s"s!!!!J#'S'#u#L%It L#* +,L#S	/D(,#L#*# %%48O^d*T18%(us{(!%NC$J%/0C#I,0$(KcT!(( (    r
   N)
huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r
   __all__r3   rL   r;   <module>rR      sK   * / 3 1 # 78Q($ Q(  9Q(h 
rL   