
    i9                     r    d dl mZ ddlmZ ddlmZ ddlmZ  ed      e G d d	e                    Zd	gZ	y
)    )strict   )PreTrainedConfig)RopeParameters)auto_docstringz!swiss-ai/Apertus-8B-Instruct-2509)
checkpointc            	           e Zd ZU dZdZdgZdZdddddddddZd	gd
gfddgdgfdgdgfdZdZ	e
ed<   dZe
ed<   dZe
ed<   dZe
ed<   dZe
ed<   dZe
dz  ed<   dZeed<   dZe
ed<   dZeed<   dZeed <   d!Zeed"<   d#Ze
dz  ed$<   d%Ze
dz  ed&<   d'Ze
ee
   z  dz  ed(<   d)Zeed*<   dZee z  dz  ed+<   d)Z!eed,<   d-Z"ee
z  ed.<    fd/Z# xZ$S )0ApertusConfigaz  
    ```python
    >>> from transformers import ApertusModel, ApertusConfig

    >>> # Initializing a Apertus-8B style configuration
    >>> configuration = ApertusConfig()

    >>> # Initializing a model from the Apertus-8B style configuration
    >>> model = ApertusModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```apertuspast_key_values    `fAcolwisereplicated_with_grad_allreducerowwise)zlayers.*.self_attn.q_projzlayers.*.self_attn.k_projzlayers.*.self_attn.v_projzlayers.*.self_attn.q_normzlayers.*.self_attn.k_normzlayers.*.self_attn.o_projzlayers.*.mlp.up_projzlayers.*.mlp.down_proj	input_idsinputs_embedshidden_statesattention_mask)embed_tokenslayersnormi   
vocab_sizei   hidden_sizei 8  intermediate_size    num_hidden_layersnum_attention_headsNnum_key_value_headsxielu
hidden_acti   max_position_embeddingsg{Gz?initializer_rangegh㈵>rms_norm_epsT	use_cacher   pad_token_id   bos_token_id   eos_token_idFtie_word_embeddingsrope_parametersattention_biasg        attention_dropoutc                     | j                   | j                  | _         | j                  ddddddd| _        t        |   di | y )	Nllama3r   g       @i    g      ?g      @)	rope_type
rope_thetafactor original_max_position_embeddingslow_freq_factorhigh_freq_factor )r   r   r+   super__post_init__)selfkwargs	__class__s     /var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/apertus/configuration_apertus.pyr8   zApertusConfig.__post_init__S   sW    ##+'+'?'?D$'%(48#&$'$D  	''    )%__name__
__module____qualname____doc__
model_typekeys_to_ignore_at_inferencedefault_thetabase_model_tp_planbase_model_pp_planr   int__annotations__r   r   r   r   r   r    strr!   r"   floatr#   r$   boolr%   r'   r)   listr*   r+   r   dictr,   r-   r8   __classcell__)r;   s   @r<   r
   r
      sb    J#4"5M%.%.%.%E%E%. )"+	 &(9:#%568IJ!"_$56 JK"s"s!!&*t*J#(S(#u#L%It L#*  L#* +,L#S	/D(, %%48O^d*T18 ND %(us{(( (r=   r
   N)
huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r
   __all__r6   r=   r<   <module>rT      sL   * / 3 1 # >?B($ B(  @B(J 
r=   