
    i                     r    d dl mZ ddlmZ ddlmZ ddlmZ  ed      e G d d	e                    Zd	gZ	y
)    )strict   )PreTrainedConfig)RopeParameters)auto_docstringzallenai/Olmo-3-7B-Instruct)
checkpointc                       e Zd ZU dZdZdgZddddddddZd	gd
gfddgdgfdgdgfdZdZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	dz  e
d<   dZee
d<   dZe	e
d<   dZee
d<   dZee
d <   d!Ze	dz  e
d"<   dZe	dz  e
d#<   d$Ze	ee	   z  dz  e
d%<   d&Zee
d'<   dZeez  dz  e
d(<   d&Zee
d)<   d*Z ee	z  e
d+<   d,Z!ee
d-<   dZ"e	dz  e
d.<   dZ#ee   dz  e
d/<    fd0Z$ xZ%S )1Olmo3Configa  
    Example:

    ```python
    >>> from transformers import Olmo3Model, Olmo3Config

    >>> # Initializing a Olmo3 7B style configuration
    >>> configuration = Olmo3Config()

    >>> # Initializing a model from the Olmo3 7B style configuration
    >>> model = Olmo3Model(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```
    olmo3past_key_valuescolwise_gather_outputrowwise_split_inputcolwiserowwise)zlayers.*.self_attn.q_projzlayers.*.self_attn.k_projzlayers.*.self_attn.v_projzlayers.*.self_attn.o_projzlayers.*.mlp.gate_projzlayers.*.mlp.up_projzlayers.*.mlp.down_proj	input_idsinputs_embedshidden_statesattention_mask)embed_tokenslayersnormi  
vocab_sizei   hidden_sizei +  intermediate_size    num_hidden_layersnum_attention_headsNnum_key_value_headssilu
hidden_acti   max_position_embeddingsg{Gz?initializer_rangeT	use_cache   pad_token_idbos_token_idig  eos_token_idFtie_word_embeddingsrope_parametersattention_biasg        attention_dropoutgh㈵>rms_norm_epssliding_windowlayer_typesc                 $   | j                   | j                  | _         | j                  5t        | j                        D cg c]  }|dz   dz  dk7  rdnd c}| _        | j                   | j                  | _         t        |   di | y c c}w )Nr$      r   sliding_attentionfull_attention )r   r   r.   ranger   super__post_init__)selfkwargsi	__class__s      ~/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/olmo3/configuration_olmo3.pyr6   zOlmo3Config.__post_init__X   s    ##+'+'?'?D$#W\]a]s]sWt RSA{a'7#=MM D ##+'+'?'?D$'' s   B)&__name__
__module____qualname____doc__
model_typekeys_to_ignore_at_inferencebase_model_tp_planbase_model_pp_planr   int__annotations__r   r   r   r   r   r    strr!   r"   floatr#   boolr%   r&   r'   listr(   r)   r   dictr*   r+   r,   r-   r.   r6   __classcell__)r:   s   @r;   r
   r
      s{   " J#4"5%<%<%<%:"+ )"+ &(9:#%568IJ!"_$56 JK"s"s!!&*t*J#'S'#u#It L#* #L#*#+0L#S	/D(0 %%48O^d*T18 ND %(us{(L%!%NC$J%$(KcT!(
( 
(    r
   N)
huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r
   __all__r3   rL   r;   <module>rR      sJ   * / 3 1 # 78D(" D(  9D(N /rL   