
    i                     v    d Z ddlmZ ddlmZ ddlmZ ddlmZ  ed      e G d	 d
e                    Z	d
gZ
y)zOLMoE model configuration    )strict   )PreTrainedConfig)RopeParameters)auto_docstringzallenai/OLMoE-1B-7B-0924)
checkpointc                       e Zd ZU dZdZdgZddiZdddddd	d
dZdZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	dz  e
d<   dZee
d<   dZe	e
d<   dZee
d<   dZee
d<   dZee
d<   d Ze	dz  e
d!<   dZe	dz  e
d"<   d#Ze	ee	   z  dz  e
d$<   d%Zee
d&<   dZeez  dz  e
d'<   d%Z ee
d(<   d)Z!ee	z  e
d*<   dZ"edz  e
d+<   d,Z#e	e
d-<   d.Z$e	e
d<   d%Z%ee
d/<   d0Z&ee
d1<   d%Z'ee
d2<    fd3Z( xZ)S )4OlmoeConfiga6  
    clip_qkv (`float`, *optional*):
        If not `None`, elements of query, key and value attention states are clipped so that their
        absolute value does not exceed this value.

    ```python
    >>> from transformers import OlmoeModel, OlmoeConfig

    >>> # Initializing a OLMoE 7B A1B style configuration
    >>> configuration = OlmoeConfig()

    >>> # Initializing a model from the OLMoE 7B A1B style configuration
    >>> model = OlmoeModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```
    olmoepast_key_valuesnum_local_expertsnum_expertscolwise_gather_outputrowwise_split_inputpacked_colwiserowwisemoe_tp_experts)zlayers.*.self_attn.q_projzlayers.*.self_attn.k_projzlayers.*.self_attn.v_projzlayers.*.self_attn.o_projz!layers.*.mlp.experts.gate_up_projzlayers.*.mlp.experts.down_projzlayers.*.mlp.expertsi  
vocab_sizei   hidden_sizeintermediate_size   num_hidden_layersnum_attention_headsNnum_key_value_headssilu
hidden_acti   max_position_embeddingsg{Gz?initializer_rangegh㈵>rms_norm_epsT	use_cache   pad_token_idbos_token_idig  eos_token_idFtie_word_embeddingsrope_parametersattention_biasg        attention_dropoutclip_qkv   num_experts_per_tok@   output_router_logitsg{Gz?router_aux_loss_coefnorm_topk_probc                 ^    | j                   | j                  | _         t        |   di | y )N )r   r   super__post_init__)selfkwargs	__class__s     ~/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/olmoe/configuration_olmoe.pyr3   zOlmoeConfig.__post_init__S   s-    ##+'+'?'?D$''    )*__name__
__module____qualname____doc__
model_typekeys_to_ignore_at_inferenceattribute_mapbase_model_tp_planr   int__annotations__r   r   r   r   r   r   strr   r   floatr   r    boolr"   r#   r$   listr%   r&   r   dictr'   r(   r)   r+   r   r-   r.   r/   r3   __classcell__)r6   s   @r7   r
   r
      ss   & J#4"5(-8M &=%<%<%:-=*3 0 JK!s!s!!&*t*J#'S'#u#L%It L#* #L#*#+0L#S	/D(0 %%48O^d*T18 ND %(us{(!Hedl!  K!&$&"&%& ND ( (r8   r
   N)r<   huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r
   __all__r1   r8   r7   <module>rN      sJ      . 3 1 # 56?(" ?(  7?(D /r8   