
    i                     r    d dl mZ ddlmZ ddlmZ ddlmZ  ed      e G d d	e                    Zd	gZ	y
)    )strict   )PreTrainedConfig)RopeParameters)auto_docstringzCohereForAI/c4ai-command-r-v01)
checkpointc                       e Zd ZU dZdZdgZddddddddZdgdgfd	d
gd	gfd	gd	gfdZdZe	e
d<   dZe	e
d<   dZe	e
d<   dZee
d<   dZe	e
d<   dZe	e
d<   dZe	dz  e
d<   dZee
d<   dZe	e
d<   dZee
d<   dZee
d <   d!Zee
d"<   d#Ze	dz  e
d$<   d%Ze	dz  e
d&<   d'Ze	ee	   z  dz  e
d(<   d!Zee
d)<   dZee z  dz  e
d*<   d+Z!ee
d,<   d-Z"ee	z  e
d.<   d/Z#e	dz  e
d0<   dZ$ee   dz  e
d1<    fd2Z% xZ&S )3Cohere2Configa  
    logit_scale (`float`, *optional*, defaults to 0.0625):
        The scaling factor for the output logits.

    ```python
    >>> from transformers import Cohere2Model, Cohere2Config

    >>> # Initializing a Cohere Nextmodel configuration
    >>> configuration = Cohere2Config()

    >>> # Initializing a model from the Cohere2 configuration
    >>> model = Cohere2Model(configuration) # doctest: +SKIP

    >>> # Accessing the model configuration
    >>> configuration = model.config # doctest: +SKIP
    ```
    cohere2past_key_valuescolwiserowwise)zlayers.*.self_attn.q_projzlayers.*.self_attn.k_projzlayers.*.self_attn.v_projzlayers.*.self_attn.o_projzlayers.*.mlp.gate_projzlayers.*.mlp.up_projzlayers.*.mlp.down_proj	input_idsinputs_embedshidden_statesattention_mask)embed_tokenslayersnormi  
vocab_sizei    hidden_sizei X  intermediate_sizeg      ?logit_scale(   num_hidden_layers@   num_attention_headsNnum_key_value_headssilu
hidden_actmax_position_embeddingsg{Gz?initializer_rangegh㈵>layer_norm_epsT	use_cacher   pad_token_id   bos_token_idi eos_token_idtie_word_embeddingsrope_parametersFattention_biasg        attention_dropouti   sliding_windowlayer_typesc                 V   | j                   | j                  | _         | j                  | j                  z  | _        | j                  M|j                  dd      }t        | j                        D cg c]  }t        |dz   |z        rdnd c}| _        t        | (  di | y c c}w )Nsliding_window_pattern      sliding_attentionfull_attention )r   r   r   head_dimr.   popranger   boolsuper__post_init__)selfkwargs_sliding_window_patterni	__class__s       /var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/cohere2/configuration_cohere2.pyr;   zCohere2Config.__post_init__X   s    ##+'+'?'?D$ ((D,D,DD #&,jj1I1&M# t556  (,QU6M,M'N#Tdd D
 	'' s   2B&)'__name__
__module____qualname____doc__
model_typekeys_to_ignore_at_inferencebase_model_tp_planbase_model_pp_planr   int__annotations__r   r   r   floatr   r   r   r    strr!   r"   r#   r$   r9   r%   r'   r(   listr)   r*   r   dictr+   r,   r-   r.   r;   __classcell__)r@   s   @rA   r
   r
      s   $ J#4"5%.%.%.%."+ )"+ &(9:#%568IJ!"_$56 JK"s"Ks!!&*t*J#'S'#u# NE It L#*  L#* +1L#S	/D(1 $$48O^d*T18 ND %(us{(!%NC$J%$(KcT!(( (    r
   N)
huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r
   __all__r5   rQ   rA   <module>rW      sK   * / 3 1 # ;<J($ J(  =J(Z 
rQ   