
    i>                     f    d dl mZ ddlmZ ddlmZ  ed      e G d de                    ZdgZy	)
    )strict   )PreTrainedConfig)auto_docstringzfacebook/cwm)
checkpointc                       e Zd ZU dZdZdgZddddddddZdgdgfd	d
gd	gfd	gd	gfdZdZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZee
d<   dZe	e
d<   dZee
d<   dZee
d<   d Zee
d!<   d"Ze	d"z  e
d#<   d$Ze	e
d%<   d"Ze	ee	   z  d"z  e
d&<   d'Ze	e
d(<   d)Zee
d*<   d"Zed"z  e
d+<   d,Z ee	z  e
d-<   d)Z!ee
d.<   d/Z"e	e
d0<   d1Z#d2Z$e	e
d3<   d"Z%ee   d"z  e
d4<    fd5Z&d6 Z' xZ(S )7	CwmConfigaf  
    ```python
    >>> from transformers import CwmModel, CwmConfig

    >>> # Initializing a Cwm cwm-7b style configuration
    >>> configuration = CwmConfig()

    >>> # Initializing a model from the cwm-7b style configuration
    >>> model = CwmModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```cwmpast_key_valuescolwiserowwise)zlayers.*.self_attn.q_projzlayers.*.self_attn.k_projzlayers.*.self_attn.v_projzlayers.*.self_attn.o_projzlayers.*.mlp.gate_projzlayers.*.mlp.up_projzlayers.*.mlp.down_proj	input_idsinputs_embedshidden_statesattention_mask)embed_tokenslayersnormi  
vocab_sizei   hidden_sizei T  intermediate_size@   num_hidden_layers0   num_attention_heads   num_key_value_headssilu
hidden_acti   max_position_embeddingsg{Gz?initializer_rangegh㈵>rms_norm_epsT	use_cacheNpad_token_idi  bos_token_ideos_token_id   pretraining_tpFtie_word_embeddingsrope_parametersg        attention_dropoutmlp_bias   head_dim    .A    sliding_windowlayer_typesc                 6   | j                   ddddddd| _         | j                  4d}t        | j                        D cg c]  }||z  d	k(  rd
nd c}| _        | j                  rt        | j                        nd | _        t        | j                        | _        | j                  | j                  ng d| _        | j                  | j                  | j                  z  | _        | j                  | j                  | _        t        | 4  di | y c c}w )Nr/   g      0@g      @g      ?r0   llama3)
rope_thetafactorhigh_freq_factorlow_freq_factor original_max_position_embeddings	rope_type   r   full_attentionsliding_attention)i i i	  )r*   r2   ranger   r1   intlistr&   r.   r   r   r   super__post_init__)selfkwargswindow_patterni	__class__s       z/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/cwm/configuration_cwm.pyrC   zCwmConfig.__post_init__W   s   ')$'#&48%$D  #N t556  '(.&8A&=!DWW D
 ;?:M:Mc$"5"56SW 0 01151B1B1ND--Tl==  ,,0H0HHDM##+'+'?'?D$'' s   Dc                     | j                   | j                  z  dk7  r&t        d| j                    d| j                   d      y)zOPart of `@strict`-powered validation. Validates the architecture of the config.r   zThe hidden size (z6) is not a multiple of the number of attention heads (z).N)r   r   
ValueError)rD   s    rI   validate_architecturezCwmConfig.validate_architecturet   sS    d666!;#D$4$4#5 622327  <    ))__name__
__module____qualname____doc__
model_typekeys_to_ignore_at_inferencebase_model_tp_planbase_model_pp_planr   r@   __annotations__r   r   r   r   r   r   strr    r!   floatr"   r#   boolr$   r%   r&   rA   r(   r)   r*   dictr+   r,   r.   default_thetar1   r2   rC   rL   __classcell__)rH   s   @rI   r	   r	      s    J#4"5 &/%.%.%."+ )"+ &(9:#%568IJ!"_$56 JK"s"s!!  J#)S)#u#L%It#L#*#L#+/L#S	/D(/NC %%#'OTD['%(us{(HdHcMNC$(KcT!((:rM   r	   N)huggingface_hub.dataclassesr   configuration_utilsr   utilsr   r	   __all__r>   rM   rI   <module>ra      sF   , / 3 # >*\  \  +\~ -rM   