
    iY                        d dl Z d dlmZ ddlmZmZ ddlmZmZ ddl	m
Z
 ddlmZ ddlmZmZmZ d	d
lmZ d	dlmZmZmZmZ d	dlmZmZ  ej6                  e      Z ed      e G d de                    Z G d de      Z G d de      Z  G d de      Z! G d de      Z" G d de
      Z# G d de      Z$ G d de      Z%g dZ&y)     N)strict   )CacheDynamicCache)create_causal_mask!create_sliding_window_causal_mask)BaseModelOutputWithPast)Unpack)TransformersKwargsauto_docstringlogging   )LlamaConfig)LlamaDecoderLayerLlamaForCausalLM
LlamaModelLlamaPreTrainedModel)Qwen2AttentionQwen2RotaryEmbeddingzfacebook/cwm)
checkpointc                       e Zd ZU dZdZdZeed<   dZeed<   dZ	eed<   d	Z
eed
<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZedz  ed<   dZeee   z  dz  ed<   dZeed<   d Zeed!<   d"Zeez  ed#<   d$Zeed%<   d Zeed&<   dZedz  ed'<   d(Z eed)<   dZ!ee   dz  ed*<    e"       Z# fd+Z$ xZ%S ),	CwmConfigcwm    .Ai  
vocab_sizei   hidden_sizei T  intermediate_size@   num_hidden_layers0   num_attention_heads   num_key_value_heads   head_dimsilu
hidden_acti   max_position_embeddingsg{Gz?initializer_rangegh㈵>rms_norm_epsT	use_cacheNpad_token_ideos_token_idi  bos_token_idFtie_word_embeddingsg        attention_dropout   pretraining_tpmlp_biasrope_parameters    sliding_windowlayer_typesc                    | j                   ddddddd| _         | j                  4d}t        | j                        D cg c]  }||z  d	k(  rd
nd c}| _        | j                  rt        | j                        nd | _        t        | j                        | _        | j                  | j                  ng d| _        t        | $  di | y c c}w )Nr   g      0@g      @g      ?r5   llama3)
rope_thetafactorhigh_freq_factorlow_freq_factor original_max_position_embeddings	rope_type   r   full_attentionsliding_attention)i i i	  )
r4   r7   ranger   r6   intlistr-   super__post_init__)selfkwargswindow_patterni	__class__s       t/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/cwm/modular_cwm.pyrH   zCwmConfig.__post_init__D   s    ')$'#&48%$D  #N t556  '(.&8A&=!DWW D
 ;?:M:Mc$"5"56SW 0 01151B1B1ND--Tl'' s   C)&__name__
__module____qualname__
model_typedefault_thetar   rE   __annotations__r   r   r   r!   r#   r%   r'   strr(   r)   floatr*   r+   boolr,   r-   rF   r.   r/   r0   r2   r3   r4   dictr6   r7   AttributeErrorattention_biasrH   __classcell__rM   s   @rN   r   r   %   s'    JMJK"s"s!!  HcJ#)S)#u#L%It#L#*#+/L#S	/D(/L# %%%(us{(NCHd#'OTD['NC$(KcT!(#%N( (    r   c                       e Zd Zy)CwmRotaryEmbeddingNrO   rP   rQ   rC   r]   rN   r_   r_   ]       r]   r_   c                   (     e Zd Zdedef fdZ xZS )CwmAttentionconfig	layer_idxc                    t         |   ||       t        j                  j	                  |j
                  |j                  | j                  z  d      | _        t        j                  j	                  |j
                  |j                  | j                  z  d      | _
        t        j                  j	                  |j
                  |j                  | j                  z  d      | _        y )Nrd   re   F)bias)rG   __init__torchnnLinearr   r!   r%   q_projr#   k_projv_projrI   rd   re   rM   s      rN   ri   zCwmAttention.__init__b   s    )<hhoof&8&8&:T:TW[WdWd:dkpoqhhoof&8&8&:T:TW[WdWd:dkpoqhhoof&8&8&:T:TW[WdWd:dkpoqr]   rO   rP   rQ   r   rE   ri   r[   r\   s   @rN   rc   rc   a   s    ry rS r rr]   rc   c                   (     e Zd Zdedef fdZ xZS )CwmDecoderLayerrd   re   c                 L    t         |   ||       t        ||      | _        y )Nrg   )rG   ri   rc   	self_attnrp   s      rN   ri   zCwmDecoderLayer.__init__j   s#    )<%VyIr]   rq   r\   s   @rN   rs   rs   i   s    Jy JS J Jr]   rs   c                       e Zd Zy)CwmPreTrainedModelNr`   rC   r]   rN   rw   rw   o   ra   r]   rw   c                       e Zd Zy)CwmModelOutputWithPastNr`   rC   r]   rN   ry   ry   s   ra   r]   ry   c                        e Zd ZeZdef fdZ	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  de	dz  dej                  dz  d	edz  d
ee   defdZ xZS )CwmModelrd   c           	          t         |   |       t        j                  j	                  t        |j                        D cg c]  }t        ||       c}      | _        y c c}w )N)	rG   ri   rj   rk   
ModuleListrD   r   rs   layersrp   s      rN   ri   zCwmModel.__init__z   sI     hh))AFvG_G_A`aI_VY/a
as   A!N	input_idsattention_maskposition_idspast_key_valuesinputs_embedsr+   rJ   returnc           	         |d u |d uz  rt        d      || j                  |      }|r|t        | j                        }|V||j	                         nd}t        j                  |j                  d   |j                        |z   }|j                  d      }t        |x}	t              s9| j                  ||||d}
|
j                         }t        d
i |
t        d
i |d}	|}| j                  ||      }t!        | j"                  d | j                  j$                         D ]-  \  }} ||f|	| j                  j&                  |      |||d|}/ | j)                  |      }t+        ||	      S )Nz:You must specify exactly one of input_ids or inputs_embeds)rd   r   r1   )device)rd   r   r   r   r   )rA   rB   )r   r   r   position_embeddings)last_hidden_stater   rC   )
ValueErrorembed_tokensr   rd   get_seq_lengthrj   arangeshaper   	unsqueeze
isinstancerX   copyr   r   
rotary_emb	enumerater~   r   r7   normry   )rI   r   r   r   r   r   r+   rJ   past_seen_tokenscausal_mask_mappingmask_kwargssliding_mask_kwargshidden_statesr   rL   decoder_layers                   rN   forwardzCwmModel.forward   s    -t";<YZZ *.*;*;I*FM0*$++>OCRC^==?de <<(;(;A(>}G[G[\_ooL'11!4L?-F++!."0#2 ,K #."2"2"4 #5"C{"C%F%]I\%]#
 &"oom\J )$++6U8U8U*V W 	A})24;;3J3J13MN) /$7 M	 		-0%++
 	
r]   )NNNNNN)rO   rP   rQ   r   config_classri   rj   
LongTensorTensorr   FloatTensorrW   r
   r   ry   r   r[   r\   s   @rN   r{   r{   w   s    L
y 
 .2.204(,26!%8
##d*8
 t+8
 &&-	8

 8
 ((4/8
 $;8
 +,8
 
 8
r]   r{   c                       e Zd Zy)CwmForCausalLMNr`   rC   r]   rN   r   r      ra   r]   r   )r   rw   r{   r   )'rj   huggingface_hub.dataclassesr   cache_utilsr   r   masking_utilsr   r   modeling_outputsr	   processing_utilsr
   utilsr   r   r   llama.configuration_llamar   llama.modeling_llamar   r   r   r   qwen2.modeling_qwen2r   r   
get_loggerrO   loggerr   r_   rc   rs   rw   ry   r{   r   __all__rC   r]   rN   <module>r      s      . . R 7 & @ @ 3  H 
		H	% >*3( 3(  +3(l	- 	r> rJ' J	- 		4 	A
z A
H	% 	r]   