
    i,4                     F   d dl mZ d dlZd dlmc mZ d dlmZ ddlmZm	Z	 ddl
mZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZmZ ddlmZmZ ddlmZ ddlmZ ddlmZm Z m!Z!m"Z"m#Z#m$Z$m%Z% ddl&m'Z'  e       r	d dl(m)Z)m*Z* nd\  Z)Z*e)e*fZ+ e,e+      Z- ej\                  e/      Z0 G d de#      Z1 G d de      Z2 G d dejf                        Z4 G d de      Z5 G d dejf                        Z6 G d d e      Z7 G d! d"e"      Z8 G d# d$e!      Z9 G d% d&e       Z:g d'Z;y)(    )CallableN)nn   )CacheDynamicCache)create_causal_mask)GradientCheckpointingLayer)BaseModelOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)TransformersKwargslogging)is_causal_conv1d_availableis_torchdynamo_compiling   )apply_mask_to_padding_states)Gemma2RotaryEmbedding)LlamaAttentionLlamaForCausalLM
LlamaModelLlamaPreTrainedModelLlamaRMSNormapply_rotary_pos_embeager_attention_forward   )
Lfm2Config)causal_conv1d_fncausal_conv1d_updateNNc                       e Zd Zy)Lfm2RMSNormN__name__
__module____qualname__     v/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/lfm2/modular_lfm2.pyr!   r!   7       r'   r!   c                       e Zd Zy)Lfm2RotaryEmbeddingNr"   r&   r'   r(   r+   r+   ;   r)   r'   r+   c                   *     e Zd Zdef fdZd Z xZS )Lfm2MLPconfigc                    t         |           |j                  }|j                  rat	        d|z  dz        }|j
                  Dt	        |j
                  |z        }|j                  ||j                  z   dz
  |j                  z  z  }t        j                  |j                  |d      | _
        t        j                  |j                  |d      | _        t        j                  ||j                  d      | _        y )Nr   r   r   Fbias)super__init__intermediate_sizeblock_auto_adjust_ff_dimintblock_ffn_dim_multiplierblock_multiple_ofr   Linearhidden_sizew1w3w2)selfr.   r4   	__class__s      r(   r3   zLfm2MLP.__init__@   s    "44** #A(9$9A$= >..:$'(G(GJ[([$\!$*$<$<&)A)AAAE&JbJbb%! ))F..0AN))F..0AN))-v/A/ANr'   c                     | j                  t        j                  | j                  |            | j	                  |      z        S N)r=   Fsilur;   r<   )r>   xs     r(   forwardzLfm2MLP.forwardO   s/    wwqvvdggaj)DGGAJ677r'   )r#   r$   r%   r   r3   rE   __classcell__r?   s   @r(   r-   r-   ?   s    Oz O8r'   r-   c                        e Zd Zdedef fdZ	 ddej                  deej                  ej                  f   dej                  dz  de	dz  d	eej                  ej                  dz  f   f
d
Z
 xZS )Lfm2Attentionr.   	layer_idxc                    t         |   ||       t        j                  |j                  |j
                  | j                  z  d      | _        t        j                  |j                  |j                  | j                  z  d      | _	        t        j                  |j                  |j                  | j                  z  d      | _
        t        j                  |j
                  | j                  z  |j                  d      | _        t        | j                  |j                        | _        t        | j                  |j                        | _        | `| `y )NFr0   eps)r2   r3   r   r9   r:   num_attention_headshead_dimq_projnum_key_value_headsk_projv_projout_projr!   norm_epsq_layernormk_layernormo_projattention_dropoutr>   r.   rJ   r?   s      r(   r3   zLfm2Attention.__init__T   s    +ii 2 2F4N4NQUQ^Q^4^ejkii 2 2F4N4NQUQ^Q^4^ejkii 2 2F4N4NQUQ^Q^4^ejk		&"<"<t}}"LfN`N`glm&t}}&//J&t}}&//JK"r'   Nhidden_statesposition_embeddingsattention_maskpast_key_valuesreturnc                 
   |j                   d d }g |d| j                  }| j                   | j                  |      j                  |       j                  dd      }| j                   | j                  |      j                  |       j                  dd      }	 | j                  |      j                  | j                  dd      }
|\  }}t        ||	||      \  }}	| |j                  |	|
| j                        \  }	}
t        j                  | j                  j                  t               } || ||	|
|fd| j"                  d|\  }} |j$                  g |d j'                         }| j)                  |      }||fS )Nr   r   g        )dropoutscaling)shaperO   rV   rP   view	transposerW   rR   rS   r   updaterJ   r   get_interfacer.   _attn_implementationr   rc   reshape
contiguousrT   )r>   r[   r\   r]   r^   kwargsinput_shapehidden_shapequery_states
key_statesvalue_statescossinattention_interfaceattn_outputattn_weightsoutputs                    r(   rE   zLfm2Attention.forward_   s    $))#2.88b8$--8''(GM(B(G(G(VWaabcefg%%&Edkk-&@&E&E|&TU__`acde
6t{{=166EOOPQSTU&S#7jRUWZ#[ j&'6'='=j,X\XfXf'g$J(?(M(MKK,,.E)
 %8	%
 LL	%
 	%
!\ *k));;;;FFH{+|##r'   rA   )r#   r$   r%   r   r6   r3   torchTensortupler   rE   rF   rG   s   @r(   rI   rI   S   s    	#z 	#c 	#  )-%$||%$ #5<<#=>%$ t+	%$
 %$ 
u||U\\D00	1%$r'   rI   c                       e Zd Zdedef fdZ	 	 ddej                  dedz  dej                  dz  fdZ		 	 ddej                  dedz  dej                  dz  fd	Z
	 	 dd
ej                  dedz  dej                  dz  fdZ xZS )Lfm2ShortConvr.   rJ   c           	      2   t         |           || _        || _        |j                  | _        |j                  | _        t        j                  |j                  |j                  | j
                  |j                  | j                  | j
                  dz
        | _        t        j                  |j                  d|j                  z  | j                        | _        t        j                  |j                  |j                  | j                        | _        y )Nr   )in_channelsout_channelskernel_sizegroupsr1   paddingr   r0   )r2   r3   r.   rJ   conv_L_cacheL_cache	conv_biasr1   r   Conv1dr:   convr9   in_projrT   rZ   s      r(   r3   zLfm2ShortConv.__init__   s    
 	"**$$	II**++%%LL1$
	 yy!3!3Q9K9K5KRVR[R[\		&"4"4f6H6HtyyYr'   NrD   r^   r]   c                    t        ||      }| j                  |      j                  dd      }|j                  dd      \  }}}||z  }| j                  j
                  j                  | j                  j
                  j                  d      | j                  j
                  j                  d            }||j                  | j                        ret        |j                  d      |j                  | j                     j                  || j                  j                  d       }	|	j                  d      }	n}|Xt         j"                  j%                  || j&                  |j(                  d   z
  df      }
|j+                  |
| j                        }
t-        ||| j                  j                  d       }	||	z  }| j/                  |j                  dd      j1                               }|S )Nra   r   dimr   r   )
activation)r   r   rf   chunkr   weightre   sizehas_previous_staterJ   r   squeezelayersconv_statesr1   	unsqueezer   
functionalpadr   rd   update_conv_stater   rT   rk   )r>   rD   r^   r]   BCxBCBxconv_weightsconv_out
conv_stateys               r(   cuda_kernels_forwardz"Lfm2ShortConv.cuda_kernels_forward   s    )N;ll1o''B/))A2)&1aUyy'',,TYY-=-=-B-B1-EtyyGWGWG\G\]^G_`&?+M+Mdnn+]+

2&&t~~6BB		H  ))"-H*]]..rDLL288B<4OQR3ST
,>>z4>>Z
'L$))..UYZHLMM!++b"-88:;r'   c                    |j                   d   }t        ||      }| j                  |      j                  dd      }|j	                  dd      \  }}}||z  }||j                  | j                        r|j                  || j                        }	t        j                  |	j                  |j                        | j                  j                  d d dd d f   z  d      }
| j                  r|
| j                  j                  z  }
|
j                  d      }
nr|Xt         j"                  j%                  || j&                  |j                   d   z
  df      }	|j                  |	| j                        }	| j                  |      dd |f   }
||
z  }|j                  dd      j)                         }| j+                  |      }|S )Nr   ra   r   r   r   r   .)rd   r   r   rf   r   r   rJ   r   rx   sumtodevicer   r   r1   r   r   r   r   r   rk   rT   )r>   rD   r^   r]   seqlenr   r   r   r   r   r   r   s               r(   slow_forwardzLfm2ShortConv.slow_forward   s    (N;ll1o''B/))A2)&1aU&?+M+Mdnn+](::2t~~NJyyryy!9DII<L<LQPQSTW<U!U[]^HyyDIINN*))"-H*]]..rDLL288B<4OQR3ST
,>>z4>>Z
yy}S'6'\2HLKKB**,MM!r'   r[   c                     t         r5d|j                  j                  v rt               s| j	                  |||      S | j                  |||      S )Ncuda)is_fast_path_availabler   typer   r   r   )r>   r[   r^   r]   s       r(   rE   zLfm2ShortConv.forward   sJ     "f0D0D0I0I&IRjRl,,]O^\\  PPr'   r   )r#   r$   r%   r   r6   r3   rx   ry   r   r   r   rE   rF   rG   s   @r(   r|   r|      s    ZZ Z2 )-.2	<<  t+	H )-.2	<<  t+	H )-.2	Q||Q Q t+	Qr'   r|   c                        e Zd Zdedef fdZ	 	 	 	 ddej                  deej                  ej                  f   dz  dej                  dz  dej                  dz  d	e
dz  d
ej                  fdZ xZS )Lfm2DecoderLayerr.   rJ   c                 f   t         |           |j                  |   dk(  | _        | j                  rt	        ||      | _        nt        ||      | _        t        |      | _	        t        |j                  |j                        | _        t        |j                  |j                        | _        y )Nfull_attentionrL   )r2   r3   layer_typesis_attention_layerrI   	self_attnr|   r   r-   feed_forwardr!   r:   rU   operator_normffn_normrZ   s      r(   r3   zLfm2DecoderLayer.__init__   s    "("4"4Y"?CS"S""*69=DN%fi8DI#FO(););Q#F$6$6FOOLr'   Nr[   r\   r]   position_idsr^   r_   c           	         |}| j                   r+ | j                  d| j                  |      ||||d|\  }}n#| j                  | j                  |      ||      }||z   }|| j	                  | j                  |            z   }|S )N)r[   r\   r]   r   r^   )r[   r^   r]   r&   )r   r   r   r   r   r   )	r>   r[   r\   r]   r   r^   rl   residual_s	            r(   rE   zLfm2DecoderLayer.forward   s     !""-t~~  "00?$7-) /   M1 !II"00? /- & M
 &0%(9(9$--:V(WWr'   )NNNN)r#   r$   r%   r   r6   r3   rx   ry   rz   
LongTensorr   rE   rF   rG   s   @r(   r   r      s    
Mz 
Mc 
M IM.204(,|| #5<<#=>E t+	
 &&-  
r'   r   c                       e Zd ZdZy)Lfm2PreTrainedModelFN)r#   r$   r%   _can_compile_fullgraphr&   r'   r(   r   r     s    "r'   r   c                        e Zd Zdef fdZ	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  dedz  dej                  dz  d	e
dz  d
ee   defdZ xZS )	Lfm2Modelr.   c                 t    t         |   |       t        |j                  |j                        | _        | `y )NrL   )r2   r3   r!   r:   rU   embedding_normnorm)r>   r.   r?   s     r(   r3   zLfm2Model.__init__  s.     )&*<*<&//RIr'   N	input_idsr]   r   r^   inputs_embeds	use_cacherl   r_   c           	         |d u |d uz  rt        d      || j                  |      }|r|t        | j                        }|V||j	                         nd}t        j                  |j                  d   |j                        |z   }|j                  d      }t        | j                  ||||      }	|j                  d   dk7  r|nd }
|}| j                  ||      }t        | j                  d | j                  j                         D ]3  \  }}| j                  j                  |   dk(  r|	n|
} ||f||||d	|}5 | j!                  |      }t#        ||
      S )Nz:You must specify exactly one of input_ids or inputs_embeds)r.   r   r   )r   )r.   r   r]   r^   r   )r   r   )r]   r\   r   r^   )last_hidden_stater^   )
ValueErrorembed_tokensr   r.   get_seq_lengthrx   arangerd   r   r   r   
rotary_emb	enumerater   num_hidden_layersr   r   r
   )r>   r   r]   r   r^   r   r   rl   past_seen_tokenscausal_masklinear_attentionr[   r\   idecoder_layer
layer_masks                   r(   rE   zLfm2Model.forward!  s    -t";<YZZ  --i8M0*$++>OCRC^==?de <<(;(;A(>}G[G[\_ooL'11!4L(;;')+%
 .;-@-@-Cq-H>d%"oom,oW !*$++6U8U8U*V W 		A}(,(?(?(BFV(V\lJ))$7) / M		 ++M:&++
 	
r'   )NNNNNN)r#   r$   r%   r   r3   rx   r   ry   r   FloatTensorboolr   r   r
   rE   rF   rG   s   @r(   r   r     s    z  .2.204(,26!%6
##d*6
 t+6
 &&-	6

 6
 ((4/6
 $;6
 +,6
 
!6
r'   r   c                       e Zd Zy)Lfm2ForCausalLMNr"   r&   r'   r(   r   r   Z  r)   r'   r   )r   r   r   )<collections.abcr   rx   torch.nn.functionalr   r   rB   cache_utilsr   r   masking_utilsr   modeling_layersr	   modeling_outputsr
   modeling_utilsr   processing_utilsr   utilsr   r   utils.import_utilsr   r   bamba.modeling_bambar   gemma2.modeling_gemma2r   llama.modeling_llamar   r   r   r   r   r   r   configuration_lfm2r   causal_conv1dr   r   kernel_modulesallr   
get_loggerr#   loggerr!   r+   Moduler-   rI   r|   r   r   r   r   __all__r&   r'   r(   <module>r      s   %     . / 9 7 5 & 0 V ? :   + DD-7** #$89^,  
		H	%	, 		/ 	8bii 8(1$N 1$haQBII aQH)1 )X#. #<

 <
~	& 	 Br'   