
    i                    V   d dl Z d dlmZmZ d dlmZ d dlmZ d dlZd dl	m
Z
 d dlm
c mZ ddlmZ ddlmZ ddlmZmZ dd	lmZ dd
lmZ ddlmZmZ ddlmZ ddlm Z m!Z!m"Z" ddl#m$Z$m%Z% ddl&m'Z'm(Z( ddl)m*Z* ddl+m,Z,m-Z-m.Z.m/Z/m0Z0 ddl1m2Z2m3Z3 ddl4m5Z5 ddl6m7Z7 ddl8m9Z9m:Z:m;Z;m<Z< ee. G d de!                    Z=e e.d       G d de                     Z>e e.d       G d d e,                    Z? G d! d"e
j                        ZA G d# d$e
j                        ZB G d% d&e
j                        ZC G d' d(e
j                        ZD G d) d*e
j                        ZE G d+ d,e
j                        ZF G d- d.e
j                        ZG G d/ d0e
j                        ZH G d1 d2e
j                        ZI G d3 d4e
j                        ZJ G d5 d6e
j                        ZL G d7 d8e
j                        ZM G d9 d:e
j                        ZN G d; d<e
j                        ZOd= ZPd>ej                  d?eRd@ej                  fdAZS	 	 	 didBe
j                  dCej                  dDej                  dEej                  dFej                  dz  dGeTeRz  dHeTdz  dIeTdz  d@eUej                  ej                  f   fdJZVdjdKej                  dLej                  dMej                  dNeRfdOZW eeW       G dP dQe
j                               ZX G dR dSe      ZYe. G dT dUe(             ZZ G dV dWeZ      Z[ G dX dYe
j                        Z\ e.dZ       G d[ d\eZ             Z] e.d]       G d^ d_eZe             Z^ G d` dae
j                        Z_ e.db       G dc ddeZ             Z` e.de       G df dgeZe             Zag dhZby)k    N)CallableSequence)	dataclass)Optional   )initialization)ACT2FN)CacheDynamicCache)GenerationMixin)use_kernelized_func)create_causal_mask!create_sliding_window_causal_mask)GradientCheckpointingLayer)BaseModelOutputWithPastBaseModelOutputWithPoolingCausalLMOutputWithPast)ROPE_INIT_FUNCTIONSdynamic_rope_update)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)ModelOutputTransformersKwargsauto_docstringcan_return_tupletorch_compilable_check)maybe_autocastmerge_with_config_defaults)capture_outputs   )	AutoModel   )Gemma3nAudioConfigGemma3nConfigGemma3nTextConfigGemma3nVisionConfigc                   :    e Zd ZU dZdZej                  dz  ed<   y)Gemma3nAudioEncoderModelOutputzy
    audio_mel_mask (`torch.BoolTensor`, *optional*):
        A torch.BoolTensor of shape `(batch_size, num_frames)`
    Naudio_mel_mask)__name__
__module____qualname____doc__r*   torch
BoolTensor__annotations__     }/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/gemma3n/modeling_gemma3n.pyr)   r)   6   s    
 /3NE$$t+2r3   r)   zL
    Base class for Gemma3n outputs, with hidden states and attentions.
    custom_introc                   b    e Zd ZU dZdZej                  dz  ed<   dZej                  dz  ed<   y)Gemma3nModelOutputWithPasta  
    past_key_values (`Cache`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
        It is a [`~cache_utils.Cache`] instance. For more details, see our [kv cache guide](https://huggingface.co/docs/transformers/en/kv_cache).

        Contains pre-computed hidden-states (key and values in the self-attention blocks) that can be used (see
        `past_key_values` input) to speed up sequential decoding.
    image_hidden_states (`torch.FloatTensor`, *optional*):
        A `torch.FloatTensor` of size `(batch_size, num_images, sequence_length, hidden_size)`.
        image_hidden_states of the model produced by the vision encoder and after projecting the last hidden state.
    audio_hidden_states (`torch.FloatTensor`, *optional*):
        A `torch.FloatTensor` of size `(batch_size, num_images, sequence_length, hidden_size)`.
        audio_hidden_states of the model produced by the audio encoder and after projecting the last hidden state.
    Nimage_hidden_statesaudio_hidden_states)	r+   r,   r-   r.   r9   r/   FloatTensorr1   r:   r2   r3   r4   r8   r8   A   s5     59**T1848**T18r3   r8   zS
    Base class for Gemma3n causal language model (or autoregressive) outputs.
    c                   "   e Zd ZU dZdZej                  dz  ed<   dZej                  dz  ed<   dZ	e
dz  ed<   dZeej                     dz  ed<   dZeej                     dz  ed<   dZej                  dz  ed<   dZej                  dz  ed	<   y)
Gemma3nCausalLMOutputWithPastaF  
    loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
        Language modeling loss (for next-token prediction).
    logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.text_config.vocab_size)`):
        Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
    past_key_values (`Cache`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
        It is a [`~cache_utils.Cache`] instance. For more details, see our [kv cache guide](https://huggingface.co/docs/transformers/en/kv_cache).

        Contains pre-computed hidden-states (key and values in the self-attention blocks) that can be used (see
        `past_key_values` input) to speed up sequential decoding.
    image_hidden_states (`torch.FloatTensor`, *optional*):
        A `torch.FloatTensor` of size `(batch_size, num_images, sequence_length, hidden_size)`.
        image_hidden_states of the model produced by the vision encoder after projecting last hidden state.
    audio_hidden_states (`torch.FloatTensor`, *optional*):
        A `torch.FloatTensor` of size `(batch_size, num_images, sequence_length, hidden_size)`.
        audio_hidden_states of the model produced by the audio encoder and after projecting the last hidden state.
    Nlosslogitspast_key_valueshidden_states
attentionsr9   r:   )r+   r,   r-   r.   r>   r/   r;   r1   r?   r@   r
   rA   tuplerB   r9   r:   r2   r3   r4   r=   r=   [   s    $ &*D%

d
")'+FE$+$(OUT\(59M5**+d2926Je''(4/648**T1848**T18r3   r=   c                        e Zd Zd	dededef fdZdej                  fdZ	dej                  dej                  fdZ
 xZS )
Gemma3nRMSNormdimeps
with_scalec                     t         |           || _        || _        | j                  r0t	        j
                  t        j                  |      d      | _        y y )NT)requires_grad)	super__init__rG   rH   nn	Parameterr/   onesweight)selfrF   rG   rH   	__class__s       r4   rL   zGemma3nRMSNorm.__init__   sB    $??,,uzz#dKDK r3   rA   c                     |j                  d      j                  dd      | j                  z   }|t        j                   |d      z  S )Nr!   T)keepdim      )powmeanrG   r/   )rQ   rA   mean_squareds      r4   _normzGemma3nRMSNorm._norm   sA    $((+00T0BTXXMuyyt<<<r3   returnc                     | j                  |j                               }| j                  r|| j                  j                         z  }|j	                  |      S N)rZ   floatrH   rP   type_as)rQ   rA   normed_outputs      r4   forwardzGemma3nRMSNorm.forward   sH    

=#6#6#89??)DKK,=,=,??M$$]33r3   )gư>T)r+   r,   r-   intr^   boolrL   r/   TensorrZ   ra   __classcell__rR   s   @r4   rE   rE   ~   sL    LC Le L L=5<< =
4U\\ 4ell 4r3   rE   c                       e Zd Zdef fdZdej                  dej                  dej                  fdZdej                  de	d	e	d
e	de	de	de	dej                  fdZ
dej                  dej                  dej                  fdZ xZS )%Gemma3nAudioRelativePositionEmbeddingconfigc                 R   t         |           || _        | j                  j                  | _        | j                  j
                  | _        | j                  | j                  z  | _        t        d| j                  j                  dz
        | _
        | j                  j                  | _        t        j                  | j                  | j                  | j                  z  d      | _        d}d}| j                  dz  }t!        j"                  t%        |      t%        |      z        t        |dz
  d      z  }|t'        j(                  t'        j*                  |      | z        z  }| j-                  d|j%                         j/                  d      j/                  d      d	       y )
Nr   r#   Fbias      ?     @r!   inv_timescales
persistent)rK   rL   ri   conf_num_attention_heads	num_headshidden_sizechannelshead_dimmaxconf_attention_context_leftmax_backwardconf_attention_context_rightmax_forwardrM   Linearpos_projmathlogr^   r/   exparangeregister_buffer	unsqueeze)rQ   ri   min_timescalemax_timescalenum_timescaleslog_timescale_incrementro   rR   s          r4   rL   z.Gemma3nAudioRelativePositionEmbedding.__init__   sL   ==//74;;#J#JQ#NO;;CC		$--$--1OV[\!+"&((5+?%BV+V"WZ]^lop^prsZt"t&5<<3OSjRj3j)kk  ",,Q/99!< 	 	
r3   positiondtyper[   c                 P   |j                         j                  d      }|| j                  j                  |j                  t
        j                        z  }t        j                  t        j                  |      t        j                  |      gd      }|j                  |      S )NrT   devicer   rF   )r^   r   ro   tor   r/   float32catsincostype)rQ   r   r   scaled_timetiming_signals        r4   _get_timing_signal_1d_posz?Gemma3nAudioRelativePositionEmbedding._get_timing_signal_1d_pos   s}    >>#--b1!4!4!7!7xV[VcVc!7!dd		599[#9599[;Q"RXZ[!!%((r3   term_bd_before_shift
batch_sizers   num_query_blocksquery_block_sizekey_context_sizemax_span_plus_1c                     |dz   |z
  }d|f}	t         j                  j                  ||	      }
|
j                  |||||dz   z  f      }|ddddddd||z  f   }|j                  |||||f      }|S )aZ  Performs the relative shift.

        Args:
          term_bd_before_shift: Tensor of shape [B, N, U, W, F_span]. batch_size
            (B), num_heads (N), num_query_blocks (U), query_block_size (W),
            key_context_size (C = W+L+R), max_span_plus_1 (F_span = L+R+1).

        Returns:
          Tensor of shape [B, N, U, W, C].
        r#   r   N)rM   
functionalpadreshape)rQ   r   r   rs   r   r   r   r   pad_amount_last_dimpadding_tupleterm_bd_paddedterm_bd_reshapedterm_bd_slicedterm_bd_shifteds                 r4   _relative_shiftz5Gemma3nAudioRelativePositionEmbedding._relative_shift   s    4  0!3F /0**+?O
 *11  $4q$89	
 *!Q3X5EHX5X3X*XY )00   
 r3   querieskeysc           	      R   |j                   \  }}}}}|j                   \  }}}	}}t        j                  | j                  | j                   dz
  d|j
                        j                  d      }
|
j                   d   }| j                  |
|j                        }| j                  |      }|j                  d|| j                  | j                        j                  d      }|j                  ddddd      }|j                  ddddd      }t        j                  ||      }|j                  ddddd      }|j                  ddd      }|j                  ||||z  |      }t        j                  ||      }|j                  |||||      }| j!                  ||||||	|      }||z   S )	Nr#   rT   r   r   r   r   r!      )shaper/   r   ry   r{   r   r   r   r   r}   r   rs   rv   squeezepermutematmulr   )rQ   r   r   r   r   r   rs   rv   _r   pos_indicesr   sin_emb_timing_signalprojected_sin_embsin_emb	queries_pkeys_p_tterm_ac
q_permuted
s_permuted
q_reshapedterm_bd_unshifed_matmulterm_bd_unshifedr   s                           r4   ra   z-Gemma3nAudioRelativePositionEmbedding.forward   s    OVmmK
$&6	8'+zz$11 ll4#4#4t7G7G6G!6KRX_XfXfgqq
 &++A. $ > >w}} !? !

 !MM*?@#++APTP]P]^ff
 OOAq!Q2	<<1aA.,,y(3 __Q1a3
 __Q1-
  ''
I?ORb?bdlm

 #(,,z:"F 3::
 ..
 ((r3   )r+   r,   r-   r$   rL   r/   rd   r   r   rb   r   ra   re   rf   s   @r4   rh   rh      s    
1 
.)%,, )u{{ )W\WcWc );#ll; ; 	;
 ; ; ; ; 
;zL)u|| L)5<< L)ELL L)r3   rh   c                   *    e Zd Zdef fdZd Zdej                  dededej                  fdZ	d	ej                  dej                  fd
Z
d	ej                  dej                  fdZd	ej                  dej                  dej                  fdZ xZS )Gemma3nAudioAttentionri   c                    t         |           || _        | j                  j                  | _        | j                  j
                  | _        | j
                  | j                  z  | _        | j                  j                  | _        | j                  j                  | _
        t        d| j                  j                  dz
        | _        | j                  j                  | _        | j                  | j                  z   | j                  z   | _        t#        |      | _        t'        j(                  t+        j,                  | j                  f            | _        t'        j0                  | j
                  | j                  | j                  z  d      | _        t'        j0                  | j
                  | j                  | j                  z  d      | _        t'        j0                  | j
                  | j                  | j                  z  d      | _        | j                  dz  }dt*        j&                  j8                  j;                  t+        j<                  d            z  }| j?                  d||z  jA                         jC                         d	       | jE                         }| j?                  d
|d	       | j?                  dt+        j<                  | j                        jG                         d	       y )Nr   r#   Frk   rV   rm           q_scalerp   local_causal_valid_masksoftcap)$rK   rL   ri   rr   rs   rt   rv   conf_attention_chunk_size
chunk_sizerz   max_future_horizonrw   rx   max_past_horizonconf_attention_logit_capattention_logits_soft_capcontext_sizerh   relative_position_embeddingrM   rN   r/   zerosper_dim_scaler|   q_projk_projv_projr   softplustensorr   clonedetachcreate_local_causal_valid_maskr^   )rQ   ri   r   r_softplus_0r   rR   s        r4   rL   zGemma3nAudioAttention.__init__A  s)   ==;;22((DNN:++??"&++"J"J #At{{'N'NQR'R S)-)M)M& OOd.C.CCdF]F]]+PQW+X(\\%++t}}6F*GHii 0 0$..4==2PW\]ii 0 0$..4==2PW\]ii 0 0$..4==2PW\]--%UXX0099%,,s:KLLY<)?(F(F(H(O(O(Q^cd"&"E"E"G68O\abLL778>>@ 	 	
r3   c                    t        j                  t        j                  | j                  | j                  ft         j
                        d      j                  }t        j                  t        j                  | j                  | j                  ft         j
                        | j                  | j                  z         }t        j                  | j                  | j                  ft         j
                        }||z  |z  }|S )Nr   r   )diagonal)	r/   trilrO   r   r   rc   Tr   r   )rQ   lower_causal_maskupper_causal_maskr   s       r4   r   z4Gemma3nAudioAttention.create_local_causal_valid_maskc  s    !JJJJ))4??;5::N
 ! 	 "JJJJ):):;5::N**T-D-DD
 #(**doot?P?P-QY^YcYc"d"9<M"MPa"a&&r3   xpad_left	pad_rightr[   c                     |j                   ^}}}|j                  ||g|      }|j                  ||g|      }t        j                  |||gd      }|S )Nr#   r   )r   	new_zerosr/   r   )	rQ   r   r   r   batchr   
tail_shapeleftrights	            r4   	_pad_dim1zGemma3nAudioAttention._pad_dim1p  s^     !q:{{E89j9:UI;
;<IItQ&A.r3   rA   c                 (   |j                   }|dd \  }}|| j                  z   dz
  | j                  z  }|| j                  z  |z
  x}dkD  r| j                  |d|      }||| j                  f|dd z   }|j                  |      j	                         }|S )aE  Turns a sequence to non overlapping blocks.

        Args:
            hidden_states: a tensor of [batch, time, ...].

        Returns:
            A tensor of [batch, num_blocks, block_size, ...], with necessary
            paddings,
            where output[:, i, ...] are x[:, i*block_size:(i+1)*block_size, ...].
        Nr!   r#   r   )r   r   r   r   
contiguous)rQ   rA   r   bt
num_blockspadding_lenpermute_dimss           r4   _convert_to_blockz'Gemma3nAudioAttention._convert_to_blockw  s     ##Ray1$//)A-$//A
%7!;;Kq@ NN=![IM:t7%)C%--l;FFHr3   c                 \   | j                   }| j                  | j                  z   dz
  }| j                  |||      }| j                  }| j                  }|j                  d||      }|j                  dkD  r'|j                  dkD  rt        j                  |dd      }|j                         S )a  Extracts temporal context for every block.

        Args:
            hidden_states: a tensor of [batch, time, ...].

        Returns:
            A tensor of [batch, num_blocks, context_size, ...], with necessary
            paddings,
            where context_size = block_size + left_context + right_context,
            and output[:, i, ...] are x[:, start-left_context:end+right_context,
            ...],
            start = i * block_size, end = (i + 1) * block_size.
        r#   )	dimensionsizestepr!   r   rT   )sourcedestination)
r   r   r   r   r   unfoldndimr/   movedimr   )rQ   rA   r   r   	frame_len
frame_step
x_unfoldeds          r4   _extract_block_contextz,Gemma3nAudioAttention._extract_block_context  s     (( ++doo=A	}h	J%%	__
 #))AIJ)W
 !joo&9 z"!LJ$$&&r3   maskc                 	   g |j                   d d | j                  | j                  }| j                  |      j	                  |      j                         }| j                  |      j	                  |      j                         }| j                  |      j	                  |      j                         }t        j                  j                  j                  | j                        }ddd| j                  f}|j                  |      }	|| j                  z  |	z  }|j                   d d \  }
}| j                  |      }| j!                  |      }| j!                  |      }|j                   d   }| }| j!                  |      }|j"                  dk(  rI|j                   d   |j                   d   z  | j$                  k(  r|j	                  |
|| j$                        }|j                   |
|| j$                  fk7  r,t'        d|j                    d|
 d| d| j$                   d		      |j)                  d      j)                  d
      }| j*                  j)                  d      j)                  d      j)                  d      }t        j,                  ||j/                  |j0                              }| j3                  ||      }| j4                  j/                  |j0                        }||z  }t        j6                  |      }||z  }t        j8                  ||t        j:                  |j<                        j>                        }t        j                  j                  jA                  |dt        jB                        j/                  |j<                        }|j                   \  }}}}}|j                   d   }|jE                  ddddd      j	                  d||      }|jE                  ddddd      j	                  d||      }t        jF                  ||      } | j	                  |||||      jE                  ddddd      }!|!j	                  |
|| jH                  z  | j                  | j                  f      }!|!d d d |f   }!|!S )NrT   r#   r!   r   r   z%Shape of extracted_valid_mask_blocks z	 is not (z, z) after potential reshape.r   rF   r   r   )%r   rs   rv   r   r   r   r   r   r/   rM   r   r   r   viewr   r   r   r   r   
ValueErrorr   r   logical_andr   r   r   r   tanhwherefinfor   minsoftmaxr   r   bmmr   )"rQ   rA   r   	qkv_shapequery_states
key_statesvalue_statesper_dim_scale_spbroadcast_shapeper_dim_scale_sp_broadcastr   q_timequery_blocks
key_blocksvalue_blocksr   original_valid_maskextracted_valid_mask_blockscondition_from_input_validitycondition_from_causalityfinal_condition_for_wherer?   softcap_valprobabilitiesb_dimn_dimu_dimw_dimc_dimh_dimprob_bunv_bun
result_bmmcontext_vectorss"                                     r4   ra   zGemma3nAudioAttention.forward  sT   Nm))#2.NNN	{{=199)DOOQ[[/77	BMMO
{{=199)DOOQ 88..778J8JKaDMM2%5%:%:?%K"#dll25OO)//3
F--l;00<
22<@'--a0  $e '+&A&ABU&V# (,,1+11!47R7X7XYZ7[[_c_p_pp*E*M*M,d.?.?+' ',,1
 

 /556i
| L$%R(9(9'::TV  )D(M(Ma(P(Z(Z[](^% $(#?#?#I#I!#L#V#VWX#Y#c#cde#f 
 %*$5$5)$''(E(L(LM%
! 11,
K lloofmm4+%F#+% 6FLL@Y@]@]^++33F%--3X[[bnbtbt[u -:,?,?)ueUE""2& ((Aq!Q7??E5Q$$Q1a3;;BuMYYx/
$,,UE5%OWWXY[\^_abdef)11 4??2	
 *!WfW*5r3   )r+   r,   r-   r$   rL   r   r/   rd   rb   r   r   r   r0   ra   re   rf   s   @r4   r   r   @  s     
1  
D'5<< 3 3 5<< u||  ,.'ELL .'U\\ .'`dU\\ d9I9I dell dr3   r   c                   r     e Zd ZdZ	 d	dedee   def fdZdej                  dej                  fdZ
 xZS )
Gemma3nAudioCumulativeGroupNorma  Applies Group Normalization cumulatively over the time dimension.

    This layer normalizes the input by calculating the mean and variance
    cumulatively over the time dimension (dim 1). The statistics are computed
    over all feature dimensions (specified by `feature_dims` and `num_channels`)
    for elements marked as valid by the optional `mask`.

    If a `mask` is provided (True for valid, False for invalid/padded),
    invalid time steps do not contribute to the statistics calculation, and
    their corresponding output values are zeroed out.

    Scale and bias, if enabled, are applied per-channel (last dimension).
    This behavior is similar to JAX's `GroupNormalization` with `num_groups=1`
    and `cumulative=True`.
    num_channelsfeature_dimsrG   c           	         t         |           || _        t        |      | _        || _        t        j                  t        j                  |            | _
        t        t        ddt        | j                        z   dz               | _        y )Nr!   r#   )rK   rL   r)  rC   r*  rG   rM   rN   r/   rO   rP   rangelenreduction_axes)rQ   r)  r*  rG   rR   s       r4   rL   z(Gemma3nAudioCumulativeGroupNorm.__init__5  sr     	(!,/ ll5::l#;< $E!QT5F5F1G-G!-K$LMr3   rA   r[   c                    | j                   | j                  fz   }|j                  dd |k7  rt        d|j                  dd  d|       |j                  }t
        j                  }|j                  |      }t        j                  ||      }t        j                  || j                  d      }t        j                  |d	      }t        j                  || j                  d      }	t        j                  |	d	      }
t        j                  |
d
      }||z  }||z
  j                  d      }t        j                  || j                  d      }t        j                  |d	      }||z  }||z
  t        j                  || j                  z         z  }| j                   j                  |      }dg|j#                         dz
  z  | j                  gz   }||j%                  |      z  }||z  }|j                  |      S )zApplies cumulative group norm, optionally using a mask.

        Args:
          hidden_states: Input tensor, shape [B, T, *feature_dims, C].

        Returns:
          Normalized tensor with the same shape as x.
        r!   NzInput tensor shape suffix z> does not match expected suffix (feature_dims + num_channels) r   TrF   rU   r#   r   rm   )r  )r*  r)  r   r  r   r/   r   r   	ones_likesumr.  cumsumclamprW   rsqrtrG   rP   rF   r  )rQ   rA   expected_input_suffixinput_dtype
calc_dtypex_calc	mask_calcsum_values_at_tcum_sum_valueselements_in_group_at_tcum_count_elementssafe_cum_count_elementscum_meansquared_diff_from_meansum_sq_diff_at_tcum_sum_sq_diffcum_variancenormalized_xscalescale_view_shapefinal_outputs                        r4   ra   z'Gemma3nAudioCumulativeGroupNorm.forwardG  s    !% 1 1T5F5F4H Hqr"&;;,]-@-@-D,E F99N8OQ 
 $))]]
!!*- OOF*=	  ))F0C0CTRo1= "'9$:M:MW[!\"\\*@aH"'++.@c"J "$;;
 #)8"3!8!8!; 99%;ATAT^bc  ,,'7Q? ')@@ )U[[9P-QQ z*3-"3"3"5"9:d>O>O=PP#ejj1A&BB $i/{++r3   )gMbP?)r+   r,   r-   r.   rb   r   r^   rL   r/   rd   ra   re   rf   s   @r4   r(  r(  $  sT    ( 	NN smN 	N$G,U\\ G,ell G,r3   r(  c                   ~     e Zd ZdZ	 d
dedededeeeeef   f fdZdej                  dej                  fd	Z
 xZS )Gemma3nAudioSSCPConvBlockzA single convolution block for the SubSampleConvProjection.

    This block consists of a 2D convolution, followed by CumulativeGroupNorm,
    and a ReLU activation. It handles manual padding for the convolution.
    ri   idxinput_freq_dimmanual_paddingc                 J   t         |           || _        || _        |dk(  rdn| j                  j                  |dz
     }| j                  j                  |   }| j                  j
                  |   \  }}| j                  j                  |   \  }	}
t        j                  ||||f|	|
fdd      | _	        || j                  d   z   | j                  d   z   }||z
  |
z  dz   }t        ||f| j                  j                        | _        t        j                         | _        y )Nr   r#   )r   r   F)in_channelsout_channelskernel_sizestridepaddingrl   )r)  r*  rG   )rK   rL   ri   rM  sscp_conv_channel_sizesscp_conv_kernel_sizesscp_conv_stride_sizerM   Conv2dconvr(  sscp_conv_group_norm_epsnormReLU
activation)rQ   ri   rK  rL  rM  rO  rP  kernel_hkernel_wstride_hstride_wf_in_padded
f_out_convrR   s                r4   rL   z"Gemma3nAudioSSCPConvBlock.__init__  s%    	, !8a)K)KCRSG)T{{99#>![[>>sC(![[>>sC(II#% h'

	 %t':':1'==@S@STU@VV!H,9A=
3%$44
	 '')r3   audio_encodingsr[   c                    t        j                  || j                  dd      j                  | j                  j
                  j                        }| j	                  |      }|j                  dddd      j                         }| j                  |      }|j                  dddd      j                         }| j                  |      S )Nconstantr   )modevaluer   r!   r   r#   )Fr   rM  r   rX  rP   r   r   r   rZ  r\  )rQ   rc  audio_encodings_paddedaudio_encodings_conv
x_for_normx_normedaudio_encodings_normeds          r4   ra   z!Gemma3nAudioSSCPConvBlock.forward  s     "#8K8KR\dg!h!k!kII"""

  $yy)?@ *11!Q1=HHJ
99Z(!)!1!1!Q1!=!H!H!J566r3   ))r   r   r   r   )r+   r,   r-   r.   r$   rb   rC   rL   r/   rd   ra   re   rf   s   @r4   rJ  rJ    sc     5A)$")$ )$ 	)$
 c3S01)$V7u|| 7 7r3   rJ  c                   \     e Zd Zdef fdZdej                  dej                  fdZ xZS )#Gemma3nAudioSubSampleConvProjectionri   c                 p   t         |           || _        |j                  }g }g }t	        d      D ]n  }|j
                  |   \  }}|j                  |   \  }}	d}
|dz
  }d}d}|||
|f}|j                  |       ||z   |z   }||z
  |	z  dz   }|j                  |       |}p t        d|j                  ||d         | _	        t        d|d   ||d         | _
        |j                  d   }|d   }||z  | _        t        j                  | j                  | j                  j                  d      | _        y )Nr!   r   r#   )rK  rL  ri   rM  rT   Frk   )rK   rL   ri   input_feat_sizer,  rU  rV  appendrJ  conv_0conv_1rT  input_proj_in_featuresrM   r|   rt   input_proj_linear)rQ   ri   current_f_for_block_inputcalculated_block_paddingcalculated_f_out_dimsir]  r^  r_  r`  	pad_t_toppad_t_bottom
pad_f_leftpad_f_rightmanual_padding_tuplera  f_out_after_convfinal_c_outfinal_f_outrR   s                      r4   rL   z,Gemma3nAudioSubSampleConvProjection.__init__  s   $*$:$:!#%  "q 	9A!'!=!=a!@Hh!'!=!=a!@Hh I#a<L JK 	$  %++,@A 4j@;NK +h 68CaG!(()9:(8%=	9@ 0!113A6	
 0033A6	
 33B7+B/&1K&?#!#4+F+FH_H_fk!lr3   rc  r[   c                     |j                  d      }| j                  |      }| j                  |      }|j                  \  }}}}|j	                  dddd      j                         }|j                  ||||z        }	| j                  |	      }
|
S )Nr#   r   r!   r   )r   rs  rt  r   r   r   r  rv  )rQ   rc  audio_encodings_reshapedr   r   c_outt_outf_out
x_permutedoutput_flattenedoutputs              r4   ra   z+Gemma3nAudioSubSampleConvProjection.forward  s     $3#<#<Q#? KK01KKN!"5%YYq!Q*557
%??1eUU]C''(89r3   	r+   r,   r-   r$   rL   r/   rd   ra   re   rf   s   @r4   ro  ro    s.    7m1 7mru||  r3   ro  c                   t     e Zd Zdef fdZdej                  dej                  dej                  fdZ xZ	S )Gemma3nAudioConformerAttentionri   c                    t         |           || _        | j                  j                  | _        | j                  dt        j                  | j                  j                        d       t        | j                  j                        | _
        t        |      | _        t        j                  | j                  | j                  j                  d      | _        t        | j                  j                        | _        y )Ngradient_clippingFrp   rk   )rK   rL   ri   rt   post_in_featuresr   r/   r   r  rE   pre_attn_normr   attnrM   r|   post	post_normrQ   ri   rR   s     r4   rL   z'Gemma3nAudioConformerAttention.__init__   s     $ 7 70%,,t{{?\?\2]jop+DKK,C,CD)&1	IId33T[[5L5LSXY	'(?(?@r3   rc  r*   r[   c                    |}t        j                  || j                   | j                        }| j                  |      }| j	                  ||      }|j
                  \  }}}}	|j                  ||||	z        }
| j                  |
      }t        j                  || j                   | j                        }|| j                  |      z   S r]   )	r/   r4  r  r  r  r   r   r  r  )rQ   rc  r*   audio_encodings_input_to_attnaudio_encodings_normaudio_encodings_attn_outr   r   rs   rv   r  s              r4   ra   z&Gemma3nAudioConformerAttention.forward*  s    (7%++o8N8N7NPTPfPfg#11/B#'99-A>#R  %=$B$B!1i#;#C#CAq)V^J^#_ ))$<=++o8N8N7NPTPfPfg,t~~o/NNNr3   
r+   r,   r-   r$   rL   r/   rd   r0   ra   re   rf   s   @r4   r  r    sA    A1 AOu|| OUEUEU OZ_ZfZf Or3   r  c                   \     e Zd Zdef fdZdej                  dej                  fdZ xZS ) Gemma3nAudioConformerFeedForwardri   c                    t         |           || _        | j                  dt	        j
                  | j                  j                        d       t        | j                  j                        | _	        t        j                  | j                  j                  | j                  j                  dz  d      | _        t        j                  | j                  j                  dz  | j                  j                  d      | _        t        | j                  j                        | _        | j                  j                  | _        y )Nr  Frp   r   rk   )rK   rL   ri   r   r/   r   r  rE   rt   pre_layer_normrM   r|   ffw_layer_1ffw_layer_2post_layer_normconf_residual_weightpost_layer_scaler  s     r4   rL   z)Gemma3nAudioConformerFeedForward.__init__<  s    0%,,t{{?\?\2]jop,T[[-D-DE99T[[%<%<dkk>U>UXY>Y`ef99T[[%<%<q%@$++BYBY`ef-dkk.E.EF $ @ @r3   rc  r[   c                    |}t        j                  || j                   | j                        }| j                  |      }| j	                  |      }t
        j                  j                  |      }| j                  |      }t        j                  || j                   | j                        }| j                  |      }||| j                  z  z   S r]   )r/   r4  r  r  r  rM   r   silur  r  r  )rQ   rc  residuals      r4   ra   z(Gemma3nAudioConformerFeedForward.forwardH  s    "++o8N8N7NPTPfPfg--o>(,(8(8(I--,,_=(,(8(8(I++o8N8N7NPTPfPfg..??T-B-BBCCr3   r  rf   s   @r4   r  r  ;  s0    
A1 
A	Du|| 	D 	Dr3   r  c                   \     e Zd Zdef fdZdej                  dej                  fdZ xZS ) Gemma3nAudioConformerLightConv1dri   c           	         t         |           || _        t        | j                  j                  | j                  j
                        | _        t        j                  | j                  j                  | j                  j                  dz  d      | _	        t        j                  | j                  j                  | j                  j                  | j                  j                  dd| j                  j                  d      | _        | j                  dt        j                  | j                  j                         d	       t        | j                  j                  | j                  j
                        | _        t        j                  | j                  j                  | j                  j                  d      | _        | j                  j                  dz
  | _        y )
NrG   r!   Frk   r#   r   )rO  rP  rQ  rR  rS  groupsrl   r  rp   )rK   rL   ri   rE   rt   rms_norm_epsr  rM   r|   linear_startConv1dconf_conv_kernel_sizedepthwise_conv1dr   r/   r   r  	conv_norm
linear_endcausal_paddingr  s     r4   rL   z)Gemma3nAudioConformerLightConv1d.__init__U  sD   ,T[[-D-D$++JbJbcIIdkk&=&=t{{?V?VYZ?Zafg "		//0099;;**!
 	0%,,t{{?\?\2]jop'(?(?T[[E]E]^))DKK$;$;T[[=T=T[`a"kk??!Cr3   rc  r[   c                 :   |}| j                  |      }| j                  |      }t        j                  j                  j                  |d      }|j                  ddd      }t        j                  || j                  df      }| j                  |      }|j                  ddd      }t        j                  || j                   | j                        }| j                  |      }t        j                  j                  |      }| j                  |      }||z   }|S )NrT   r   r   r!   r#   )r  r  r/   rM   r   glur   rh  r   r  r  r4  r  r  r  r  )rQ   rc  audio_encodings_residualaudio_encodings_permutedaudio_encodings_permuted_paddedr  s         r4   ra   z(Gemma3nAudioConformerLightConv1d.forwardj  s   #2 --o>++O<((--11/r1J#2#:#:1a#C *+%%0H4K^K^`aJb*c'//0OP)11!Q:++o8N8N7NPTPfPfg..9--,,_=///: #;;r3   r  rf   s   @r4   r  r  T  s-    D1 D*u||  r3   r  c                   t     e Zd Zdef fdZdej                  dej                  dej                  fdZ xZ	S )Gemma3nAudioConformerBlockri   c                    t         |           || _        t        | j                        | _        t        | j                        | _        t        | j                        | _        t        | j                        | _	        | j                  dt        j                  | j                  j                        d       t        | j                  j                        | _        y )Nr  Frp   )rK   rL   ri   r  ffw_layer_startr  	attentionr  lconv1dffw_layer_endr   r/   r   r  rE   rt   rZ  r  s     r4   rL   z#Gemma3nAudioConformerBlock.__init__  s    ?L7D7D=dkkJ0%,,t{{?\?\2]jop"4;;#:#:;	r3   rc  r*   r[   c                 j   | j                  |      }| j                  ||      }| }||j                  d      j                  |j                        z  }| j                  |      }| j                  |      }t        j                  || j                   | j                        }| j                  |      }|S )NrT   )r  r  r   r   r   r  r  r/   r4  r  rZ  )rQ   rc  r*   validity_mask_for_lconvaudio_encodings_for_lconv_inputr  s         r4   ra   z"Gemma3nAudioConformerBlock.forward  s    ..?...I#1/*9<S<]<]^`<a<d<d!!=
 +
' ,,'FG,,_=++o8N8N7NPTPfPfg?+r3   r  rf   s   @r4   r  r    s;    	<1 	<u|| UEUEU Z_ZfZf r3   r  c            	       Z     e Zd ZdZd	dedededef fdZdej                  f fdZ	 xZ
S )
Gemma3nTextScaledWordEmbeddingz\
    This module overrides nn.Embeddings' forward by multiplying with embeddings scale.
    num_embeddingsembedding_dimpadding_idxembed_scalec                     t         |   |||       || _        | j                  dt	        j
                  |      d       y )Nr  Frp   )rK   rL   scalar_embed_scaler   r/   r   )rQ   r  r  r  r  rR   s        r4   rL   z'Gemma3nTextScaledWordEmbedding.__init__  s;    D"-]ELL,ERWXr3   	input_idsc                     t         |   |      | j                  j                  | j                  j
                        z  S r]   )rK   ra   r  r   rP   r   )rQ   r  rR   s     r4   ra   z&Gemma3nTextScaledWordEmbedding.forward  s2    wy)D,<,<,?,?@Q@Q,RRRr3   )rm   )r+   r,   r-   r.   rb   r^   rL   r/   rd   ra   re   rf   s   @r4   r  r    sG    Ys Y3 YS Y_d Y
S S Sr3   r  c                   `     e Zd ZdZdef fdZdej                  dej                  fdZ xZ	S )Gemma3nTextLaurelBlockz Learned Augmented Residual Layerri   c                    t         |           || _        t        j                  | j                  j
                  | j                  j                  d      | _        t        j                  | j                  j                  | j                  j
                  d      | _        t        | j                  j
                  | j                  j                        | _        y )NFrk   r  )rK   rL   ri   rM   r|   rt   laurel_ranklinear_leftlinear_rightrE   r  post_laurel_normr  s     r4   rL   zGemma3nTextLaurelBlock.__init__  s    99T[[%<%<dkk>U>U\abIIdkk&=&=t{{?V?V]bc .t{{/F/FDKKLdLd er3   rA   r[   c                 r    | j                  |      }| j                  |      }| j                  |      }||z   S r]   )r  r  r  )rQ   rA   laurel_hidden_statesnormed_laurel_hidden_statess       r4   ra   zGemma3nTextLaurelBlock.forward  sC    -1-=-=m-L-1->->?S-T&*&;&;<P&Q#:::r3   )
r+   r,   r-   r.   r&   rL   r/   rd   ra   re   rf   s   @r4   r  r    s0    *f0 f;U\\ ;ell ;r3   r  c                        e Zd Zd	dedef fdZdej                  dej                  fdZdej                  dej                  fdZ	 xZ
S )
Gemma3nTextMLPri   	layer_idxc                    t         |           || _        |j                  | _        |j                  |   | _        t        j                  | j                  | j                  d      | _        t        j                  | j                  | j                  d      | _        t        j                  | j                  | j                  d      | _	        t        |j                     | _        |j                  |   | _        y NFrk   )rK   rL   ri   rt   intermediate_sizerM   r|   	gate_projup_proj	down_projr	   hidden_activationact_fnactivation_sparsity_patternactivation_sparsityrQ   ri   r  rR   s      r4   rL   zGemma3nTextMLP.__init__  s    !--!'!9!9)!D4#3#3T5K5KRWXyy!1!143I3IPUV4#9#94;K;KRWXV556#)#E#Ei#P r3   rA   r[   c                     | j                  |      }| j                  dkD  r| j                  |      }| j                  |      }| j	                  |      }| j                  ||z        }|S )Nr   )r  r  _gaussian_topkr  r  r  )rQ   rA   r  activationsr  r  s         r4   ra   zGemma3nTextMLP.forward  sc    NN=1	##c)++I6Ikk),,,}-NN;#89	r3   inputsc                    t        j                  | j                  t         j                  |j                        }t         j
                  j                  j                  dd      }|j                  |      }|j                  |j                        }t        j                  |dd      }t        j                  |ddd      }|||z  z   }t        j                  j                  ||z
        S )	Nr   r   r   r#   rT   Tr0  F)rF   rU   unbiased)r/   r   r  r   r   distributionsnormalNormalicdfr   r   rX   stdrM   r   relu)rQ   r  target_sparsity_tensornormal_diststd_multiplierinputs_mean
inputs_stdcutoff_xs           r4   r  zGemma3nTextMLP._gaussian_topk  s    !&d.F.Femmdjdqdq!r ))00771='2'7'78N'O',,V\\:jjR>YYv2teL
n!<<}}!!&8"344r3   )r   )r+   r,   r-   r&   rb   rL   r/   rd   ra   r  re   rf   s   @r4   r  r    sP    	Q0 	QS 	QU\\ ell 5U\\ 5ell 5r3   r  c                   X    e Zd ZdZdef fdZdej                  dej                  fdZdej                  dej                  fdZ	d	ej                  d
ej                  dej                  fdZ
dej                  dej                  fdZdej                  dej                  fdZ xZS )Gemma3nTextAltUpa  Alternating Updates (AltUp)

    The AltUp module wraps transformer layers. The `predict` step modifies the
    input to the transformer layer, and the `correct` step propagates the output
    of the transformer layer to the sparsely updated dimensions.

    See more in the research paper:

    https://proceedings.neurips.cc/paper_files/paper/2023/file/f2059277ac6ce66e7e5543001afa8bb5-Paper-Conference.pdf
    ri   c                 F   t         |           || _        t        j                  t        j                  | j                  j                              | _        t        j                  | j                  j                  | j                  j                  d      | _        t        j                  | j                  j                  | j                  j                  dz  d      | _        t        j                  | j                  j                  | j                  j                  d      | _        t        | j                  j                  | j                  j                        | _        | j#                  dt        j$                  | j                  j                  dz        d       y )NFrk   r!   r  router_input_scale      rp   )rK   rL   ri   rM   rN   r/   r   rt   correct_output_scaler|   altup_num_inputscorrection_coefsprediction_coefsmodality_routerrE   r  router_normr   r   r  s     r4   rL   zGemma3nTextAltUp.__init__  s   $&LLT[[=T=T1U$V! "		$++*F*FHdHdkp q "		$++*F*FHdHdfgHgns t!yy)@)@$++B^B^ejk)$++*A*At{{G_G_`15<<@W@WY]@]3^kpqr3   r   r[   c                     | j                  |      | j                  z  }| j                  |      }t        j                  |j                               j                  |      S r]   )r  r  r  r/   r  r^   r_   )rQ   r   router_inputsrouteds       r4   compute_router_modalitiesz*Gemma3nTextAltUp.compute_router_modalities  sM    ((+d.E.EE%%m4zz&,,.)11!44r3   rA   c                    | j                  || j                  j                           }| j                  ro| j                  j                  Y| j
                  j                  j                  j                  | j                  j                   | j                  j                          | j                  |      j                  g |j                  dd | j                  j                  | j                  j                   j                  dddd      }t        j                  |j                  dddd      |      }|j                  dddd      }||z  }|j                         j!                  |      S )a  Predicts the output of a layer using a trainable map.

        Args:
            hidden_states: A 4D tensor of shape `[num_altup_inputs, batch_size, num_tokens, hidden_size]` derived by
                stacking the input embeddings and preprocessing the last `num_altup_inputs - 1` matrices.

        Returns:
            A 4D tensor of shape `[num_altup_inputs, batch_size, num_tokens, hidden_size]` containing the predictions.
        NrT   r   r#   r   r!   )r  ri   altup_active_idxtrainingaltup_coef_clipr   rP   dataclamp_r   r   r  r   r/   r   r   r_   )rQ   rA   
modalities	all_coefspredictionss        r4   predictzGemma3nTextAltUp.predict  s@    33M$++B^B^4_`
==T[[88D!!((--44dkk6Q6Q5QSWS^S^SnSnoD!!*-Wi &&s+i-1[[-I-IiKO;;KgKgiWQ1a  	 ll=#8#8Aq!#DiP!))!Q15}$%%'//>>r3   r  	activatedc                    | j                  |      }||| j                  j                     z
  }|j                  | j                  j                  ddd      }| j
                  r| j                  j                  | j                  j                  j                  | j                  j                   | j                  j                        }t        j                  j                  j                  ||d      dz   }n| j                  |      dz   }|j                  ddd      j                  d      }t        j                   ||      }||z  }|j#                         j%                  |      S )a_  Corrects the predictions relative to the

        Args:
            predictions: A 4D tensor of shape `[num_altup_inputs, batch_size, num_tokens, hidden_size]` derived by
                stacking the input embeddings and preprocessing the last `num_altup_inputs - 1` matrices.
            activated: A 3D tensor of shape `[batch_size, num_tokens, hidden_size]` containing the activated inputs.

        Returns:
            A 4D tensor of shape `[num_altup_inputs, batch_size, num_tokens, hidden_size]` correcting the original
                predictions relative to the activated input embeddings.
        r#   Nrk   rm   r!   r   rT   )r  ri   r  repeatr  r	  r
  r  rP   r4  r/   rM   r   linearr   r   mulr   r_   )rQ   r  r  r  
innovationrP   r  	correcteds           r4   correctzGemma3nTextAltUp.correct  s,    33I>
T[[-I-I!JJ
&&t{{'C'CQ1M
==T[[88D**11779T9T8TVZVaVaVqVqrF++22:vD2QTWWI--j9C?I
 %%aA.88<	IIj)4	[ 	##%--i88r3   r  c                 p    |j                  | j                        | j                  z  j                  |      S )a	  
        This is only defined as the `forward` so that accelerate hooks can move correctly `correct_output_scale`
        (which is a nn.Parameter, not a Module) between devices when offloading. It is otherwise only used in
        `scale_corrected_output`
        )r_   r  rQ   r  s     r4   ra   zGemma3nTextAltUp.forward6  s2     !!$";";<t?X?XXaabkllr3   c                 $    | j                  |      S )zMScales the provided 3D tensor of shape [batch_size, num_tokens, hidden_size].)ra   r  s     r4   scale_corrected_outputz'Gemma3nTextAltUp.scale_corrected_output>  s    ||I&&r3   )r+   r,   r-   r.   r&   rL   r/   rd   r  r  r  ra   r  re   rf   s   @r4   r  r    s    	r0 r55<< 5ELL 5
?U\\ ?ell ?895<< 9ELL 9U\\ 9>m m%,, m' ' 'r3   r  c                     | dd| j                   d   dz  f   }| d| j                   d   dz  df   }t        j                  | |fd      S )z*Rotates half the hidden dims of the input..NrT   r!   r   )r   r/   r   )r   x1x2s      r4   rotate_halfr   C  sZ    	
3"!''"+"""	#B	
3q ""	#B99rc2YB''r3   rA   n_repr[   c                     | j                   \  }}}}|dk(  r| S | dddddddddf   j                  |||||      } | j                  |||z  ||      S )z
    This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
    num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
    r#   N)r   expandr   )rA   r!  r   num_key_value_headsslenrv   s         r4   	repeat_kvr&  J  so    
 2?1D1D.Ehz!!Qa"23::5BUW\^bdlmM  (;e(CT8TTr3   modulequerykeyrg  attention_maskdropoutscalingr   c                 |   || j                   dz  }t        || j                        }	t        || j                        }
t        j                  ||	j                  dd            |z  }|||z  }t        j                  |      }||z  }|||z   }t        j                  j                  |dt        j                        j                  |j                        }t        j                  j                  ||| j                        }t        j                  ||
      }|j                  dd      j                         }||fS )NrV   r!   r   rT   r  )pr	  r#   )rv   r&  num_key_value_groupsr/   r   	transposer  rM   r   r	  r   r   r   r+  r	  r   )r'  r(  r)  rg  r*  r+  r,  r   kwargsr  r  attn_weightsattn_outputs                r4   eager_attention_forwardr4  V  s    //4'3 ; ;<JUF$?$?@L<<z';';Aq'ABWLL#g-zz,/#g-!#n4 ==((2U]](SVVW\WbWbcL==((6??([L,,|\:K''1-88:K$$r3   r   r   r   unsqueeze_dimc                 n    |j                  |      }|j                  |      }| |z  t        |       |z  z   S )a\  Applies Rotary Position Embedding to the query and key tensors.

    Args:
        x (`torch.Tensor`): The tensor to embed.
        cos (`torch.Tensor`): The cosine part of the rotary embedding.
        sin (`torch.Tensor`): The sine part of the rotary embedding.
        unsqueeze_dim (`int`, *optional*, defaults to 1):
            The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
            sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
            that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
            k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
            cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
            the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
    Returns:
        `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
    )r   r   )r   r   r   r5  s       r4   apply_rotary_pos_embr7  x  s8    " --
&C
--
&CGA,--r3   c                        e Zd ZdZdedef fdZ	 	 	 ddej                  dej                  dej                  dz  d	e	dz  d
e
e   deej                  ej                  dz  eej                     dz  f   fdZ xZS )Gemma3nTextAttentionz=Multi-headed attention from 'Attention Is All You Need' paperri   r  c                    t         |           t        |d      r|j                  |   nd | _        || _        || _        t        |d|j                  |j                  z        | _
        |j                  |j                  z  | _        d| _        | j
                  j                  | _        d| _        t!        j"                  |j                  |j                  | j                  z  |j$                        | _        t!        j"                  |j                  |j                  | j                  z  |j$                        | _        t!        j"                  |j                  |j                  | j                  z  |j$                        | _        t!        j"                  |j                  | j                  z  |j                  |j$                        | _        | j                  dk(  r|j.                  nd | _        | j                  dk(  | _        t3        |j                  |j4                        | _        t3        |j                  |j4                        | _        t3        |j                  |j4                  d	      | _        | j
                  j<                  | j
                  j>                  z
  }||cxk\  xr d
kD  nc | _         |j                  d | }| j@                  r@tC        |      dz
  |d d d   jE                  |j                  |         z
  | _#        d| _$        y d | _#        |tC        |      dz
  |d d d   jE                  |j                  |         z
  k(  | _$        y )Nlayer_typesrv   rm   Trk   sliding_attention)rF   rG   F)rF   rG   rH   r   r#   rT   )%rK   rL   hasattrr;  
layer_typeri   r  getattrrt   num_attention_headsrv   r$  r/  r,  attention_dropout	is_causalrM   r|   attention_biasr   r   r   o_projsliding_window
is_slidingrE   r  q_normk_normv_normnum_hidden_layersnum_kv_shared_layersis_kv_shared_layerr-  indexkv_shared_layer_indexstore_full_length_kv)rQ   ri   r  first_kv_shared_layer_idxprev_layersrR   s        r4   rL   zGemma3nTextAttention.__init__  s   ;B6=;Y&,,Y7_c"
F4F4F&JdJd4de$*$>$>&B\B\$\!!%!>!>ii : :T]] JQWQfQf
 ii : :T]] JQWQfQf
 ii : :T]] JQWQfQf
 ii&&68J8JQWQfQf
 8<J]7]f33cg//-@@$f>Q>QR$f>Q>QR$f>Q>Q^cd$(KK$A$ADKKDdDd$d!"+/H"L1"L(()C*CD""),[)9A)=DbD@Q@W@WX^XjXjktXu@v)vD&(-D%)-D&(1S5E5IKX\Z\X\L]LcLc""9-M 6 )D%r3   NrA   position_embeddingsr*  r@   r1  r[   c                    |j                   d d }g |d| j                  j                  }|\  }}	| j                  |      j	                  |      }
| j                  |
      }
t        |
||	d      }
|
j                  dd      }
| j                  rU|S|j                  | j                     \  }}|j                  |
j                        }|j                  |
j                        }n| j                  |      j	                  |      }| j                  |      }t        |||	d      }|j                  dd      }| j                  |      j	                  |      }| j!                  |      }|j                  dd      }|f| j                  s |j#                  ||| j$                        \  }}| j&                  r.t)        |d      si |_	        ||f|j                  | j$                  <   t+        j,                  | j                  j.                  t0              } || |
|||f| j2                  r| j4                  nd| j6                  | j8                  d|\  }} |j:                  g |d j=                         }| j?                  |      }||fS )NrT   r!   )r5  r#   shared_layersr   )r+  r,  rE  ) r   ri   rv   r   r  rG  r7  r0  rL  rT  rN  r   r   r   rH  r   rI  updater  rO  r=  r   get_interface_attn_implementationr4  r	  rA  r,  rE  r   r   rD  )rQ   rA   rR  r*  r@   r1  input_shapehidden_shaper   r   r  r  r  attention_interfacer3  r2  s                   r4   ra   zGemma3nTextAttention.forward  sq    $))#2.??b?$++*>*>?&S{{=166|D{{<0+L#sRST#--a3 ""'B'6'D'DTE_E_'`$J#|':':;J'??<+>+>?L]388FJZ0J-j#sRSTJ#--a3J;;}5::<HL;;|4L'11!Q7L&**+:+A+A*l\`\j\j+k(
L((@46O1@JL@X--dnn=(?(M(MKK,,.E)
 %8
%
 /3mmD**LL..
%
 
%
!\ *k));;;;FFHkk+.L((r3   )NNN)r+   r,   r-   r.   r&   rb   rL   r/   rd   r
   r   r   rC   ra   re   rf   s   @r4   r9  r9    s    G*0 *S *^ -1.2(,;)||;) #\\;) t+	;)
 ;) +,;) 
u||U\\D0%2E2LL	M;)r3   r9  c                   .    e Zd Zdedef fdZ	 	 	 	 	 ddej                  dej                  dej                  dej                  dz  d	ej                  dz  d
e	dz  de
e   deej                  eej                  ej                  f   dz  f   fdZ xZS )Gemma3nTextDecoderLayerri   r  c                 x   t         |           || _        |j                  | _        || _        t        ||      | _        t        ||      | _        t        | j                  |j                        | _        t        | j                  |j                        | _        t        | j                  |j                        | _        t        | j                  |j                        | _        |j                  | _        t         |j"                     | _        t'        |      | _        t+        |      | _        t/        j0                  | j                  | j                  d      | _        t/        j0                  | j                  | j                  d      | _        t        | j                  |j                        | _        y )N)r  r  Frk   )rK   rL   ri   rt   r  r9  	self_attnr  mlprE   r  input_layernormpost_attention_layernormpre_feedforward_layernormpost_feedforward_layernormhidden_size_per_layer_inputr	   r  r  r  altupr  laurelrM   r|   per_layer_input_gateper_layer_projectionpost_per_layer_input_normr  s      r4   rL   z Gemma3nTextDecoderLayer.__init__  sJ   !--"-fi@!&I>-d.>.>FDWDWX(6t7G7GVM`M`(a%)78H8HfNaNa)b&*89I9IvObOb*c'+1+M+M(V556%f-
,V4$&IId.>.>@`@`gl$m!$&IId.N.NPTP`P`gl$m!)78H8HfNaNa)b&r3   NrA   rR  per_layer_inputr*  position_idsr@   r1  r[   c           	      r   | j                   j                  |      }|| j                  j                     }	| j	                  |	      }
| j                  |
      } | j                  d|
||||d|\  }}| j                  |      }|	|z   }||z   t        j                  d      z  }| j                  |      }| j                  |      }| j                  |      }||z   }| j                   j                  ||      }|| j                  j                     j                         }| j                  j                  r| j                   j!                  |      }| j#                  |      }| j%                  |      }t'        j(                  ||      }| j+                  |      }| j-                  |      }|dd xxx |z  ccc |S )N)rA   r*  rk  rR  r@   r!   r#   r2   )re  r  ri   r  r`  rf  r^  ra  r~   sqrtrb  r_  rc  r  r   altup_correct_scaler  rg  r  r/   multiplyrh  ri  )rQ   rA   rR  rj  r*  rk  r@   r1  r  active_predictionactive_prediction_normedlaurel_outputr  r   
attn_gatedattn_laurel	attn_normattn_ffwattn_ffw_normattn_ffw_laurel_gatedcorrected_predictionsfirst_predictions                         r4   ra   zGemma3nTextDecoderLayer.forward  s    jj((7'(D(DE#'#7#78I#J $<= $.. 
2)% 3+
 
a ,,T2&-
!M1TYYq\A22;?	88I&77A +m ; $

 2 2;@U V01M1MNTTV;;**#zz@@AQR  445EF;;'78 >>*:OL  445EF99:JKab!%55!$$r3   )NNNNN)r+   r,   r-   r&   rb   rL   r/   rd   
LongTensorr
   r   r   rC   r;   ra   re   rf   s   @r4   r\  r\    s    c0 cS c0 -1(,.204(,1%||1% #\\1% 	1%
 t+1% &&-1% 1% +,1% 
u||U5#4#4e6G6G#GH4OO	P1%r3   r\  c                        e Zd ZU eed<   dZdZdgZdgZdZ	dZ
dZdZdZeedZdZ ej&                          fd       Z xZS )	Gemma3nPreTrainedModelri   modelTr\  r@   )rA   rB   )imagetextaudioc                 F   t         |   |       t        |t              r!t	        j
                  |j                         nt        |t              rt	        j                  |j                         |j                  dz  }dt        j                  j                  j                  t        j                  d            z  }t	        j                   |j"                  ||z         t	        j$                  |j&                  |j(                         t	        j                   |j*                  |j-                                nt        |t.              r,t	        j$                  |j0                  |j2                         nit        |t4              rXt	        j                  |j6                         t	        j$                  |j8                  | j:                  j<                  dz         nt        |t>              rd\  }}|j@                  dz  }tC        jD                  tG        |      tG        |      z        tI        |dz
  d      z  }|t        jJ                  t        jL                  |      | z        z  }t	        j                   |jN                  |jG                         jQ                  d      jQ                  d             n&t        |tR              rdt	        j$                  |jT                  | j<                  dz         t	        j$                  |jV                  dtC        jX                  d	      z         nt        |tZ              r|j\                  D ]  }	|j^                  }
|j`                  |	   d
k7  rtb        |j`                  |	      }
 |
|j:                  |	      \  }}t	        j                   te        ||	 d      |       t	        j                   te        ||	 d      |        tg        |d      r5t	        j$                  |jh                  | j:                  jh                         y y )NrV   rm   r   r  )rm   rn   r!   r#   r          @defaultr>  	_inv_freq_original_inv_freqr  )5rK   _init_weights
isinstancer(  initones_rP   r   zeros_r   rv   r/   rM   r   r   r   copy_r   	constant_r   r   r   r   r  r  r  r  r  r  ri   rt   rh   ru   r~   r   r^   rw   r   r   ro   r   Gemma3nTextModelper_layer_projection_scaleper_layer_input_scalerm  Gemma3nRotaryEmbeddingr;  compute_default_rope_parameters	rope_typer   r?  r=  r  )rQ   r'  r   r   r   r   r   r   ro   r>  rope_init_fncurr_inv_freqr   rR   s                r4   r  z$Gemma3nPreTrainedModel._init_weightsY  s   f%f=>JJv}}% 56KK,,-oot+G!4!4!=!=ell3>O!PPLJJv~~w'=>NN6>>6+K+KLJJv55v7\7\7^_ >?NN6--v/H/HI 01KK334NN644dkk6M6Mt6ST EF+5(M=#__1N&*hhu]/CeMFZ/Z&[^a"A_ '# +UYYu||N7SWnVn7n-ooNJJv,,n.B.B.D.N.Nq.Q.[.[\].^_ 01NN6<<d>N>NPT>TUNN677TYYs^9KL 67$00 ^
%EE##J/9<#6v7G7G
7S#TL#/*#U q

76j\+CDmT

76j\9K+LM}]^ 6./NN633T[[5R5RS 0r3   )r+   r,   r-   r%   r1   base_model_prefixsupports_gradient_checkpointing_no_split_modules_skip_keys_device_placement_supports_flash_attn_supports_sdpa_supports_flex_attn_can_compile_fullgraph_supports_attention_backendr\  r9  _can_record_outputsinput_modalitiesr/   no_gradr  re   rf   s   @r4   r}  r}  F  sv    &*#23#4"5N!"&0* 2U]]_%T %Tr3   r}  c                        e Zd ZU dZeed<   dZdZdef fdZe	e
dej                  dej                  dee   deez  fd	              Z xZS )
Gemma3nAudioEncoderzx
    An audio encoder based on the [Universal Speech Model](https://huggingface.co/papers/2303.01037) architecture.
    ri   	audio_melr  c                    t         |   |       || _        t        |      | _        t        j                  t        |j                        D cg c]  }t        |       c}      | _
        | j                          y c c}w r]   )rK   rL   ri   ro  subsample_conv_projectionrM   
ModuleListr,  conf_num_hidden_layersr  	conformer	post_init)rQ   ri   r   rR   s      r4   rL   zGemma3nAudioEncoder.__init__  se     )LV)T&9>v?\?\9]^A'/^
 	 _s   A=r*   r1  r[   c                 >   | j                  |      }|j                  d   }d}t        t        | j                  j
                              D ]!  }|| j                  j
                  |   d   z  }# t        j                  ||j                        |z  }t        j                  ||j                  d   dz
        }|j                  dkD  r>|j                  dk(  r/|j                  d      j                  |j                  d   d      }n`|j                  |j                  k(  rG|j                  d   dk(  r5|j                  d   dk7  r#||j                  d   k(  r|j                  d      }t        j                  |d|      }	| j                  D ]  }
 |
||	      } | j                  j                  dkD  r@|dddd| j                  j                  f   }|	dddd| j                  j                  f   }	|j!                  |	j                  d      d      }t#        ||	      S )	a  Encodes a batch of MELs.

        Args:
            audio_mel: a torch.Tensor of shape [batch, num_frames, num_channels,
              mel_bins].

        Returns:
            audio_encodings: a torch.Tensor of shape
                `[batch_size, self.config.audio_soft_tokens_per_image,
                self.config.audio_config.hidden_size]`
            audio_mel_mask: a torch.BoolTensor of shape [batch, num_frames].
        r#   r   r   )rw   rT   Nr   )last_hidden_stater*   )r  r   r,  r-  ri   rV  r/   r   r   r4  r   r   r#  gatherr  conf_reduction_factormasked_fillr)   )rQ   r  r*   r1  rc  t_subtime_stride_productstride_pair_idxindicescurrent_maskblocks              r4   ra   zGemma3nAudioEncoder.forward  s   " 88C  %%a($S)J)J%KL 	YO4;;#D#D_#UVW#XX	Y ,,u^-B-BCFYY++g>+?+?+BQ+FG "w||q'8''*11.2F2Fq2I2NG7<</$$Q'1,a A%q)) ''*G||NAw?^^ 	CE#O\BO	C ;;,,q0-a1UDKK4U4U1U.UVO'+Odkk.O.O+O(OPL)55l6L6LR6PRUV--'
 	
r3   )r+   r,   r-   r.   r$   r1   main_input_namer  rL   r   r    r/   rd   r0   r   r   rC   r)   ra   re   rf   s   @r4   r  r    sz     !O1   8
8
7<7G7G8
SYZlSm8
	/	/8
   8
r3   r  c                        e Zd ZU ej                  ed<   ddef fdZe	 	 	 	 ddedz  de	d   de
dz  dedz  d	ed
ef   f
d       Z ej                         edd              Z xZS )r  inv_freqNri   c                 v   t         |           |j                  | _        |j                  | _        || _        t        t        |j                              | _        i | _	        | j                  D ]  }| j
                  j                  |   }||d   | j                  |<   | j                  }| j                  |   dk7  rt        | j                  |      } || j
                  ||      \  }}| j                  | d|d       | j                  | d|j                         d       t        | | d|        y )	Nr  r  r  r  Frp   r  _attention_scaling)rK   rL   max_position_embeddingsmax_seq_len_cachedoriginal_max_seq_lenri   listsetr;  r  rope_parametersr  r   r   r   setattr)	rQ   ri   r   r>  rope_paramsr  r  curr_attention_scalingrR   s	           r4   rL   zGemma3nRotaryEmbedding.__init__  s8   "("@"@$*$B$B!F$6$6 78** 	UJ++55jAK")4[)ADNN:&%)%I%IL~~j)Y624>>*3MN4@fak4l1M1  J<y!9=UZ [  J</A!BMDWDWDYfk lDZL(:;=ST	Ur3   r   ztorch.deviceseq_lenr>  r[   ztorch.Tensorc                     | j                   |   d   }t        | dd      xs | j                  | j                  z  }d}d|t	        j
                  d|dt        j                        j                  |t        j                        |z  z  z  }||fS )	a|  
        Computes the inverse frequencies according to the original RoPE implementation
        Args:
            config ([`~transformers.PreTrainedConfig`]):
                The model configuration.
            device (`torch.device`):
                The device to use for initialization of the inverse frequencies.
            seq_len (`int`, *optional*):
                The current sequence length. Unused for this type of RoPE.
            layer_type (`str`, *optional*):
                The current layer type if the model has different RoPE parameters per type.
                Should not be used unless `config.layer_types is not None`

        Returns:
            Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
            post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).
        
rope_thetarv   Nrm   r   r!   r   r   )	r  r?  rt   r@  r/   r   int64r   r^   )ri   r   r  r>  baserF   attention_factorr  s           r4   r  z6Gemma3nRotaryEmbedding.compute_default_rope_parameters  s    2 %%j1,?fj$/c63E3EIcIc3c U\\!S!5;;?BB&X]XcXcBdgjjk
 )))r3   c                 N   t        | | d      }t        | | d      }|d d d d f   j                         j                  |j                  d   dd      j	                  |j
                        }|d d d d d f   j                         }t        |j
                  j                  t              r/|j
                  j                  dk7  r|j
                  j                  nd}t        |d	      5  |j                         |j                         z  j                  dd
      }	t        j                  |	|	fd      }
|
j                         |z  }|
j                         |z  }d d d        j	                  |j                        j	                  |j                        fS # 1 sw Y   AxY w)Nr  r  r   rT   r#   mpscpuF)device_typeenabledr!   r   r   )r?  r^   r#  r   r   r   r  r   strr   r0  r/   r   r   r   r   )rQ   r   rk  r>  r  attention_scalinginv_freq_expandedposition_ids_expandedr  freqsembr   r   s                r4   ra   zGemma3nRotaryEmbedding.forward  sl    4J<y!9:#DZL8J*KL$T1d]399;BB<CUCUVWCXZ\^_`ccdedldlm ,QaZ 8 > > @'1!((--'E!((--[`J`ahhmmfkUC 	0&,,.1F1L1L1NNYYZ[]^_E))UEN3C'')//C'')//C		0 vvAGGv$cff177f&;;;	0 	0s   *A1FF$NNNNNNr]   )r+   r,   r-   r/   rd   r1   r&   rL   staticmethodr   rb   r  rC   r^   r  r  r   ra   re   rf   s   @r4   r  r    s    llU0 U. +/+/"!%	!*!D(!*(!* t!* $J	!*
 
~u$	%!* !*F U]]_<  <r3   r  zBThe base Gemma 3n language model without a language modeling head.c                       e Zd ZU eed<   dZdef fdZe ed      e		 	 	 	 	 	 	 dde
j                  dz  de
j                  dz  d	e
j                  dz  d
e
j                  dz  dedz  de
j                  dz  dedz  dee   defd                     Zde
j                  de
j                  fdZ	 dde
j                  de
j                  dz  de
j                  fdZ xZS )r  ri   )r  c           
      |   t         |   |       |j                  | _        |j                  | _        t        |j                  |j                  | j                  | j                  j                  dz        | _        t        j                  t        |j                        D cg c]  }t        ||       c}      | _        t        |j                  |j                         | _        t%        |      | _        d| _        |j                  | _        |j*                  | _        t        |j,                  |j                  |j*                  z  | j                  |j*                  dz        | _        t        j0                  | j                  |j                  |j*                  z  d      | _        t        |j*                  |j                         | _        t        j                  t        d| j                  j6                        D cg c].  }t        j0                  | j                  | j                  d      0 c}      | _        t        j                  t        d| j                  j6                        D cg c].  }t        j0                  | j                  | j                  d      0 c}      | _        | j=                  dt?        j@                  | j                  dz        d	       | j=                  d
t?        jB                  t?        j@                  d            d	       | jE                          y c c}w c c}w c c}w )N      ?)r  r  Frk   r#   r  rV   rp   r  r  )#rK   rL   pad_token_idr  
vocab_sizer  rt   ri   embed_tokensrM   r  r,  rJ  r\  layersrE   r  rZ  r  
rotary_embgradient_checkpointingrd  vocab_size_per_layer_inputembed_tokens_per_layerr|   per_layer_model_projectionper_layer_projection_normr  altup_projectionsaltup_unembed_projectionsr   r/   r   r5  r  )rQ   ri   r  r   rR   s       r4   rL   zGemma3nTextModel.__init__)  s    !.. ++ ;v1143C3CQUQ\Q\QhQhjmQm
 mmINvOgOgIhiI$VY7i
 #6#5#56;N;NO	08&+#!--+1+M+M(&D--$$v'I'II::C?	'
# +-))$$v'I'II+
' *88Z8Z`f`s`s)t&!#PUVWY]YdYdYuYuPvw1RYYt'')9)9Fw"
 *,PUVWY]YdYdYuYuPvw1RYYt'')9)9Fw*
& 	95<<HXHXZ^H^;_lqr4ekk%,,sBS6Tafg 	K j4 x xs   "L/3L413L9F)tie_last_hidden_statesNr  per_layer_inputsr*  rk  r@   inputs_embeds	use_cacher1  r[   c           	      T   |du |duz  rt        d      |"| j                  |      }| j                  |      }| j                  ||      }|r|t	        | j
                        }|V||j                         nd}	t        j                  |j                  d   |j                        |	z   }|j                  d      }t        |x}
t              s)| j
                  ||||d}t        di |t        di |d}
|}t        j                   |d	z  d
d      dz  }t        j"                  d      }|g}t%        d| j
                  j&                        D ]  } | j(                  |dz
     |      }|j+                  |j,                  |j                        }t        j                   |d	z  d
d      }t        j.                  t        j0                  ||j+                  |j                                    }||z  |z  }|j3                  |        t        j4                  |d      }i }| j
                  j6                  D ]  }| j9                  |||      ||<    t;        | j<                  d| j
                  j>                         D ]Y  \  }}|
| j
                  j6                  |      }|dddd|ddf   } |||| j
                  j6                  |      |f|||d|}[ t        j                   |d   d	z  d
d      dz  }|d   g}t%        d| j
                  j&                        D ]  } | j@                  |dz
     ||         }|j+                  |j,                  |j                        }t        j                   |d	z  d
d      }t        j.                  t        j0                  ||j+                  |j                                    }||z  |z  }|j3                  |        t        j4                  |      }t        j                   |d      }| jC                  |      }tE        ||      S )z
        per_layer_inputs (torch.Tensor, *optional*, defaults to None):
            Pre-computed per-layer embeddings. If None, they are derived from input_ids if provided.
        N:You must specify exactly one of input_ids or inputs_embedsri   r   r#   r   )ri   r  r*  r@   rk  )full_attentionr<  r!   rT   Tr0  r  gh㈵>r  r   )r*  rk  r@   )r  r@   r2   )#r  r  get_per_layer_inputsproject_per_layer_inputsr   ri   get_seq_lengthr/   r   r   r   r   r  dictr   r   rX   r   r,  r  r  r   r   rm  maximumrr  stackr;  r  	enumerater  rJ  r  rZ  r   )rQ   r  r  r*  rk  r@   r  r  r1  past_seen_tokenscausal_mask_mappingmask_kwargshidden_states_0target_magnitudeepsilon_tensortemp_hidden_statesrz  
altup_projcurrent_hidden_statenew_magnituderA   rR  r>  decoder_layercausal_maskrj  altup_unemb_projs                              r4   ra   zGemma3nTextModel.forwardZ  s<   $ -t";<YZZ  --i8M#88C88HXY0*$++>OCRC^==?de <<(;(;A(>}G[G[\_ooL'11!4L ?-F ++!."0#2 ,K #5"C{"C%F%U%U# ( !::oq&8b$OSVVd+-.q$++667 	<A6//A6GJ#-==7L7LUeUlUl=#m !JJ';Q'>BPTUM!JJu}}]NDUDUVfVmVmDn'opM#7:J#J]#Z %%&:;	< $6A> ++11 	gJ.2oom\[e.f
+	g !*$++6U8U8U*V W 	A}-dkk.E.Ea.HIK.q!Qz:O)#DKK$;$;A$>?  +) / M		 !::mA&6!&;TRVYY+A./q$++667 	<A-RT-K-KAPQE-RS`abSc-d#3#6#6_=R=R[k[r[r#6#s !JJ';Q'>BPTUM!JJu}}]NDUDUVfVmVmDn'opM#7:J#J]#Z %%&:;	< $67

=a8		-0&++
 	
r3   c                      | j                  |      j                  g |j                  | j                  j                  | j
                   S r]   )r  r   r   ri   rJ  rd  )rQ   r  s     r4   r  z%Gemma3nTextModel.get_per_layer_inputs  sP    =t**95== 
__
KK))
 ,,
 	
r3   c                    | j                  |      }|| j                  j                  |j                  |j                        z  } |j
                  g |j                  d d | j                  j                  | j                   }| j                  |      }||S |j                  |j                  k7  r |dd | j                  j                  d d f   }||z   | j                  j                  |j                  |j                        z  S )Nr  rT   .)r  r  r   r   r   r   r   ri   rJ  rd  r  r  )rQ   r  r  rh  s       r4   r  z)Gemma3nTextModel.project_per_layer_inputs  s.   
 .2-L-L]-[ ? ? B B%%.B.I.I !C !
 	
  <3;;  
  "% 
KK)) 
 ,, 

  $==>RS#''%%)9)?)??/5Tt{{7T7T5TVW0WX$'774;U;U;X;X%%.B.I.I <Y <
 
 	
r3   )NNNNNNNr]   )r+   r,   r-   r&   r1   r  rL   r   r    r   r/   r{  rd   r
   r;   rc   r   r   r   ra   r  r  re   rf   s   @r4   r  r  $  sV    /0 /b  E2 .204.204(,26!%g
##d*g
  ,,-g
 t+	g

 &&-g
 g
 ((4/g
 $;g
 +,g
 
!g
  3  g
R
e.>.> 
5<< 
 15
||
  ,,-
 
	
r3   r  z?The base Gemma 3n language model with a language modeling head.c                   T    e Zd ZU ddiZddiZddgdgfiZeed<   def fdZe	e
	 	 	 	 	 	 	 	 dd
ej                  d	z  dej                  d	z  dej                  d	z  ded	z  dej                  d	z  dej                  d	z  ded	z  deej                  z  dee   defd              Z xZS )Gemma3nForCausalLMlm_head.weightzmodel.embed_tokens.weightlm_headcolwise_gather_outputrA   r?   ri   c                     t         |   |       t        |      | _        |j                  | _        t        j                  |j                  |j                  d      | _        | j                          y r  )
rK   rL   r  r~  r  rM   r|   rt   r	  r  r  s     r4   rL   zGemma3nForCausalLM.__init__  sU     %f-
 ++yy!3!3V5F5FUS 	r3   Nr  r*  rk  r@   r  labelsr  logits_to_keepr1  r[   c	           
          | j                   d||||||d|	}
|
j                  }t        |t              rt	        | d      n|}| j                  |dd|ddf         }| j                  j                  G|| j                  j                  z  }t        j                  |      }|| j                  j                  z  }d}| | j                  ||| j                  fi |	}t        |||
j                  |
j                  |
j                        S )a  
        Example:

        ```python
        >>> from transformers import AutoTokenizer, Gemma3nForCausalLM

        >>> model = Gemma3nForCausalLM.from_pretrained("google/gemma-2-9b")
        >>> tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-9b")

        >>> prompt = "What is your favorite condiment?"
        >>> inputs = tokenizer(prompt, return_tensors="pt")

        >>> # Generate
        >>> generate_ids = model.generate(inputs.input_ids, max_length=30)
        >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        "What is your favorite condiment?"
        ```)r  r*  rk  r@   r  r  N)r>   r?   r@   rA   rB   r2   )r~  r  r  rb   slicer	  ri   final_logit_softcappingr/   r  loss_functionr  r   r@   rA   rB   )rQ   r  r*  rk  r@   r  r  r  r  r1  outputsrA   slice_indicesr?   r>   s                  r4   ra   zGemma3nForCausalLM.forward  s   @ ,64:: ,
)%+',
 ,
  118B>SV8W~ot4]kmA}a,?@A;;..:dkkAAAFZZ'FdkkAAAF%4%%ffdooPPD%#33!//))
 	
r3   )NNNNNNNr   )r+   r,   r-   _tied_weights_keys_tp_plan_pp_planr&   r1   rL   r   r   r/   r{  rd   r
   r;   rc   rb   r   r   r   ra   re   rf   s   @r4   r  r    s%   *,GH23H_-z:;H0   .2.204(,26*.!%-.;
##d*;
 t+;
 &&-	;

 ;
 ((4/;
   4';
 $;;
 ell*;
 +,;
 
 ;
  ;
r3   r  c                        e Zd ZdZdeez  def fdZ	 	 d
dej                  dz  dej                  dz  dej                  fd	Z xZS )Gemma3nMultimodalEmbedderzQEmbeds token ids or soft tokens for multimodal content into language model space.multimodal_configtext_configc                 r   t         |           |j                  | _        |j                  | _        |j                  | _        |j                  | _        |j                  | _        t        j                  | j                  | j                        | _        t        | j                  | j
                        | _        t        | j                  | j
                        | _        t        j                  | j                  | j                  d      | _        t        | j                  | j
                  d      | _        y )Nr  Frk   )rG   rH   )rK   rL   rt   multimodal_hidden_sizer  rG   vocab_offsetr  text_hidden_sizerM   	Embedding	embeddingrE   hard_embedding_normsoft_embedding_normr|   embedding_projectionembedding_post_projection_norm)rQ   r  r  rR   s      r4   rL   z"Gemma3nMultimodalEmbedder.__init__<  s    
 	&7&C&C#$11-::+66 + 7 7doot7R7RS#1$2M2MSWS[S[#\ #1$2M2MSWS[S[#\ $&IId.I.I4K`K`gl$m!.<T=R=RX\X`X`mr.s+r3   Nr  r  r[   c                     |du |duz  rt        d      || j                  |      }n/| j                  || j                  z
        }| j	                  |      }| j                  |      }| j                  |      S )a  Embeds token ids or soft tokens for multimodal content into language model space.

        Args:
            input_ids: A torch.LongTensor containing the token ids to embed. Values should be in the range
                `[vocab_offset, vocab_offset + vocab_size)`.
            inputs_embeds: A torch.Tensor containing the soft tokens to embed.

        Returns:
            A torch.Tensor of embeddings with  shape `[batch_size, seq_len, self.config.text_config.hidden_size]`.
        Nr  )r  r"  r   r  r!  r#  r$  )rQ   r  r  emb_normhard_embemb_norm_projs         r4   ra   z!Gemma3nMultimodalEmbedder.forwardO  s     -t";<YZZ$//>H~~i$2C2C&CDH//9H11(;22=AAr3   r  )r+   r,   r-   r.   r$   r'   r&   rL   r/   r{  rd   ra   re   rf   s   @r4   r  r  9  sk    [t-0CCt 't* .2-1B##d*B ||d*B 
	Br3   r  z
    The base Gemma 3n model comprising a vision backbone, an audio backbone, and a language model without a
    language modeling head.
    c                       e Zd ZdZdef fdZd Zd Ze e	d      de
j                  d	ee   d
eez  fd              Z	 	 	 	 dde
j"                  dz  de
j                  dz  de
j                  dz  de
j                  dz  fdZe	 	 	 	 	 	 	 	 	 	 	 dde
j"                  dz  de
j                  dz  de
j                  dz  de
j&                  dz  de
j&                  dz  de
j"                  dz  dedz  de
j"                  dz  de
j                  dz  de
j"                  dz  dedz  dee   d
efd       Ze e	d      de
j&                  de
j&                  d	ee   d
eez  fd              Z xZS ) Gemma3nModelFri   c                 $   t         |   |       t        j                  |j                        | _        |j                  j                  | _        t        j                  |j                        }|| _        |j                  j                  | _	        t        j                  |j                        | _        t        |j                  |j                        | _        t        |j                  |j                        | _        | j                          y )Nr  )rK   rL   r"   from_configvision_configvision_towerr  r  language_modelr  audio_configaudio_towerr  embed_visionembed_audior  )rQ   ri   r/  rR   s      r4   rL   zGemma3nModel.__init__u  s     %119M9MN ,,77"..f6H6HI,*0*<*<*W*W'$001D1DE5f6J6JFL^L^_4V5H5H&J\J\]r3   c                 6    | j                   j                         S r]   )r/  get_input_embeddingsrQ   s    r4   r5  z!Gemma3nModel.get_input_embeddings  s    ""7799r3   c                 :    | j                   j                  |       y r]   )r/  set_input_embeddingsrQ   rg  s     r4   r8  z!Gemma3nModel.set_input_embeddings  s    007r3   zOProjects the last hidden state from the vision model into language model space.r5   pixel_valuesr1  r[   c                     | j                   d	|ddd|}|j                  }|j                  |j                  d   | j                  j
                  j                  | j                  j                        j                  ddd      }|| j                  j
                  j                  dz  z  }| j                  |      |_
        |S )
NFT)r:  
do_poolingreturn_dictr   r!   r#   r  r  r2   )r.  r  r   r   ri   r-  rt   vision_soft_tokens_per_imager   r2  pooler_output)rQ   r:  r1  vision_outputsr  s        r4   get_image_featureszGemma3nModel.get_image_features  s     +**sQVdhslrs*<< .55##A&KK%%11KK44
 '!Q
	 	 	T[[66BBCGG'+'8'8GX'8'Y$r3   Nr  r  image_featuresaudio_featuresc           	         || | j                         t        j                  | j                  j                  t        j
                  |j                              k(  }|j                  d      }| | j                         t        j                  | j                  j                  t        j
                  |j                              k(  j                  d      }n2|| j                  j                  k(  }|| j                  j                  k(  }|j                         }|j                  d      j                  |      j                  |j                        }|Qt        ||   j                         |j                         k(  d| d|j                  d   |j                  d   z          |j                         }|j                  d      j                  |      j                  |j                        }|Qt        ||   j                         |j                         k(  d| d|j                  d   |j                  d   z          ||fS )z
        Obtains multimodal placeholder mask from `input_ids` or `inputs_embeds`, and checks that the placeholder token count is
        equal to the length of multimodal features. If the lengths are different, an error is raised.
        r  rT   z6Image features and image tokens do not match, tokens: z, features: r   r#   z6Audio features and audio tokens do not match, tokens: )r5  r/   r   ri   image_token_idlongr   allaudio_token_idr2  r   	expand_asr   r   numelr   )	rQ   r  r  rC  rD  special_image_maskspecial_audio_maskn_image_tokensn_audio_tokenss	            r4   get_placeholder_maskz!Gemma3nModel.get_placeholder_mask  sb    !.2M$2K2K2MT[[77uzzR_RfRfg3 " "4!7!7!;.4,,.LL!;!;5::VcVjVjk c"g  "+dkk.H.H!H!*dkk.H.H!H+//1/99"=GGVYYZgZnZno%"01779^=Q=Q=SSHHXXdeseyeyz{e|  @N  @T  @T  UV  @W  fW  eX  Y
 ,//1/99"=GGVYYZgZnZno%"01779^=Q=Q=SSHHXXdeseyeyz{e|  @N  @T  @T  UV  @W  fW  eX  Y
 "#555r3   input_featuresr*  input_features_maskrk  r@   token_type_idsr  r  	lm_kwargsc                 p	   |du |	duz  rt        d      |} | j                         |      }	t        j                  |dk\  || j                  k        }t        j
                  ||t        j                  |            }| j                  j                  |      }t        j                  || j                  j                  k\  || j                  j                  k        }| j                  j                  | j                  j                  z   dz
  }t        j
                  |||      j                  |	j                        }| j                  |      }|j                  |	j                  |	j                        }|j!                  d      j#                  |	      }t        j
                  |||	      }	|| j                  j                  k\  }| j                  j                  | j                  j                  z   dz
  }t        j
                  |||      j                  |	j                        }| j                  |      }|j                  |	j                  |	j                        }|j!                  d      j#                  |	      }t        j
                  |||	      }	nd}|l| j%                  |d      j&                  }|j                  |	j                  |	j                        }| j)                  ||	|	      \  }}|	j+                  ||      }	|K|H| j-                  || d      }|j&                  }|j.                  }t        j0                  | j                  dz
  ggt        j2                  |j                  
      }| j                  |      } t        j
                  |j!                  d      | |      }|j4                  \  }!}"}#| j6                  j8                  |"z
  }$| j;                  |!|$|#      }%t        j<                  ||%fd      }|j                  |	j                  |	j                        }| j)                  ||	|      \  }}&|	j+                  |&|      }	 | j                  dd|||||	|dd|}'t?        |'j@                  |r|'jB                  nd|'jD                  |'jF                  |nd|      S d      S )a}  
        input_features_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
            Attention mask for `input_features` where non-zero values mark valid audio frames.
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
            config.text_config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
            (masked), the loss is only computed for the tokens with labels in `[0, ..., config.text_config.vocab_size]`.

        Example:

        ```python
        >>> from PIL import Image
        >>> import httpx
        >>> from io import BytesIO
        >>> from transformers import AutoProcessor, Gemma3nForConditionalGeneration

        >>> model = Gemma3nForConditionalGeneration.from_pretrained("google/gemma3n2-3b-mix-224")
        >>> processor = AutoProcessor.from_pretrained("google/gemma3n2-3b-mix-224")

        >>> prompt = "Where is the cat standing?"
        >>> url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"
        >>> with httpx.stream("GET", url) as response:
        ...     image = Image.open(BytesIO(response.read()))

        >>> inputs = processor(images=image, text=prompt,  return_tensors="pt")

        >>> # Generate
        >>> generate_ids = model.generate(**inputs,)
        >>> processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        "Where is the cat standing?\nsnow"
        ```
        Nr  r   r#   )r  rT   T)r=  )r  rC  r  r   )r  rD  )r  r  r*  rk  r@   r  r  r=  )r  r@   rA   rB   r9   r:   r2   )$r  r5  r/   r  r  r  
zeros_liker/  r  r2  r  r3  r  r   r   r   r   rJ  rB  r@  rP  masked_scatterget_audio_featuresr*   r   rG  r   ri   audio_soft_tokens_per_imager#  r   r8   r  r@   rA   rB   )(rQ   r  r:  rQ  r*  rR  rk  r@   rS  r  r  r  rT  per_layer_inputs_maskper_layer_inputs_tokensr  vision_maskdummy_vision_token_idvision_input_idsvision_embedsexpanded_vision_mask
audio_maskdummy_audio_token_idaudio_input_idsaudio_embedsexpanded_audio_maskrC  rL  r   audio_outputsrD  audio_padding_toksaudio_padding_embsaudio_batch_sizeaudio_seq_lenaudio_embed_dimextra_padding_tokensextra_padding_featuresrM  r  s(                                           r4   ra   zGemma3nModel.forward  s   ` -t";<YZZ 7D557	BM %*$5$5i1niRVRqRqFq$r!&+kk2GTYTdTdenTo&p##22GGH_`  ++T..;;;YIYIYIfIf=fK %)$5$5$B$BTEVEVEaEa$ade$e!${{;	CXY\\]j]q]qr --8H-IM),,]-A-A=CVCVWM#.#8#8#<#F#F}#U !KK(<m][M #d&6&6&C&CCJ#'#3#3#@#@4CSCSC^C^#^ab#b #kk*iAUVYYZgZnZnoO++o+FL'??=+?+?ATATUL","6"6r":"D"D]"S!KK(;\=YM# #!44\t4TbbN+..}/C/C]EXEXYN$($=$=~ %> %! *889K^\M %*=*I 33NEXDXfj3kM*88N&55J "'!0C/D.EUZZ`n`u`u!v!%!1!1<N!1!O"[[)=)=b)ACUWefN?M?S?S<m_#';;#J#J]#Z %7%>%>?OQegv%w""YY8N'OUVWN+..}/C/C]EXEXYN$($=$=~ %> %!A! *889K^\M%$%% 

-)%+'

 

 *%777@G33d!//))2>2JPT2@2L
 	
 SW
 	
r3   zPProjects the last hidden state from the audio encoder into language model space.c                 x     | j                   ||fddi|}| j                  |j                        }||_        |S )a0  
        input_features (`torch.FloatTensor]` of shape `(num_images, seq_length, num_features)`):
            The tensors corresponding to the input audio.
        input_features_mask (`torch.FloatTensor]` of shape `(num_images, seq_length)`):
            The attention mask for the input audio.
        r=  Tr>  )r1  r3  r  r@  )rQ   rQ  rR  r1  rf  rd  s         r4   rX  zGemma3nModel.get_audio_featuresS  sV     9I8H8H/9
=A9
EK9
 ''m6U6U'V&2#r3   r  )NNNNNNNNNNN)r+   r,   r-   accepts_loss_kwargsr%   rL   r5  r8  r   r   r/   r;   r   r   rC   r   rB  r{  rP  rd   r
   rc   r8   ra   r)   rX  re   rf   s   @r4   r*  r*  k  si     } :8 !rs'' +, 
+	+	 t , .2263737*6##d**6 ((4/*6 ))D0	*6
 ))D0*6X  .21537.23704(,2626*.!%F
##d*F
 ''$.F
 ))D0	F

 t+F
 #\\D0F
 &&-F
 F
 ((4/F
 ((4/F
   4'F
 $;F
 ./F
 
$F
 F
P !st #\\ +,	
 
/	/ u r3   r*  z
    The base Gemma 3n model comprising a vision backbone, an audio backbone, a language model, and a language modeling
    head.
    c                       e Zd ZddiZdef fdZd Zd Zede	j                  dee   fd	       Zee	 	 	 	 	 	 	 	 	 	 	 	 dde	j                  d
z  de	j                  d
z  de	j                  d
z  de	j                   d
z  de	j                   d
z  de	j                  d
z  ded
z  de	j                  d
z  de	j                  d
z  de	j                  d
z  ded
z  dee	j                   z  dee   defd              Z	 	 	 	 	 	 	 	 	 	 	 	 d fd	Z xZS )Gemma3nForConditionalGenerationr  z(model.language_model.embed_tokens.weightri   c                     t         |   |       t        |      | _        t	        j
                  |j                  j                  |j                  j                  d      | _	        | j                          y r  )rK   rL   r*  r~  rM   r|   r  rt   r  r	  r  r  s     r4   rL   z(Gemma3nForConditionalGeneration.__init__s  sS     !&)
yy!3!3!?!?ASASA^A^ejkr3   c                 6    | j                   j                         S r]   )r~  r5  r6  s    r4   r5  z4Gemma3nForConditionalGeneration.get_input_embeddingsy  s    zz..00r3   c                 :    | j                   j                  |       y r]   )r~  r8  r9  s     r4   r8  z4Gemma3nForConditionalGeneration.set_input_embeddings|  s    

''.r3   r:  r1  c                 <     | j                   j                  |fi |S r]   )r~  rB  )rQ   r:  r1  s      r4   rB  z2Gemma3nForConditionalGeneration.get_image_features  s    ,tzz,,\DVDDr3   Nr  rQ  r*  rR  rk  r@   rS  r  r  r  r  rT  r[   c                     | j                   d	|||||||||	|
|dd|}|j                  }t        |t              rt	        | d      n|}| j                  |dd|ddf         }| j                  j                         j                  x}||z  }t        j                  |      }||z  }d}|
O|j                         }|dddddf   }|
dddf   }||dd|j                  d    df   j                  |j                        }||j                  |j                        dk7     j                         }||j                  |j                        dk7     j                         }n |j                         }|j                         }t!        j"                         }|j%                  d| j                  j&                  j(                        }|j%                  d      j                  |j                        } |||      }t+        |||j,                  |j.                  |j0                  |j2                  |j4                        S )
a  
        input_features_mask (torch.Tensor, *optional*, defaults to None):
            The attention mask for the input audio.
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
            config.text_config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are
            ignored (masked), the loss is only computed for the tokens with labels in
            `[0, ..., config.text_config.vocab_size]`.

        Example:

        ```python
        >>> from PIL import Image
        >>> import httpx
        >>> from io import BytesIO
        >>> from transformers import AutoProcessor, Gemma3ForConditionalGeneration

        >>> model = Gemma3ForConditionalGeneration.from_pretrained("google/gemma-3-4b-it")
        >>> processor = AutoProcessor.from_pretrained("google/gemma-3-4b-it")

        >>> messages = [
        ...     {
        ...         "role": "system",
        ...         "content": [
        ...             {"type": "text", "text": "You are a helpful assistant."}
        ...         ]
        ...     },
        ...     {
        ...         "role": "user", "content": [
        ...             {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"},
        ...             {"type": "text", "text": "Where is the cat standing?"},
        ...         ]
        ...     },
        ... ]

        >>> inputs = processor.apply_chat_template(
        ...     messages,
        ...     tokenizer=True,
        ...     return_dict=True,
        ...     return_tensors="pt",
        ...     add_generation_prompt=True
        ... )
        >>> # Generate
        >>> generate_ids = model.generate(**inputs)
        >>> processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        "user\nYou are a helpful assistant.\n\n\n\n\n\nWhere is the cat standing?\nmodel\nBased on the image, the cat is standing in a snowy area, likely outdoors. It appears to"
        ```
        T)r  r:  rQ  r*  rR  rk  r@   rS  r  r  r  r=  N.rT   r#   r   )r>   r?   r@   rA   rB   r9   r:   r2   )r~  r  r  rb   r  r	  ri   get_text_configr  r/   r  r^   r   r   r   r   rM   CrossEntropyLossr  r  r  r=   r@   rA   rB   r9   r:   )rQ   r  r:  rQ  r*  rR  rk  r@   rS  r  r  r  r  rT  r  rA   r  r?   r  r>   shift_logitsshift_labelsshift_attention_maskloss_fctflat_logitsflat_labelss                             r4   ra   z'Gemma3nForConditionalGeneration.forward  sN   D $** 
%)) 3%+)'
 
   118B>SV8W~ot4]kmA}a,?@A'+{{'B'B'D'\'\\#i55FZZ'F55F\\^F!#ssA+.L!#qr'?L) (6a,:L:LQ:O9O9Q6Q'R'U'UV\VcVc'd$+,@,C,CFMM,RVW,WXcce+,@,C,CLDWDW,X\],]^iik+668+668**,H&++B0G0G0R0RSK&++B/22<3F3FGKK5D,#33!//)) ' ; ; ' ; ;
 	
r3   c                 `    t        |   |f|||||
||	|d|}|s|
s||d<   ||d<   ||d<   |S )N)r@   r  r*  rk  r  r  rS  is_first_iterationr:  rQ  rR  )rK   prepare_inputs_for_generation)rQ   r  r@   r  rk  r:  rQ  r*  rR  rS  r  r  r  r  r1  model_inputsrR   s                   r4   r  z=Gemma3nForConditionalGeneration.prepare_inputs_for_generation  sk    $ w<
+')%))1
 
  Y+7L(-;L)*2EL./r3   )NNNNNNNNNNNr   )NNNNNNNNTNNF)r+   r,   r-   r  r%   rL   r5  r8  r   r/   r;   r   r   rB  r   r{  rd   r
   rc   rb   r=   ra   r  re   rf   s   @r4   rq  rq  j  s    +,VW} 1/ Eu/@/@ EFSeLf E E  .21537.23704(,2626*.!%-.w
##d*w
 ''$.w
 ))D0	w

 t+w
 #\\D0w
 &&-w
 w
 ((4/w
 ((4/w
   4'w
 $;w
 ell*w
 ./w
 
'w
  w
x   ' 'r3   rq  )r  r  rq  r*  r}  r  )r   NN)r#   )cr~   collections.abcr   r   dataclassesr   typingr   r/   torch.nnrM   torch.nn.functionalr   rh   r   r  r  r	   cache_utilsr
   r   
generationr   integrationsr   masking_utilsr   r   modeling_layersr   modeling_outputsr   r   r   modeling_rope_utilsr   r   modeling_utilsr   r   processing_utilsr   utilsr   r   r   r   r   utils.genericr   r   utils.output_capturingr    autor"   configuration_gemma3nr$   r%   r&   r'   r)   r8   r=   ModulerE   rh   r   r(  rJ  ro  r  r  r  r  r  r  r  r  r  r   rd   rb   r&  r^   rC   r4  r7  r9  r\  r}  r  r  r  r  r  r*  rq  __all__r2   r3   r4   <module>r     s
  *  . !      & ! . ) / R 9 k k K F &  H 5  l l 3%? 3  3 
9!8 9 9( 
9K 9 9:4RYY 40g)BII g)TaBII aHj,bii j,ZB7		 B7JF")) FRORYY O8Dryy D2(ryy (V 6SR\\ S;RYY ;$#5RYY #5L`'ryy `'F(	UU\\ 	U# 	U%,, 	U$   %II%<<% 
% <<	%
 LL4'% S[% T\% T\% 5<<%&%D.ELL .u|| .%,, ._b ., )*j)299 j) +j)ZG%8 G%T 8T_ 8T 8TvN
0 N
bN<RYY N<b abA
- A
 cA
H ^_L
/ L
 `L
^/B		 /Bd v) vvr u&<o uupr3   