
    i                     R   d dl Z d dlmZ d dlmZ d dlmZ d dlZd dlmZ ddl	m
Z ddlmZ dd	lmZmZ dd
lmZ ddlmZmZmZ ddlmZmZ ddlmZ ddlmZmZmZm Z m!Z! ddl"m#Z#m$Z$ ddl%m&Z&m'Z' ddl(m)Z) ddl*m+Z+m,Z,m-Z- ddl.m/Z/m0Z0 ddl1m2Z2m3Z3 ddl4m5Z5m6Z6 d Z7 G d dejp                        Z9d Z:d Z; G d dejp                        Z<	 	 dadejp                  dejz                  d ejz                  d!ejz                  d"ejz                  dz  d#e>dz  d$e>d%e)e+   fd&Z? G d' d(ejp                        Z@ G d) d*ejp                        ZA G d+ d,ejp                        ZBd- ZC G d. d/ejp                        ZD G d0 d1ejp                        ZE G d2 d3e      ZF G d4 d5ejp                        ZG G d6 d7ejp                        ZHe, G d8 d9e'             ZI G d: d;eI      ZJ G d< d=ejp                        ZK G d> d?ejp                        ZL G d@ dAejp                        ZMee, G dB dCe!                    ZN G dD dEejp                        ZO G dF dGejp                        ZP edH       G dI dJejp                               ZQ G dK dLejp                        ZR G dM dNejp                        ZSdO ZT edP      dbdQ       ZUdRejz                  dSeVdTejz                  fdUZW eeU       G dV dWejp                               ZX G dX dYe      ZYe, G dZ d[e'             ZZ G d\ d]eZ      Z[ G d^ d_eZe      Z\g d`Z]y)c    N)Callable)	dataclass)Optional)nn   )initialization)ACT2FN)CacheDynamicCache)GenerationMixin)use_kernel_forward_from_hubuse_kernel_func_from_hubuse_kernelized_func)create_bidirectional_maskcreate_causal_mask)GradientCheckpointingLayer)"BaseModelOutputWithCrossAttentionsBaseModelOutputWithPast,BaseModelOutputWithPoolingAndCrossAttentionsCausalLMOutputWithPastModelOutput)ROPE_INIT_FUNCTIONSdynamic_rope_update)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)TransformersKwargsauto_docstringcan_return_tuple)maybe_autocastmerge_with_config_defaults)OutputRecordercapture_outputs   )EvollaConfigSaProtConfigc                     | j                  |      j                         }t        j                  |d      j	                  |      |z  }|j                         |z   S )a  
    Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols
    are ignored. This is modified from fairseq's `utils.make_positions`.

    Args:
        x: torch.Tensor x:

    Returns: torch.Tensor
    r$   dim)neinttorchcumsumtype_aslong)	input_idspadding_idxmaskincremental_indicess       {/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/evolla/modeling_evolla.py"create_position_ids_from_input_idsr5   4   sP     <<$((*D,,t3;;DADH##%33    c                   8     e Zd ZdZ fdZ	 	 	 	 ddZd Z xZS )EvollaSaProtEmbeddingszV
    Same as BertEmbeddings with a tiny tweak for positional embeddings indexing.
    c                    t         |           t        j                  |j                  |j
                  |j                        | _        |j                  r1t        j                  |j
                  |j                        | _        nd | _        t        j                  |j                        | _        t        |dd      | _        | j#                  dt%        j&                  |j(                        j+                  d      d       |j                  | _        | j                   dk(  r;t        j                  |j(                  |j
                  | j,                        | _        |j0                  | _        |j2                  | _        d | _        y )	N)r1   epsposition_embedding_typeabsoluteposition_ids)r$   F
persistent)super__init__r   	Embedding
vocab_sizehidden_sizepad_token_idword_embeddingsemb_layer_norm_before	LayerNormlayer_norm_eps
layer_normDropouthidden_dropout_probdropoutgetattrr<   register_bufferr,   arangemax_position_embeddingsexpandr1   position_embeddingstoken_dropoutmask_token_idr>   selfconfig	__class__s     r4   rC   zEvollaSaProtEmbeddings.__init__I   s2   !||F,=,=v?Q?Q_e_r_rs'' ll6+=+=6CXCXYDO"DOzz&"<"<='.v7PR\']$ELL)G)GHOOPWXej 	 	
 "..'':5')||..0B0BPTP`P`(D$ $11#11 r6   c                    |*|t        || j                        }n| j                  |      }|| j                  |      }|}| j                  r||j                  || j                  k(  j                  d      d      }d}||j                  d      n|j                  d   }|| j                  k(  j                  d      j                         |z  }|d|z
  z  d|z
  d d d d f   z  j                  |j                        }| j                  dk(  r| j                  |      }	||	z   }| j                  | j                  |      }|-||j                  d      z  j                  |j                        }|S )Nr?           gQ?r$   r=   )r5   r1   &create_position_ids_from_inputs_embedsrH   rV   masked_fillrW   	unsqueezesumshapefloattodtyper<   rU   rL   )
rY   r0   attention_maskr>   inputs_embeds
embeddingsmask_ratio_trainsrc_lengthsmask_ratio_observedrU   s
             r4   forwardzEvollaSaProtEmbeddings.forwardb   s    $A)TM]M]^#JJ=Y  00;M #
 )"7#//d>P>P1P0[0[\^0_adeJ)4B4N.,,R0T]TcTcdeTfK#,0B0B#B"G"G"K"Q"Q"SVa"a$,<(<=EXAXZ[]acgZg@hhll  J '':5"&":":<"H#&99J??&4J%$~'?'?'CCGG
HXHXYJ r6   c                    |j                         dd }|d   }t        j                  | j                  dz   || j                  z   dz   t        j                  |j
                        }|j                  d      j                  |      S )z
        We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.

        Args:
            inputs_embeds: torch.Tensor

        Returns: torch.Tensor
        Nr?   r$   )re   devicer   )sizer,   rR   r1   r/   rn   r`   rT   )rY   rg   input_shapesequence_lengthr>   s        r4   r^   z=EvollaSaProtEmbeddings.create_position_ids_from_inputs_embeds   s     $((*3B/%a.||q /D4D4D"Dq"HPUPZPZcpcwcw
 %%a(//<<r6   )NNNN)__name__
__module____qualname____doc__rC   rl   r^   __classcell__r[   s   @r4   r8   r8   D   s&    !6 /b=r6   r8   c                 b    | j                  dd      \  }}t        j                  | |fd      S )N   r?   r(   )chunkr,   catxx1x2s      r4   rotate_half_esmr      s/    WWQBWFB99rc2YB''r6   c                     |d d d d d | j                   d   d d f   }|d d d d d | j                   d   d d f   }| |z  t        |       |z  z   S )N)rb   r   )r}   cossins      r4   apply_rotary_pos_emb_esmr      sY    
aMaggbkM1$
%C
aMaggbkM1$
%CG*S011r6   c                        e Zd ZU dZej
                  ed<   def fdZd
dZ	dej
                  dej
                  de
ej
                  ej
                  f   fd	Z xZS )EvollaSaProtRotaryEmbeddingz
    Rotary position embeddings based on those in
    [RoFormer](https://huggingface.co/docs/transformers/model_doc/roformer). Query and keys are transformed by rotation
    matrices which depend on their relative positions.
    inv_freqr)   c                     t         |           || _        ddt        j                  d|dt        j
                        j                         |z  z  z  }| j                  d|       d | _        d | _	        d | _
        y )N      ?'  r   ry   re   r   )rB   rC   r)   r,   rR   int64rc   rQ   _seq_len_cached_cos_cached_sin_cached)rY   r)   r   r[   s      r4   rC   z$EvollaSaProtRotaryEmbedding.__init__   sn    %ELLC%++$N$T$T$VY\$\]^Z2#r6   c                 t   |j                   |   }|| j                  k7  s#| j                  j                  |j                  k7  r|| _        t	        j
                  |j                   |   |j                        j                  | j                        }t	        j                  || j                        }t	        j                  ||fd      j                  |j                        }|j                         d d d d d d f   | _        |j                         d d d d d d f   | _        | j                  | j                  fS )Nrn   r?   r(   )rb   r   r   rn   r,   rR   r.   r   outerr{   rd   r   r   r   )rY   r}   seq_dimensionseq_lentfreqsembs          r4   _update_cos_sin_tablesz2EvollaSaProtRotaryEmbedding._update_cos_sin_tables   s    ''-( d***d.>.>.E.E.Q#*D QWW]3AHHEMMdmm\AKK4==1E))UEN366qxx@C"wwytQ)9:D"wwytQ)9:D!1!111r6   qkreturnc                 .   | j                  |d      \  | _        | _        t        || j                  | j                        j	                  |j
                        t        || j                  | j                        j	                  |j
                        fS )Nr   )r   r   )r   r   r   r   rd   re   )rY   r   r   s      r4   rl   z#EvollaSaProtRotaryEmbedding.forward   s    -1-H-HZ\-H-]*$* %Q(8(8$:J:JKNNUVU\U\N]$Q(8(8$:J:JKNNUVU\U\N]
 	
r6   )ry   )rr   rs   rt   ru   r,   Tensor__annotations__r+   rC   r   tuplerl   rv   rw   s   @r4   r   r      sY     ll	 C 	 2 
 
%,, 
5u||A[;\ 
r6   r   modulequerykeyvaluerf   scalingrO   kwargsc                    ||j                  d      dz  }t        j                  ||j                  dd            |z  }|||z   }t        j
                  j                  |d      }t        j
                  j                  ||| j                        }t        j                  ||      }	|	j                  dd      j                         }	|	|fS )Nr?         ry   r   r(   )ptrainingr$   )
ro   r,   matmul	transposer   
functionalsoftmaxrO   r   
contiguous)
r   r   r   r   rf   r   rO   r   attn_weightsattn_outputs
             r4   eager_attention_forwardr      s     **R.D( <<s}}Q':;gEL!#n4==((2(>L==((6??([L,,|U3K''1-88:K$$r6   c                        e Zd Zd
 fd	Z	 	 	 ddej
                  dej                  dz  dej                  dz  dej                  dz  dee   de	ej
                     fd	Z
 xZS )EvollaSaProtSelfAttentionNc                    t         |           || _        |j                  |j                  z  dk7  r2t        |d      s&t        d|j                   d|j                   d      |j                  | _        t        |j                  |j                  z        | _        | j                  | j                  z  | _	        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        |j                  | _        d | _        |xs t%        |dd      | _        | j&                  dk(  rt)        | j                  	      | _        |j*                  | _        || _        d
| _        | j*                  xr | | _        y )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()r<   r=   rotaryr(   r   )rB   rC   rZ   rF   num_attention_headshasattr
ValueErrorr+   attention_head_sizeall_head_sizer   Linearr   r   r   attention_probs_dropout_probrO   rotary_embeddingsrP   r<   r   
is_decoder	layer_idxr   	is_causal)rY   rZ   r<   r   is_cross_attentionr[   s        r4   rC   z"EvollaSaProtSelfAttention.__init__   s    : ::a?PVXhHi#F$6$6#7 8 445Q8 
 $*#=#= #&v'9'9F<V<V'V#W !558P8PPYYv1143E3EF
99V//1C1CDYYv1143E3EF
::!%'> (
'-zC
$ ''83%@TE]E]%^D" ++"C1C-Cr6   hidden_statesrf   encoder_hidden_statesencoder_attention_maskr   r   c                    |j                   d d }g |d| j                  }| j                  |      j                  |      j	                  dd      }|d u}	|	r|n|}
|	r|n|}| j                  |
      j                  |      j	                  dd      }| j                  |
      j                  |      j	                  dd      }|| j                  dz  z  }| j                  dk(  r| j                  ||      \  }}t        j                  | j                  j                  t              } || ||||f| j                  sdn| j                  | j                   d|\  }} |j"                  g |d j%                         }||fS )Nr?   r$   ry   r   r   r]   rO   r   )rb   r   r   viewr   r   r   r<   r   r   get_interfacerZ   _attn_implementationr   r   rO   r   reshaper   )rY   r   rf   r   r   r   rp   hidden_shapequery_layerr   current_states	key_layervalue_layerattention_interfacer   r   s                   r4   rl   z!EvollaSaProtSelfAttention.forward  s    $))#2.CCbC$*B*BCjj/44\BLLQPQR2$>2D.-3E/>HH^,11,?II!QO	jj055lCMMaQRS "D$<$<d$BB''83%)%;%;K%S"K(?(M(MKK,,.E)
 %8	%
  $}}C$,,LL	%
 	%
!\ *k));;;;FFHL((r6   )NNFNNN)rr   rs   rt   rC   r,   r   FloatTensorr   r   r   rl   rv   rw   s   @r4   r   r      s    DF 48:>;?,)||,) ))D0,)  %0047	,)
 !& 1 1D 8,) +,,) 
u||	,)r6   r   c                   $     e Zd Z fdZd Z xZS )EvollaSaProtSelfOutputc                     t         |           t        j                  |j                  |j                        | _        t        j                  |j                        | _        y N)	rB   rC   r   r   rF   denserM   rN   rO   rX   s     r4   rC   zEvollaSaProtSelfOutput.__init__K  sB    YYv1163E3EF
zz&"<"<=r6   c                 T    | j                  |      }| j                  |      }||z   }|S r   r   rO   rY   r   input_tensors      r4   rl   zEvollaSaProtSelfOutput.forwardP  .    

=1]3%4r6   rr   rs   rt   rC   rl   rv   rw   s   @r4   r   r   J      >
r6   r   c                   :     e Zd Zd fd	Z	 	 	 ddee   fdZ xZS )EvollaSaProtAttentionc                     t         |           t        |||      | _        t	        |      | _        t        j                  |j                  |j                        | _        y )N)r   r   r:   )
rB   rC   r   rY   r   outputr   rJ   rF   rK   )rY   rZ   r   r   r[   s       r4   rC   zEvollaSaProtAttention.__init__X  sI    -f	^pq	,V4f&8&8f>S>STr6   r   c                     | j                  |      } | j                  |f|||d|\  }}| j                  ||      }|S )Nrf   r   r   )rJ   rY   r   )	rY   r   rf   r   r   r   hidden_states_lnr   _s	            r4   rl   zEvollaSaProtAttention.forward_  sZ      >>-8"
)"7#9	

 
Q kk+}=r6   )NFr   )rr   rs   rt   rC   r   r   rl   rv   rw   s   @r4   r   r   W  s)    U "# +,r6   r   c                 j    | dz  dt        j                  | t        j                  d      z        z   z  S )zz
    This is the gelu implementation from the original EVOLLA_SA_PROT repo. Using F.gelu yields subtly wrong results.
    g      ?r   g       @)r,   erfmathsqrt)r}   s    r4   gelur   s  s.     s7cEIIa$))C.&899::r6   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )EvollaSaProtIntermediatec                     t         |           t        j                  |j                  |j
                        | _        y r   )rB   rC   r   r   rF   intermediate_sizer   rX   s     r4   rC   z!EvollaSaProtIntermediate.__init__{  s,    YYv1163K3KL
r6   r   r   c                 >    | j                  |      }t        |      }|S r   )r   r   )rY   r   s     r4   rl   z EvollaSaProtIntermediate.forward  s     

=1]+r6   rr   rs   rt   rC   r,   r   rl   rv   rw   s   @r4   r   r   z  s$    MU\\ ell r6   r   c                   $     e Zd Z fdZd Z xZS )EvollaSaProtOutputc                     t         |           t        j                  |j                  |j
                        | _        t        j                  |j                        | _	        y r   )
rB   rC   r   r   r   rF   r   rM   rN   rO   rX   s     r4   rC   zEvollaSaProtOutput.__init__  sB    YYv779K9KL
zz&"<"<=r6   c                 T    | j                  |      }| j                  |      }||z   }|S r   r   r   s      r4   rl   zEvollaSaProtOutput.forward  r   r6   r   rw   s   @r4   r   r     r   r6   r   c                   >     e Zd Z fdZ	 	 	 ddee   fdZd Z xZS )EvollaSaProtLayerc                    t         |           |j                  | _        d| _        t	        |      | _        |j                  | _        |j                  | _        | j                  r,| j                  st        |  d      t	        |d      | _	        t        |      | _        t        |      | _        t        j                  |j                   |j"                        | _        y )Nr$   z> should be used as a decoder model if cross attention is addedT)r   r:   )rB   rC   chunk_size_feed_forwardseq_len_dimr   	attentionr   add_cross_attentionRuntimeErrorcrossattentionr   intermediater   r   r   rJ   rF   rK   rX   s     r4   rC   zEvollaSaProtLayer.__init__  s    '-'E'E$.v6 ++#)#=#= ##??"dV+i#jkk"7SW"XD4V<(0f&8&8f>S>STr6   r   c                      | j                   |fd|i|}| j                  r4|2t        | d      st        d|  d       | j                  |f|||d|}| j                  |      }|S )Nrf   r  z'If `encoder_hidden_states` are passed, z` has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`r   )r   r   r   AttributeErrorr  feed_forward_chunk)rY   r   rf   r   r   r   attention_outputlayer_outputs           r4   rl   zEvollaSaProtLayer.forward  s     *4>>
)
 
 ??4@4!12$=dV D` ` 
  3t22  -&;'=	 
   ../?@r6   c                 n    | j                  |      }| j                  |      }| j                  ||      }|S r   )rJ   r  r   )rY   r  attention_output_lnintermediate_outputr  s        r4   r  z$EvollaSaProtLayer.feed_forward_chunk  s<    "nn-=>"//0CD{{#68HIr6   r   )	rr   rs   rt   rC   r   r   rl   r  rv   rw   s   @r4   r   r     s/    U$ "# +,@r6   r   c                   B     e Zd Z fdZe	 	 	 ddee   fd       Z xZS )EvollaSaProtEncoderc                 0   t         |           || _        t        j                  t        |j                        D cg c]  }t        |       c}      | _        t        j                  |j                  |j                        | _        d| _        y c c}w )Nr:   F)rB   rC   rZ   r   
ModuleListrangenum_hidden_layersr   layerrJ   rF   rK   emb_layer_norm_aftergradient_checkpointing)rY   rZ   r   r[   s      r4   rC   zEvollaSaProtEncoder.__init__  sn    ]]uVMeMeGf#g!$5f$=#gh
$&LL1C1CI^I^$_!&+# $hs   Br   c                     t        | j                        D ]  \  }} ||f|||d|} | j                  r| j                  |      }t        |      S )Nr   )last_hidden_state)	enumerater  r  r   )rY   r   rf   r   r   r   ilayer_modules           r4   rl   zEvollaSaProtEncoder.forward  sk      )4 	OA|(-&;'=	
 M	 $$ 55mDM1MRRr6   r   )	rr   rs   rt   rC   r   r   r   rl   rv   rw   s   @r4   r  r    s:    ,  "#S +,S Sr6   r  c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )EvollaSaProtPoolerc                     t         |           t        j                  |j                  |j                        | _        t        j                         | _        y r   )rB   rC   r   r   rF   r   Tanh
activationrX   s     r4   rC   zEvollaSaProtPooler.__init__  s9    YYv1163E3EF
'')r6   r   r   c                 \    |d d df   }| j                  |      }| j                  |      }|S )Nr   )r   r  )rY   r   first_token_tensorpooled_outputs       r4   rl   zEvollaSaProtPooler.forward  s6     +1a40

#566r6   r   rw   s   @r4   r  r    s#    $
U\\ ell r6   r  c                   t     e Zd ZU eed<   dgZdZdZdZdZ	e
 eedd      g eedd      gdZ fd	Z xZS )
EvollaSaProtPreTrainedModelrZ   r   Tr$   r   )index
layer_namer  )r   
attentionscross_attentionsc                 *   t         |   |       t        |t              rsddt	        j
                  d|j                  dt        j                        j                         |j                  z  z  z  }t        j                  |j                  |       y y )Nr   r   r   ry   r   )rB   _init_weights
isinstancer   r,   rR   r)   r   rc   initcopy_r   )rY   r   r   r[   s      r4   r(  z)EvollaSaProtPreTrainedModel._init_weights	  sm    f%f9:eQ

AU[[(Y(_(_(adjdndn(nopHJJv1 ;r6   )rr   rs   rt   r&   r   _no_split_modules_supports_flash_attn_supports_sdpa_supports_flex_attn_supports_attention_backendr   r"   r   _can_record_outputsr(  rv   rw   s   @r4   r"  r"    sd    ,-N"& +%&?qU`ab4AJZ[
2 2r6   r"  c            
            e Zd Zdef fdZd Zd Zee	 d
de	j                  dz  de	j                  dz  dee	j                     ez  fd	              Z xZS )EvollaSaProtProteinEncoderrZ   c                     t         |   |       t        |      | _        t	        |      | _        | j                          y r   )rB   rC   r8   rh   r  encoder	post_initrX   s     r4   rC   z#EvollaSaProtProteinEncoder.__init__  s2     08*62r6   c                 .    | j                   j                  S r   rh   rH   rY   s    r4   get_input_embeddingsz/EvollaSaProtProteinEncoder.get_input_embeddings  s    ...r6   c                 &    || j                   _        y r   r8  rY   r   s     r4   set_input_embeddingsz/EvollaSaProtProteinEncoder.set_input_embeddings  s    */'r6   Nr0   rf   r   c                 ^   |j                         }|\  }}|j                  }|t        j                  ||f|      }| j	                  ||      }t        | j                  ||      } | j                  |fd|i|}	|	d   }
t        |
|	j                  |	j                  |	j                        S )Nr   r0   rf   )rZ   rg   rf   rf   r   )r  r   r%  r&  )ro   rn   r,   onesrh   r   rZ   r5  r   r   r%  r&  )rY   r0   rf   r   rp   
batch_size
seq_lengthrn   rg   encoder_outputssequence_outputs              r4   rl   z"EvollaSaProtProteinEncoder.forward  s      nn&!,
J!!!"ZZ*j)A6RN)N[2;;')
 '$,,}^^^W]^)!,;-)77&11,==	
 	
r6   r   )rr   rs   rt   r&   rC   r:  r=  r!   r#   r,   r   r   r   rl   rv   rw   s   @r4   r3  r3    sw    | /0   /3
<<$&
 t+

 
u||	K	K
   
r6   r3  c                   &     e Zd Zd fd	Zd Z xZS )!EvollaSequenceCompressorAttentionc                 j   t         |           |dz  | _        || _        ||z  }t	        j
                  |      | _        t	        j
                  |      | _        t	        j                  ||d      | _	        t	        j                  ||dz  d      | _
        t	        j                  ||d      | _        y )Nr   Fbiasry   )rB   rC   scaleheadsr   rJ   
norm_medianorm_latentsr   to_qto_kvto_out)rY   r)   dim_headrK  	inner_dimr[   s        r4   rC   z*EvollaSequenceCompressorAttention.__init__?  s    t^

u$	,,s+LL-IIc959	YYsIM>
ii	3U;r6   c                 F   | j                  |      }| j                  |      }| j                  }| j                  |      }t	        j
                  ||fd      }| j                  |      j                  dd      \  }}|j                  |j                  d      |j                  d      |d      j                  dddd      }|j                  |j                  d      |j                  d      |d      j                  dddd      }|j                  |j                  d      |j                  d      |d      j                  dddd      }|| j                  z  }t	        j                  ||j                  dd            }	|	|	j                  dd	      j                         z
  }	|	j                   \  }
}}}t	        j"                  ||      j%                  |j&                        }|d
d
d
d
d
d
f   }|d
d
d
d
d
d
f   }||z  }|	j)                  d|z
  j+                         d      }	|	j-                  d      }t	        j                  ||      }|j                  dddd      }|j/                  |j                  d      |j                  d      d      }| j1                  |      S )z
        Args:
            x (torch.Tensor): image features
                shape (b, n1, D)
            latent (torch.Tensor): latent features
                shape (b, n2, D);  n2: num of latent tokens
        r   r(   ry   r?   r   r$   r   Tr)   keepdimNg     )rL  rM  rK  rN  r,   r{   rO  rz   r   ro   permuterJ  r   r   amaxdetachrb   r@  rd   rn   r_   boolr   r   rP  )rY   r}   latentsr2   hr   kv_inputr   vsimbsnhskdokdr@  mask_expones_expattnouts                      r4   rl   z)EvollaSequenceCompressorAttention.forwardL  sB    OOA##G,JJIIg99a\r2zz(#))2 * 
1 FF166!9affQiB/771aCFF166!9affQiB/771aCFF166!9affQiB/771aC

N ll1akk"b12CHHTH299;;99BSzz"c"%%dkk24q()aD()("ooq4xoo/6{{r{"ll4#kk!Q1% kk#((1+sxx{B7{{3r6   )@      r   rw   s   @r4   rF  rF  >  s    <) r6   rF  c                   &     e Zd Zd fd	Zd Z xZS )EvollaFeedForwardc                    t         |           t        ||z        }t        j                  |      | _        t        j                  ||d      | _        t        j                         | _	        t        j                  ||d      | _
        y NFrH  )rB   rC   r+   r   rJ   normr   fc1GELUr  fc2)rY   r)   multrR  r[   s       r4   rC   zEvollaFeedForward.__init__y  s`    d
O	LL%	99S)%8'')99Y%8r6   c           	      ~    | j                  | j                  | j                  | j                  |                        S r   )rp  r  rn  rm  )rY   r}   s     r4   rl   zEvollaFeedForward.forward  s+    xx1(>?@@r6   )   r   rw   s   @r4   rj  rj  x  s    9Ar6   rj  c                   *     e Zd Zdef fdZd Z xZS )!EvollaSequenceCompressorResamplerrZ   c           
         t         |           |j                  j                  }|j                  | _        t        j                  t        j                  | j
                  |      d      | _
        t        j                  g       | _        t        |j                        D ]g  }| j                  j                  t        j                  t!        ||j"                  |j$                        t'        ||j(                        g             i t        j*                  |j                        | _        t        j.                  ||j                        | _        y )NT)requires_grad)r)   rQ  rK  )r)   rq  )rB   rC   protein_encoder_configrF   resampler_num_latentsnum_latentsr   	Parameterr,   randnrZ  r  layersr  resampler_depthappendrF  resampler_dim_headresampler_headsrj  resampler_ff_multrJ   rm  r   protein_projector)rY   rZ   protein_repr_dimr   r[   s       r4   rC   z*EvollaSequenceCompressorResampler.__init__  s   !88DD!77||EKK0@0@BR$ScghmmB'v--. 
	AKK9 06;T;T\b\r\r *.>VE]E]^		
	 LL!3!34	!#+;V=O=O!Pr6   c                 j   |j                   d   }|j                   \  }}t        j                  || j                        j	                  |j
                        }t        j                  ||fd      }t        j                  |      j	                  | j                  j
                        }| j                  d    |j                  ddd      z  }|j	                  |j                        }| j                  D ]  \  }	}
 |	|||      |z   } |
|      |z   } | j                  |      }| j                  |      S )Nr   r$   r(   r?   )rb   r,   r@  rz  rd   rn   r{   rZ  r   re   r}  r  rm  )rY   embedsr2   br_  r   latent_maskr@  rZ  re  fftransformed_features               r4   rl   z)EvollaSequenceCompressorResampler.forward  s   LLO

AjjT%5%5699$++Fyy$,!4 zz!} 3 34,,t$tyyQ'::**V\\* 	,HD"67D1G;GkG+G	, #44W=yy,--r6   )rr   rs   rt   r%   rC   rl   rv   rw   s   @r4   ru  ru    s    Q| Q*.r6   ru  c                       e Zd ZU dZej
                  dz  ed<   dZej
                  dz  ed<   dZe	ej
                  df   dz  ed<   dZ
e	ej
                  df   dz  ed<   y)EvollaProteinEncoderModelOutputNsequence_compressor_outputr  .r   r%  )rr   rs   rt   r  r,   r   r   r  r   r   r%   r6   r4   r  r    so     <@ 1 1D 8?26u((4/6:>M5**C/047>7;Je'',-4;r6   r  c                   f     e Zd Zdef fdZedej                  dej                  fd       Z	 xZ
S )EvollaProteinEncoderrZ   c                 z    t         |           t        |j                        | _        t        |      | _        y )NrZ   )rB   rC   r3  rx  modelru  sequence_compressor_resamplerrX   s     r4   rC   zEvollaProteinEncoder.__init__  s.    /v7T7TU
-NV\-]*r6   r0   rf   c                     | j                  ||      }|j                  }| j                  ||      }t        ||j                        S )Nr?  )r  r  )r  r  r  r  )rY   r0   rf   r   protein_outputprotein_embedssequence_reprs          r4   rl   zEvollaProteinEncoder.forward  sJ    iW'99::>>Z.'4,>>
 	
r6   )rr   rs   rt   r%   rC   r   r,   
LongTensorr   rl   rv   rw   s   @r4   r  r    s?    ^| ^
 
!1!1 
5CTCT 
 
r6   r  c                   b     e Zd Z	 	 	 ddedz  dedz  dedz  f fdZd Z	 	 	 	 	 	 	 d	dZ xZS )
#EvollaSequenceAlignerCrossAttentionNprotein_encoder_dimstructure_encoder_dimmsa_encoder_dimc                    t         |           |j                  | _        |j                  | _        | j                  dz  | _        t        | j                  | j                  z        | _        | j                  | j                  z  | _        |j                  }|j                  }|j                  }t        j                  | j                  | j                        | _        |Kt        j                  || j                        | _        t        j                  || j                        | _        nd | _        d | _        |Kt        j                  || j                        | _        t        j                  || j                        | _        nd | _        d | _        |Kt        j                  || j                        | _        t        j                  || j                        | _        nd | _        d | _        t)        | j                        | _        t        j,                  |      | _        t        j                  | j                  | j                  |      | _        t3        | j                  |      | _        t        j6                  t9        j:                  dg            | _        t        j6                  t9        j:                  dg            | _        y )Nr   rH  r]   ) rB   rC   rF   r   rJ  r+   r   r   $aligner_attention_probs_dropout_probaligner_enable_biasaligner_ffn_multr   r   r   key_proteinvalue_proteinkey_structurevalue_structurekey_msa	value_msaEvollaRMSNormattention_normrM   rO   out_projrj  r  r{  r,   tensorgate_attentiongate_ffw)	rY   rZ   r  r  r  r   enable_biasffn_multr[   s	           r4   rC   z,EvollaSequenceAlignerCrossAttention.__init__  s    	!--#)#=#= --t3
#&t'7'7$:R:R'R#S !558P8PP'-'R'R$00**YYt//1C1CD
*!yy)<d>P>PQD!#+>@R@R!SD#D!%D ,!#+@$BTBT!UD#%99-BDDVDV#WD !%D#'D &99_d6H6HIDLYY8J8JKDNDL!DN+D,<,<=zz">?		$"2"2D4D4D;W#D$4$4h? ll5<<+>?U\\3%%89r6   c	                    |||g}	|	D 
cg c]  }
|
|
	 }	}
|	st        d      t        j                  |	d      }	| j                  |      }| j	                  |      }| j
                  @| j                  4|j                  |      }| j                  |      }| j                  |      }nd}d}| j                  @| j                  4|j                  |      }| j                  |      }| j                  |      }nd}d}| j                  @| j                  4|j                  |      }| j                  |      }| j                  |      }nd}d}|||g}|D 
cg c]  }
|
|
	 }}
t        j                  |d      }|||g}|D 
cg c]  }
|
|
	 }}
t        j                  |d      }|j                         dd | j                  | j                  fz   } |j                  | j!                  dddd      }|j                         dd | j                  | j                  fz   } |j                  | j!                  dddd      }|j                         dd | j                  | j                  fz   } |j                  | j!                  dddd      }|| j"                  z  }|Mt        j$                  |j                  d      |j                  d            j                  |j&                        }|ddddddf   |	ddddddf   z  }t        j(                  ||j+                  dd	            }||j-                  dd
      j/                         z
  }|j1                  d|z
  j3                         t        j4                  |j6                        j8                        } t;        j<                  d      |      }t        j(                  ||      }|j!                  dddd      j?                         }|j                         dd	 | j@                  fz   } |j                  | }| jC                  |      }|S c c}
w c c}
w c c}
w )z
        query_states: text
        key_value_states: protein
        query_states: [bs, query_seq_len, dim]
        key_value_states: [bs, kv_seq_len, dim]
        query_attn_mask: [bs, query_seq_len]
        kv_attn_mask: [bs, kv_seq_len]
        Nz=At least one modality should be provided for cross attention.r$   r(   r?   r   ry   r   r   TrT  )"r   r,   r{   r  r   r  r  rd   r  r  r  r  ro   r   r   r   rV  rJ  r@  rn   r   r   rW  rX  r_   rY  finfore   minr   Softmaxr   r   r  )rY   query_statesprotein_key_value_statesstructure_key_value_statesmsa_key_value_statesquery_attn_maskprotein_kv_attn_maskstructure_kv_attn_maskmsa_kv_attn_maskkv_attn_maskr   r   key_layer_proteinvalue_layer_proteinkey_layer_structurevalue_layer_structurekey_layer_msavalue_layer_msar   r   new_query_layer_shapenew_key_layer_shapenew_value_layer_shaperf   r   attention_scoresattention_probscontext_layernew_context_layer_shapes                                r4   cross_attentionz3EvollaSequenceAlignerCrossAttention.cross_attention  si   * -.DFVW#/Aa1=AA\]]yy15)),7 jj-'D,>,>,J'?'B'B<'P$ $ 0 01I J"&"4"45M"N $"&)d.B.B.N)C)F)F|)T&"&"4"45O"P$($8$89S$T!"&$(!<<#(B#7#:#:<#H  LL)=>M"nn-ABO M"O&(;]K	 );1Q]Q;	;IIiQ/	*,A?S"-?Qq??ii3 + 0 0 23B 7$$$$;
 !
 'k&&(=>FFq!QPQR'nn.s3$$$$7
 
 #INN$78@@Aq!L	 + 0 0 23B 7$$$$;
 !
 'k&&(=>FFq!QPQR!DJJ. "#jj):):1)=|?P?PQR?STWWXdXkXklO(D!T)9:\!TSWYZJZ=[[||K1D1DR1LM#l&7&7B&7&M&T&T&VV'33%%'\5G5G)H)L)L
 -"**,-=> _kB%--aAq9DDF"/"4"4"6s";t?Q?Q>S"S***,CDm4q BL < @s"   P5P5P:P:P?P?c           
      ^   |z|j                   \  }}}|jt        j                  ||      j                  |	j                        |	j                  ||f      j                  z  j                  |j                        }nd }|z|j                   \  }}}|jt        j                  ||      j                  |	j                        |
j                  ||f      j                  z  j                  |j                        }nd }|z|j                   \  }}}|jt        j                  ||      j                  |	j                        |j                  ||f      j                  z  j                  |j                        }nd }|}||j                         s$||j                         s||j                         rz|}| j                  ||||||||      }t        j                  | j                        |z  }||z   }|}| j                  |      t        j                  | j                        z  }||z   }|S )N)ro   )r  r  r  r  r  r  r  r  )rb   r,   r@  rd   rn   rT   Tanyr  tanhr  r  r  )rY   r  protein_kv_statesstructure_kv_statesmsa_kv_statesr  r  r  r  protein_batch_maskstructure_batch_maskmsa_batch_maskpast_key_valuesr_  protein_kv_seq_lenr)   structure_kv_seq_lenmsa_kv_seq_lenr   residuals                       r4   rl   z+EvollaSequenceAlignerCrossAttention.forwardo  sL    (*;*A*A'B"C#+JJr#5699:L:S:ST(//6H"5M/NPPQ"&--. %
 $( *,?,E,E)B$c%-JJr#78;;<N<U<UV*118Lb7Q1RTTU"(//0 '
 &*"$&3&9&9#B'JJr>2556H6O6OP$++."1E+FHHI"]))* !
  $$ */C/G/G/I#/4J4N4N4P).>.B.B.D$H 00*):+>%2 /%9'=!1 1 	M "JJt':':;mKM$}4M$H GGM2UZZ5NNM$}4Mr6   r   )NNNNNNN)rr   rs   rt   r+   rC   r  rl   rv   rw   s   @r4   r  r    sb     +/,0&*1: !4Z1:  #Tz	1:
 t1:fnn "#!Gr6   r  RMSNormc                   h     e Zd Zddeddf fdZdej                  dej                  fdZd Z xZ	S )	r  r;   r   Nc                     t         |           t        j                  t	        j
                  |            | _        || _        y)z<
        EvollaRMSNorm is equivalent to T5LayerNorm
        N)rB   rC   r   r{  r,   r@  weightvariance_epsilon)rY   rF   r;   r[   s      r4   rC   zEvollaRMSNorm.__init__  s1     	ll5::k#:; #r6   r   c                 "   |j                   }|j                  t        j                        }|j	                  d      j                  dd      }|t        j                  || j                  z         z  }| j                  |j                  |      z  S )Nry   r?   T)rU  )	re   rd   r,   float32powmeanrsqrtr  r  )rY   r   input_dtypevariances       r4   rl   zEvollaRMSNorm.forward  sy    #))%((7 $$Q',,R,>%Ht?T?T4T(UU{{]--k:::r6   c                 ^    t        | j                  j                         d| j                   S )Nz, eps=)r   r  rb   r  r9  s    r4   
extra_reprzEvollaRMSNorm.extra_repr  s*    ))*+6$2G2G1HIIr6   )gư>)
rr   rs   rt   rc   rC   r,   r   rl   r  rv   rw   s   @r4   r  r    s7    $ $$ $;U\\ ;ell ;Jr6   r  c                        e Zd ZU ej                  ed<   ddef fdZe	 	 	 ddedz  de	d   de
dz  ded	ef   fd
       Z ej                         ed               Z xZS )EvollaRotaryEmbeddingr   NrZ   c                    t         |           |j                  | _        |j                  | _        || _        | j
                  j                  d   | _        | j                  }| j                  dk7  rt        | j                     } || j
                  |      \  }| _
        | j                  d|d       | j                  d|j                         d       y )N	rope_typedefaultr   Fr@   original_inv_freq)rB   rC   rS   max_seq_len_cachedoriginal_max_seq_lenrZ   rope_parametersr  compute_default_rope_parametersr   attention_scalingrQ   clone)rY   rZ   rn   rope_init_fnr   r[   s        r4   rC   zEvollaRotaryEmbedding.__init__  s    "("@"@$*$B$B!44[A!%!E!E>>Y&.t~~>L+7V+L($(ZeD0(..2BuUr6   rn   ztorch.devicer   r   ztorch.Tensorc                    | j                   d   }t        | dd      xs | j                  | j                  z  }d}d|t	        j
                  d|dt        j                        j                  |t        j                        |z  z  z  }||fS )	a  
        Computes the inverse frequencies according to the original RoPE implementation
        Args:
            config ([`~transformers.PreTrainedConfig`]):
                The model configuration.
            device (`torch.device`):
                The device to use for initialization of the inverse frequencies.
            seq_len (`int`, *optional*):
                The current sequence length. Unused for this type of RoPE.
        Returns:
            Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
            post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).
        
rope_thetahead_dimNr   r   ry   r   rn   re   )	r  rP   rF   r   r,   rR   r   rd   rc   )rZ   rn   r   baser)   attention_factorr   s          r4   r  z5EvollaRotaryEmbedding.compute_default_rope_parameters  s    & %%l3fj$/c63E3EIcIc3c U\\!S!5;;?BB&X]XcXcBdgjjk
 )))r6   c                 N   | j                   d d d d f   j                         j                  |j                  d   dd      j	                  |j
                        }|d d d d d f   j                         }t        |j
                  j                  t              r/|j
                  j                  dk7  r|j
                  j                  nd}t        |d      5  |j                         |j                         z  j                  dd      }t        j                  ||fd	      }|j                         | j                  z  }|j                         | j                  z  }	d d d        j	                  |j                   
      	j	                  |j                   
      fS # 1 sw Y   AxY w)Nr   r?   r$   mpscpuF)device_typeenabledry   r(   r   )r   rc   rT   rb   rd   rn   r)  typestrr    r   r,   r{   r   r  r   re   )
rY   r}   r>   inv_freq_expandedposition_ids_expandedr  r   r   r   r   s
             r4   rl   zEvollaRotaryEmbedding.forward  sR    !MM$4-8>>@GGHZHZ[\H]_acdehhijiqiqr ,QaZ 8 > > @'1!((--'E!((--[`J`ahhmmfkUC 	5&,,.1F1L1L1NNYYZ[]^_E))UEN3C'')d444C'')d444C		5 vvAGGv$cff177f&;;;	5 	5s   BFF$r   r   )rr   rs   rt   r,   r   r   r%   rC   staticmethodr   r+   r   rc   r  no_gradr   rl   rv   rw   s   @r4   r  r    s    llV| V  &*+/"*t#*(* t* 
~u$	%	* *: U]]_<  <r6   r  c                   $     e Zd Z fdZd Z xZS )	EvollaMLPc                    t         |           || _        |j                  | _        |j                  | _        t        j                  | j                  | j                  |j                        | _        t        j                  | j                  | j                  |j                        | _	        t        j                  | j                  | j                  |j                        | _
        t        |j                     | _        y )NrH  )rB   rC   rZ   rF   r   r   r   mlp_bias	gate_projup_proj	down_projr	   
hidden_actact_fnrX   s     r4   rC   zEvollaMLP.__init__  s    !--!'!9!94#3#3T5K5KRXRaRabyy!1!143I3IPVP_P_`4#9#94;K;KRXRaRabV../r6   c                     | j                  | j                  | j                  |            | j                  |      z        }|S r   )r  r  r  r  )rY   r}   r  s      r4   rl   zEvollaMLP.forward  s6    NN4;;t~~a/@#ADLLQRO#ST	r6   r   rw   s   @r4   r  r    s    0r6   r  c                     | dd| j                   d   dz  f   }| d| j                   d   dz  df   }t        j                  | |fd      S )z*Rotates half the hidden dims of the input..Nr?   ry   r(   )rb   r,   r{   r|   s      r4   rotate_halfr    sZ    	
3"!''"+"""	#B	
3q ""	#B99rc2YB''r6   rotary_pos_embc                     |j                  |      }|j                  |      }| |z  t        |       |z  z   }||z  t        |      |z  z   }||fS )a  Applies Rotary Position Embedding to the query and key tensors.

    Args:
        q (`torch.Tensor`): The query tensor.
        k (`torch.Tensor`): The key tensor.
        cos (`torch.Tensor`): The cosine part of the rotary embedding.
        sin (`torch.Tensor`): The sine part of the rotary embedding.
        unsqueeze_dim (`int`, *optional*, defaults to 1):
            The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
            sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
            that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
            k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
            cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
            the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
    Returns:
        `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
    )r`   r  )r   r   r   r   unsqueeze_dimq_embedk_embeds          r4   apply_rotary_pos_embr  &  sY    & --
&C
--
&C3w;q>C/0G3w;q>C/0GGr6   r   n_repr   c                     | j                   \  }}}}|dk(  r| S | dddddddddf   j                  |||||      } | j                  |||z  ||      S )z
    This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
    num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
    r$   N)rb   rT   r   )r   r  batchnum_key_value_headsslenr  s         r4   	repeat_kvr  @  so    
 2?1D1D.Ehz!!Qa"23::5BUW\^bdlmM  (;e(CT8TTr6   c                        e Zd ZdZdedef fdZ	 	 	 ddej                  de	ej                  ej                  f   dz  dej                  dz  d	e
dz  d
ee   de	ej                  ej                  f   fdZ xZS )EvollaAttentionz=Multi-headed attention from 'Attention Is All You Need' paperrZ   r   c                 d   t         |           || _        || _        t	        |d|j
                  |j                  z        | _        |j                  |j                  z  | _	        | j                  dz  | _
        |j                  | _        d| _        t        j                  |j
                  |j                  | j                  z  |j                        | _        t        j                  |j
                  |j                  | j                  z  |j                        | _        t        j                  |j
                  |j                  | j                  z  |j                        | _        t        j                  |j                  | j                  z  |j
                  |j                        | _        y )Nr  r   TrH  )rB   rC   rZ   r   rP   rF   r   r  r  num_key_value_groupsr   attention_dropoutr   r   r   attention_biasq_projk_projv_projo_projrY   rZ   r   r[   s      r4   rC   zEvollaAttention.__init__P  sM   "
F4F4F&JdJd4de$*$>$>&B\B\$\!}}d*!'!9!9ii : :T]] JQWQfQf
 ii : :T]] JQWQfQf
 ii : :T]] JQWQfQf
 ii&&68J8JQWQfQf
r6   Nr   rU   rf   r  r   r   c                 
   |j                   d d }g |d| j                  }| j                  |      j                  |      j	                  dd      }| j                  |      j                  |      j	                  dd      }	| j                  |      j                  |      j	                  dd      }
|\  }}t        ||	||      \  }}	| |j                  |	|
| j                        \  }	}
t        j                  | j                  j                  t              } || ||	|
|f| j                  sdn| j                   | j"                  d|\  }} |j$                  g |d j'                         }| j)                  |      }||fS )Nr?   r$   ry   r]   r   )rb   r  r%  r   r   r&  r'  r  updater   r   r   rZ   r   r   r   r#  r   r   r   r(  )rY   r   rU   rf   r  r   rp   r   r  
key_statesvalue_statesr   r   r   r   r   s                   r4   rl   zEvollaAttention.forwardg  s    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST&S#7jRUWZ#[ j&'6'='=j,X\XfXf'g$J(?(M(MKK,,.E)
 %8	%
  $}}C$2H2HLL	%
 	%
!\ *k));;;;FFHkk+.L((r6   r   )rr   rs   rt   ru   r%   r+   rC   r,   r   r   r
   r   r   rl   rv   rw   s   @r4   r   r   L  s    G
| 
 
4 IM.2(,&)||&) #5<<#=>E&) t+	&)
 &) +,&) 
u||U\\)	*&)r6   r   c                       e Zd Zdedef fdZ	 	 	 	 	 	 	 	 	 	 	 	 ddej                  deej                  ej                  f   dz  dej                  dz  dej                  dz  d	e
dz  d
edz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  fdZ xZS )EvollaDecoderLayerrZ   r   c                    t         |           |j                  | _        t        ||      | _        t        |      | _        t        |j                  |j                        | _	        t        |j                  |j                        | _
        |dz   t        |j                  |j                  z  d      z  dk(  rt        ||j                        | _        y y )NrZ   r   r:   r$   r   )r  )rB   rC   rF   r   	self_attnr  mlpr  rms_norm_epsinput_layernormpost_attention_layernormmaxr  aligner_num_add_layersr  adapterr)  s      r4   rC   zEvollaDecoderLayer.__init__  s    !--()LV$,V-?-?VEXEXY(5f6H6HfNaNa(b%MS!9!9V=Z=Z!Z\]^^bcc>$*$6$6DL dr6   Nr   rU   rf   r>   r  	use_cacher  r  r  r  r  r  r  r   c           
      
   |}| j                  |      } | j                  d||||||d|\  }}||z   }|}| j                  |      }| j                  |      }||z   }t	        | d      r| j                  ||||	||
||      }|S )N)r   rf   r>   r  r:  rU   r9  )r  r  r  r  r  r  r  r  r  )r5  r2  r6  r3  r   r9  )rY   r   rU   rf   r>   r  r:  r  r  r  r  r  r  r  r   r  r   s                    r4   rl   zEvollaDecoderLayer.forward  s    " !,,]; *4>> 
')%+ 3
 
q !=0 !55mD/ =04# LL*"3$7+ /#5%9- ) 	M r6   )NNNNFNNNNNNN)rr   rs   rt   r%   r+   rC   r,   r   r   r  r
   rY  rl   rv   rw   s   @r4   r/  r/    sG   |  $ IM.204(,!&1537-12648.2/33||3 #5<<#=>E3 t+	3
 &&-3 3 $;3 !<<$.3 #\\D03 ||d*3 "LL4/3 $llT13 t+3 ,3  
!3r6   r/  c                        e Zd ZU eed<   dZdZg dZdgZdZ	dZ
dZdZdZeedZ ej$                          fd       Z xZS )	EvollaPreTrainedModelrZ   r  T)r/  ru  r  r  F)r   r%  c                    | j                   j                  }t        |   |       t	        |t
              rht        j                  |j                         t        j                  |j                         t        j                  |j                  j                         y t	        |t              r#t        j                  |j                  d|       y y )Nr]   )r  std)rZ   initializer_rangerB   r(  r)  r  r*  zeros_r  r  ones_r  r  ru  normal_rZ  )rY   r   r?  r[   s      r4   r(  z#EvollaPreTrainedModel._init_weights  s    kk++f%fABKK--.KK(JJv,,334 ABLLcs; Cr6   )rr   rs   rt   r%   r   base_model_prefixsupports_gradient_checkpointingr,  _skip_keys_device_placementr-  r.  r/  _can_compile_fullgraphr0  r/  r   r1  r,   r  r(  rv   rw   s   @r4   r=  r=    so    &*#
 $5"5 N!"'+%
 U]]_< <r6   r=  c                       e Zd Zdef fdZd Zd Zeee		 	 	 	 	 	 	 	 	 	 	 	 dde
j                  dz  de
j                  dz  de
j                  dz  d	edz  d
e
j                  dz  dedz  de
j                  dz  de
j                  dz  de
j                  dz  de
j                  dz  de
j                  dz  de
j                  dz  deez  fd                     Z xZS )EvollaModelrZ   c           	      F   t         |   |       |j                  | _        |j                  | _        t        j                  | j                  |j                  | j                        | _        t        |      | _
        t        j                  t        |j                        D cg c]  }t        ||       c}      | _        t!        |j                  |j"                        | _        t'        |dd      | _        t+        |      | _        | j/                          y c c}w )Nr  r1  r:   r  F)rB   rC   rG   r1   rE   r   rD   rF   embed_tokensr  protein_encoderr  r  r  r/  r}  r  r4  rm  rP   r  r  
rotary_embr6  r)  s      r4   rC   zEvollaModel.__init__  s     !.. ++LL&:L:LdN^N^_36Bmm "'v'?'?!@
 	 #!'
 "&"4"4&:M:MN	&-f6NPU&V#/v>s   $Dc                     | j                   S r   rK  r9  s    r4   r:  z EvollaModel.get_input_embeddings  s       r6   c                     || _         y r   rO  r<  s     r4   r=  z EvollaModel.set_input_embeddings  s
    !r6   Nr0   rf   r>   r  rg   r:  protein_input_idsprotein_attention_maskstructure_feats	msa_featsr  r  r   c                    |du |duz  rt        d      || j                  |      }|r|t        | j                        }|V||j	                         nd}t        j                  |j                  d   |j                        |z   }|j                  d      }d}d}|^|\| j                  ||      }|j                  }t        j                  |j                  d   |j                  t
        j                        }t        | j                  |||	      }|}| j                  ||
      }| j                   D ]  } ||f||||||	|
|||||d|} | j#                  |      }t%        ||      }|S )a;  
        protein_input_ids (torch.LongTensor):
            The input IDs for the protein sequence in structure-aware tokens. Should be of shape `(batch_size, protein_seq_length)` and type `torch.LongTensor`.
        protein_attention_mask (torch.Tensor):
            The attention mask for the protein sequence. Should be of shape `(batch_size, protein_seq_length)` and type `torch.Tensor`.
        structure_feats (torch.FloatTensor):
            The input IDs for purely structure-based features. Should be of shape `(batch_size, structure_seq_length, structure_feat_dim)` and type `torch.FloatTensor`. Dummy input for now.
        msa_feats (torch.FloatTensor):
            The input IDs for purely MSA-based features. Should be of shape `(batch_size, msa_seq_length, msa_feat_dim)` and type `torch.FloatTensor`. Dummy input for now.
        structure_batch_mask (torch.Tensor):
            The batch mask to decide which protein sequences are purely structure-based. Should be of shape `(batch_size)` and type `torch.Tensor`. Should be paired with `structure_feats`. Dummpy input for now.
        msa_batch_mask (torch.Tensor):
            The batch mask to decide which protein sequences are purely MSA-based. Should be of shape `(batch_size)` and type `torch.Tensor`. Should be paired with `msa_feats`. Dummpy input for now.
        Nz:You must specify exactly one of input_ids or inputs_embedsr  r   r$   r   r?  r  )rZ   rg   rf   r  )r>   )rf   r>   r  r:  r  r  r  r  r  r  r  rU   )r  r  )r   rK  r   rZ   get_seq_lengthr,   rR   rb   rn   r`   rL  r  r@  rY  r   rM  r}  rm  r   )rY   r0   rf   r>   r  rg   r:  rQ  rR  rS  rT  r  r  r   past_seen_tokensprotein_featsr  protein_outputscausal_maskr   rU   decoder_layerr   s                          r4   rl   zEvollaModel.forward  s   B -t";<YZZ  --i8M0*$++>OCRC^==?de <<(;(;A(>}G[G[\_ooL'11!4L!(-C-O"22+5 3 O ,FFM!&!''*(//jj" );;')+	
 &"oom,oW![[ 	M)*) /#"/$3'#5%9- .$7 M	$ 		-0(++
 r6   )NNNNNNNNNNNN)rr   rs   rt   r%   rC   r:  r=  r   r!   r#   r,   r  r   r
   r   rY  r   r   rl   rv   rw   s   @r4   rI  rI    sf   | *!"  .2.204(,26!%596:48.248.2]##d*] t+] &&-	]
 ] ((4/] $;] !++d2] !&t 3] **T1] $$t+] $llT1] t+] 
(	(]    ]r6   rI  c                   4    e Zd Z fdZd Zd Zee	 	 	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  dej                  dz  d	ej                  dz  d
ej                  dz  dedz  deej                  z  fd              Z xZS )EvollaForProteinText2Textc                     t         |   |       t        |      | _        |j                  | _        t        j                  |j                  | j                  d      | _        | j                          y rl  )
rB   rC   rI  r  rE   r   r   rF   lm_headr6  rX   s     r4   rC   z"EvollaForProteinText2Text.__init__x  sQ      (
 ++yy!3!3T__5Qr6   c                 6    | j                   j                         S r   )r  r:  r9  s    r4   r:  z.EvollaForProteinText2Text.get_input_embeddings  s    zz..00r6   c                 8    | j                   j                  |      S r   )r  r=  r<  s     r4   r=  z.EvollaForProteinText2Text.set_input_embeddings  s    zz..u55r6   Nr0   rf   rg   labelsrQ  rR  r:  logits_to_keepc	           
      h    | j                   d||||||d|	}
|
j                  }t        |t              rt	        | d      n|}| j                  |dd|ddf         }d}|  | j                  d||| j                  d|	}t        |||
j                  |
j                  |
j                        }|S )a,  
        protein_input_ids (torch.LongTensor):
            The input IDs for the protein sequence. Should be of shape `(batch_size, protein_seq_length)` and type `torch.LongTensor`.
        protein_attention_mask (torch.Tensor):
            The attention mask for the protein sequence. Should be of shape `(batch_size, protein_seq_length)` and type `torch.Tensor`.

        Example:

        ```python
        >>> from transformers import EvollaProcessor, EvollaForProteinText2Text
        >>> model = EvollaForProteinText2Text.from_pretrained("westlake/Evolla-10B-hf")
        >>> processor = EvollaProcessor.from_pretrained("westlake/Evolla-10B-hf")

        >>> protein_information = {
            "aa_seq": "your amino acid sequence",
            "foldseek": "your foldseek sequence",
        }
        >>> question = "What is the function of this protein?"
        >>> message = [
            {"role": "system", "content": "You are an AI expert that can answer any questions about protein."},
            {"role": "user", "content": question},
        ]

        >>> inputs = processor(proteins=[protein_information], messages_list=[message], return_tensors="pt", padding="longest")
        >>> outputs = model.generate(**inputs)

        >>> print(processor.batch_decode(outputs, skip_special_tokens=True))
        ```)r0   rf   rg   rQ  rR  r:  N)logitsrb  rE   )lossre  r  r   r%  r  )r  r  r)  r+   slicer_  loss_functionrE   r   r  r   r%  )rY   r0   rf   rg   rb  rQ  rR  r:  rc  r   outputsr   slice_indicesre  rf  
lm_outputss                   r4   rl   z!EvollaForProteinText2Text.forward  s    T ,64:: ,
)'/#9,
 ,
  118B>SV8W~ot4]kmA}a,?@A%4%%iVFtibhiD+#33!//))

 r6   )NNNNNNNr   )rr   rs   rt   rC   r:  r=  r   r   r,   r  r   r   rY  r+   rl   rv   rw   s   @r4   r]  r]  w  s    16  .2.226*.596:!%-.B##d*B t+B ((4/	B
   4'B !++d2B !&t 3B $;B ell*B  Br6   r]  )r]  rI  r=  )Nr]   )r$   )^r   collections.abcr   dataclassesr   typingr   r,   r    r   r*  activationsr	   cache_utilsr
   r   
generationr   integrationsr   r   r   masking_utilsr   r   modeling_layersr   modeling_outputsr   r   r   r   r   modeling_rope_utilsr   r   modeling_utilsr   r   processing_utilsr   utilsr   r   r   utils.genericr    r!   utils.output_capturingr"   r#   configuration_evollar%   r&   r5   Moduler8   r   r   r   r   rc   r   r   r   r   r   r   r   r   r  r  r"  r3  rF  rj  ru  r  r  r  r  r  r  r  r  r+   r  r   r/  r=  rI  r]  __all__r  r6   r4   <module>r     s=  *  $ !    & ! . ) f f J 9  L F & I I G E <4 ^=RYY ^=B(
2*
")) *
f !%II%<<% 
% <<	%
 LL4'% T\% % '(%8M)		 M)`
RYY 
BII 8;ryy 
 
42 4nS")) S@  2/ 2 2.+
!< +
\7 		 7 tA		 A'.		 '.T <k <  <
299 
$k")) k\ Y'JBII J (J(><BII ><B		  ( *+ ,2	UU\\ 	U# 	U%,, 	U )*@)bii @) +@)FC3 CL <O < <B|' |~S 5 Sl Pr6   