
    i4                        d dl Z d dl mZ d dlmZmZmZ ddlmZ ddlm	Z	 ddl
mZmZmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZmZ ddlmZ ddlmZmZmZmZ  ed      e G d de                    Z G d de      Z  G d de      Z! G d de      Z" G d de      Z#e G d de"             Z$e G d de"             Z%e G d d e"             Z&g d!Z'y)"    N)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )strict)create_bidirectional_mask)BaseModelOutputMaskedLMOutputSequenceClassifierOutputTokenClassifierOutput)RopeParameters)Unpack)auto_docstring)TransformersKwargscan_return_tuple   )LlamaConfig)LlamaAttention
LlamaModelLlamaPreTrainedModelLlamaRMSNormzEuroBERT/EuroBERT-210m)
checkpointc                       e Zd ZU dZdZdZeed<   dZeed<   dZ	eed<   d	Z
eed
<   d	Zeed<   dZedz  ed<   dZeed<   dZeed<   dZeed<   dZeed<   dZedz  ed<   dZeee   z  dz  ed<   dZedz  ed<   dZeed<   dZeed<   dZeed <   dZeez  dz  ed!<   dZeed"<   d#Zeez  ed$<   dZ eed%<   dZ!edz  ed&<   d'Z"eed(<    fd)Z# xZ$S )*EuroBertConfiga  
    mask_token_id (`int`, *optional*, defaults to 128002):
        Mask token id.
    classifier_pooling (`str`, *optional*, defaults to `"late"`):
        The pooling strategy to use for the classifier. Can be one of ['bos', 'mean', 'late'].

    ```python
    >>> from transformers import EuroBertModel, EuroBertConfig

    >>> # Initializing a EuroBert eurobert-base style configuration
    >>> configuration = EuroBertConfig()

    >>> # Initializing a model from the eurobert-base style configuration
    >>> model = EuroBertModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```euroberti  
vocab_sizei   hidden_sizei   intermediate_size   num_hidden_layersnum_attention_headsNnum_key_value_headssilu
hidden_acti    max_position_embeddingsg{Gz?initializer_rangeh㈵>rms_norm_epsi  bos_token_idi eos_token_idpad_token_idi mask_token_id   pretraining_tpFtie_word_embeddingsrope_parametersattention_biasg        attention_dropoutmlp_biashead_dimlateclassifier_poolingc                 ^    | j                   | j                  | _         t        |   di | y )N )r#   r"   super__post_init__)selfkwargs	__class__s     ~/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/eurobert/modular_eurobert.pyr;   zEuroBertConfig.__post_init__N   s-    ##+'+'?'?D$''    )%__name__
__module____qualname____doc__
model_typer   int__annotations__r   r   r!   r"   r#   r%   strr&   r'   floatr)   r*   r+   listr,   r-   r/   r0   boolr1   r   dictr2   r3   r4   r5   r7   r;   __classcell__r>   s   @r?   r   r      s)   & JJK!s!s!!&*t*J#'S'#u#L%%L#*%+1L#S	/D(1%L#*%M3NC %%48O^d*T18 ND %(sU{(HdHcDj$$( (r@   r   c                         e Zd Zd fd	Z xZS )EuroBertRMSNormc                 &    t         |   ||       y N)r:   __init__)r<   r   epsr>   s      r?   rS   zEuroBertRMSNorm.__init__U   s    c*r@   )r(   )rA   rB   rC   rS   rM   rN   s   @r?   rP   rP   T   s    + +r@   rP   c                   (     e Zd Zdedef fdZ xZS )EuroBertAttentionconfig	layer_idxc                 4    t         |   ||       d| _        y )NF)r:   rS   	is_causal)r<   rW   rX   r>   s      r?   rS   zEuroBertAttention.__init__Z   s    +r@   )rA   rB   rC   r   rF   rS   rM   rN   s   @r?   rV   rV   Y   s    ~ #  r@   rV   c                       e Zd Zy)EuroBertPreTrainedModelN)rA   rB   rC   r9   r@   r?   r\   r\   _   s    r@   r\   c                       e Zd Z	 	 	 	 d	dej                  dej
                  dz  dej                  dz  dej                  dz  dee   de	e
z  fdZy)
EuroBertModelN	input_idsattention_maskposition_idsinputs_embedsr=   returnc                    |d u |d uz  rt        d      || j                  |      }|=t        j                  |j                  d   |j
                        j                  d      }t        | j                  ||      }|}| j                  ||      }| j                  d | j                  j                   D ]  }	 |	|f|||d|} | j                  |      }t        |      S )	Nz:You must specify exactly one of input_ids or inputs_embedsr.   )devicer   )rW   rb   r`   )ra   )r`   position_embeddingsra   )last_hidden_state)
ValueErrorembed_tokenstorcharangeshapere   	unsqueezer	   rW   
rotary_emblayersr!   normr
   )
r<   r_   r`   ra   rb   r=   bidirectional_maskhidden_statesrf   encoder_layers
             r?   forwardzEuroBertModel.forwardd   s    -t";<YZZ *.*;*;I*FM <<(;(;A(>}G[G[\ffghiL6;;')
 &"oom,oW![[)H4;;+H+HI 	M)1$7)	
 M	 		-0+
 	
r@   )NNNN)rA   rB   rC   rj   
LongTensorTensorFloatTensorr   r   tupler
   rt   r9   r@   r?   r^   r^   c   s     '+.20426&
##&
 t+&
 &&-	&

 ((4/&
 +,&
 
	 &
r@   r^   c                   0    e Zd ZddiZddiZddgdgfiZdef fdZee		 	 	 	 	 dd
e
j                  d	z  de
j                  d	z  de
j                  d	z  de
j                  d	z  de
j                  d	z  dee   dee
j                     ez  fd              Z xZS )EuroBertForMaskedLMzlm_head.weightzmodel.embed_tokens.weightlm_headcolwise_gather_outputrr   logitsrW   c                     t         |   |       t        |      | _        t	        j
                  |j                  |j                  |j                        | _	        | j                          y rR   )r:   rS   r^   modelr   Linearr   r   r4   r{   	post_initr<   rW   r>   s     r?   rS   zEuroBertForMaskedLM.__init__   sL     "6*
yy!3!3V5F5FX 	r@   Nr_   r`   ra   rb   labelsr=   rc   c                     | j                   d||||d|}| j                  |j                        }d}	|* | j                  d||| j                  j
                  d|}	t        |	||j                  |j                        S )a)  
        Example:

        ```python
        >>> from transformers import AutoTokenizer, EuroBertForMaskedLM

        >>> model = EuroBertForMaskedLM.from_pretrained("EuroBERT/EuroBERT-210m")
        >>> tokenizer = AutoTokenizer.from_pretrained("EuroBERT/EuroBERT-210m")

        >>> text = "The capital of France is <|mask|>."
        >>> inputs = tokenizer(text, return_tensors="pt")
        >>> outputs = model(**inputs)

        >>> # To get predictions for the mask:
        >>> masked_index = inputs["input_ids"][0].tolist().index(tokenizer.mask_token_id)
        >>> predicted_token_id = outputs.logits[0, masked_index].argmax(axis=-1)
        >>> predicted_token = tokenizer.decode(predicted_token_id)
        >>> print("Predicted token:", predicted_token)
        Predicted token:  Paris
        ```)r_   r`   ra   rb   N)r}   r   r   lossr}   rr   
attentionsr9   )	r   r{   rg   loss_functionrW   r   r   rr   r   )
r<   r_   r`   ra   rb   r   r=   outputsr}   r   s
             r?   rt   zEuroBertForMaskedLM.forward   s    > $.4:: $
)%'	$

 $
 g778%4%%pVFt{{OeOepiopD!//))	
 	
r@   NNNNN)rA   rB   rC   _tied_weights_keys_tp_plan_pp_planr   rS   r   r   rj   ru   rv   rw   r   r   rx   r   rt   rM   rN   s   @r?   rz   rz      s    *,GH23H_-z:;H~   .2.20426*./
##d*/
 t+/
 &&-	/

 ((4//
   4'/
 +,/
 
u||	~	-/
  /
r@   rz   c                       e Zd Zdef fdZee	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  d	ee   d
eej                     ez  fd              Z xZS )!EuroBertForSequenceClassificationrW   c                    t         |   |       |j                  | _        |j                  | _        t	        |      | _        t        j                  |j                  |j                        | _	        t        j                         | _        t        j                  |j                  | j                        | _        | j                          y rR   )r:   rS   
num_labelsr7   r^   r   r   r   r   denseGELU
activation
classifierr   r   s     r?   rS   z*EuroBertForSequenceClassification.__init__   s      ++"(";";"6*
YYv1163E3EF
'')))F$6$6Hr@   Nr_   r`   ra   rb   r   r=   rc   c                     | j                   |f|||d|}|d   }| j                  dv r| j                  dk(  r
|d d df   }	ny| j                  dk(  rj||j                  d      }	nU|j                  |j                        }||j                  d      z  j                  d      }	|	|j                  dd	
      z  }	| j                  	      }	| j                  |	      }	| j                  |	      }
n| j                  dk(  r| j                  |      }| j                  |      }| j                  |      }
||
j                  d      }
nU|j                  |
j                        }|
|j                  d      z  j                  d      }
|
|j                  dd	
      z  }
d }||j                  
j                        }| j                  j                  | j                  dk(  rd| j                  _        nl| j                  dkD  rL|j                  t        j                  k(  s|j                  t        j                   k(  rd| j                  _        nd| j                  _        | j                  j                  dk(  rIt#               }| j                  dk(  r& ||
j%                         |j%                               }n ||
|      }n| j                  j                  dk(  r=t'               } ||
j)                  d| j                        |j)                  d            }n,| j                  j                  dk(  rt+               } ||
|      }t-        |
|j.                  |j0                        S )Nr`   ra   rb   r   )bosmeanr   r   r.   )dimT)r   keepdimr6   
regressionsingle_label_classificationmulti_label_classificationr   )r   r7   r   tore   rm   sumr   r   r   rW   problem_typer   dtyperj   longrF   r   squeezer   viewr   r   rr   r   )r<   r_   r`   ra   rb   r   r=   encoder_outputrg   pooled_outputr}   xr   loss_fcts                 r?   rt   z)EuroBertForSequenceClassification.forward   s    $
)%'	

 
 +1-""o5&&%/ 1!Q$ 7((F2!)$5$:$:q$:$AM%3%6%67H7O7O%PN%69Q9QRT9U%U$Z$Z_`$Z$aM!^%7%7At%7%LLM JJ}5M OOM:M__]3F$$.

,-A"A__Q'F%+!/!2!26==!A >#;#;B#??DDDK.,,D,AAYYv}}-F{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#FF3D))-JJ+-B @&++b/R))-II,./'(66%00	
 	
r@   r   )rA   rB   rC   r   rS   r   r   rj   ru   rv   rw   r   r   rx   r   rt   rM   rN   s   @r?   r   r      s    	~ 	  .2.20426*.J
##d*J
 t+J
 &&-	J

 ((4/J
   4'J
 +,J
 
u||	7	7J
  J
r@   r   c                       e Zd Zdef fdZd Zd Zee	 	 	 	 	 dde	j                  dz  de	j                  dz  de	j                  dz  d	e	j                  dz  d
e	j                  dz  dee   deez  fd              Z xZS )EuroBertForTokenClassificationrW   c                     t         |   |       |j                  | _        t        |      | _        t        j                  |j                  |j                        | _        | j                          y rR   )
r:   rS   r   r^   r   r   r   r   r   r   r   s     r?   rS   z'EuroBertForTokenClassification.__init__-  sQ      ++"6*
))F$6$68I8IJr@   c                 .    | j                   j                  S rR   r   ri   )r<   s    r?   get_input_embeddingsz3EuroBertForTokenClassification.get_input_embeddings5  s    zz&&&r@   c                 &    || j                   _        y rR   r   )r<   values     r?   set_input_embeddingsz3EuroBertForTokenClassification.set_input_embeddings8  s    "'

r@   Nr_   r`   ra   rb   r   r=   rc   c                 "    | j                   |f|||d|}|d   }| j                  |      }	d}
|<t               } ||	j                  d| j                        |j                  d            }
t        |
|	|j                  |j                        S )a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        r   r   Nr   r   )r   r   r   r   r   r   rr   r   )r<   r_   r`   ra   rb   r   r=   r   sequence_outputr}   r   r   s               r?   rt   z&EuroBertForTokenClassification.forward;  s    " $**
)%'	

 
 "!*1')HFKKDOO<fkk"oND$!//))	
 	
r@   r   )rA   rB   rC   r   rS   r   r   r   r   rj   ru   rv   rw   r   r   rx   r   rt   rM   rN   s   @r?   r   r   +  s    ~ '(  .2.20426*.#
##d*#
 t+#
 &&-	#

 ((4/#
   4'#
 +,#
 
&	&#
  #
r@   r   )r   r\   r^   rz   r   r   )(rj   r   torch.nnr   r   r   configuration_utilsr   masking_utilsr	   modeling_outputsr
   r   r   r   modeling_rope_utilsr   processing_utilsr   utilsr   utils.genericr   r   llamar   llama.modeling_llamar   r   r   r   r   rP   rV   r\   r^   rz   r   r   __all__r9   r@   r?   <module>r      s       A A ) 6 p p 1 & # A  a a 340([ 0(  50(f+l +
 	2 	'
J '
T >
1 >
 >
B X
(? X
 X
v 4
%< 4
 4
nr@   