
    iC                        d Z ddlZddlZddlmZ ddlmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZmZmZ dd
lmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ ddlmZmZm Z m!Z!m"Z" ddl#m$Z$ ddl%m&Z& ddl'm(Z(  G d de"      Z)e G d de             Z* G d de       Z+ G d de      Z,e G d de*             Z- ed        G d! d"e*e             Z. G d# d$e      Z/ G d% d&e$      Z0 G d' d(e!      Z1g d)Z2y)*zPyTorch PLBART model.    N)nn)CrossEntropyLoss   )initialization)Cache)GenerationMixin)BaseModelOutputSeq2SeqLMOutputSeq2SeqModelOutput)PreTrainedModel)Unpack)TransformersKwargsauto_docstringcan_return_tuple)merge_with_config_defaults)capture_outputs   )BartClassificationHeadBartDecoderBartEncoderBartForCausalLMBartScaledWordEmbedding)'BigBirdPegasusForSequenceClassification)shift_tokens_right   )PLBartConfigc                       e Zd Zy)PLBartScaledWordEmbeddingN__name__
__module____qualname__     z/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/plbart/modular_plbart.pyr   r   /       r$   r   c                   F     e Zd ZU eed<   dZdZddgZdZdZ	dZ
 fdZ xZS )PLBartPreTrainedModelconfigmodelTPLBartDecoderLayerPLBartEncoderLayerc                     t         |   |       t        |t              r t	        j
                  |j                         y y N)super_init_weights
isinstancePLBartForConditionalGenerationinitzeros_final_logits_bias)selfmodule	__class__s     r%   r0   z#PLBartPreTrainedModel._init_weights=   s2    f%f<=KK001 >r$   )r    r!   r"   r   __annotations__base_model_prefixsupports_gradient_checkpointing_no_split_modules_supports_flash_attn_supports_sdpa_supports_flex_attnr0   __classcell__r8   s   @r%   r(   r(   3   s<    &*#-/CDN2 2r$   r(   c                       e Zd Zy)PLBartEncoderNr   r#   r$   r%   rC   rC   C   r&   r$   rC   c                       e Zd Zy)PLBartDecoderNr   r#   r$   r%   rE   rE   G   r&   r$   rE   c                       e Zd ZdddZdef fdZd Zd Zee	e
	 	 	 	 	 	 	 	 	 ddej                  dz  d	ej                  dz  d
ej                  dz  dej                  dz  deej                     dz  dedz  dej                  dz  dej                  dz  dedz  dee   deej                     ez  fd                     Z xZS )PLBartModelzshared.weight)zencoder.embed_tokens.weightzdecoder.embed_tokens.weightr)   c                 J   t         |   |       |j                  |j                  }}|j                  rt        j                  |j                        nd}t        ||j                  ||      | _	        t        |      | _        t        |      | _        | j                          y )Ng      ?)embed_scale)r/   __init__pad_token_id
vocab_sizescale_embeddingmathsqrtd_modelr   sharedrC   encoderrE   decoder	post_init)r6   r)   padding_idxrL   rI   r8   s        r%   rJ   zPLBartModel.__init__R   sz     "("5"5v7H7HZ393I3Idii/s/
FNNKepq$V,$V,r$   c                     | j                   S r.   )rQ   )r6   s    r%   get_input_embeddingsz PLBartModel.get_input_embeddings^   s    {{r$   c                 ~    || _         | j                   | j                  _        | j                   | j                  _        y r.   )rQ   rR   embed_tokensrS   )r6   values     r%   set_input_embeddingsz PLBartModel.set_input_embeddingsa   s)    $(KK!$(KK!r$   N	input_idsattention_maskdecoder_input_idsdecoder_attention_maskencoder_outputspast_key_valuesinputs_embedsdecoder_inputs_embeds	use_cachekwargsreturnc
                 
   |"| t        || j                  j                        }| | j                  d	|||d|
}nGt	        |t
              s7t        |d   t        |      dkD  r|d   ndt        |      dkD  r|d   nd      } | j                  d	|||d   ||||	d|
}t        |j                  |j                  |j                  |j                  |j                  |j                  |j                  |j                        S )
a  
        decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
            Indices of decoder input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`] or [`PLBartMultiTokenizer`] depending on the checkpoint.
            See [`PreTrainedTokenizer.encode`] and [`PreTrainedTokenizer.__call__`] for details.

            [What are decoder input IDs?](../glossary#decoder-input-ids)

            PLBart uses a specific language id token as the starting token for `decoder_input_ids` generation that
            varies according to source and target language, *e.g.* 50003 for *en_XX*, and 50001 for *java*. If
            `past_key_values` is used, optionally only the last `decoder_input_ids` have to be input (see
            `past_key_values`).

            For translation and summarization training, `decoder_input_ids` should be provided. If no
            `decoder_input_ids` is provided, the model will create this tensor by shifting the `input_ids` to the right
            for denoising pre-training following the paper.
        decoder_attention_mask (:
            obj:*torch.LongTensor* of shape `(batch_size, target_sequence_length)`, *optional*):
            Default behavior:
            generate a tensor that ignores pad tokens in `decoder_input_ids`. Causal mask will also be used by default.
        N)r\   r]   rb   r   r   r   )last_hidden_statehidden_states
attentions)r\   r]   encoder_hidden_statesencoder_attention_maskra   rb   rd   )rh   ra   decoder_hidden_statesdecoder_attentionscross_attentionsencoder_last_hidden_staterk   encoder_attentionsr#   )r   r)   rK   rR   r1   r	   lenrS   r   rh   ra   ri   rj   ro   )r6   r\   r]   r^   r_   r`   ra   rb   rc   rd   re   decoder_outputss               r%   forwardzPLBartModel.forwardf   s3   P $)>)F 29dkk>V>V W"/;t|| 0#-+0 	0O O_=-"1!"4474H14Loa0RV14_1E1I?1-tO '$,, 	
'1"1!"4#1+/	
 	
 "-??+;;"1"?"?.99,==&5&G&G"1"?"?.99	
 		
r$   )	NNNNNNNNN)r    r!   r"   _tied_weights_keysr   rJ   rW   r[   r   r   r   torch
LongTensorTensorlistFloatTensorr   boolr   r   tupler   rt   r@   rA   s   @r%   rG   rG   K   sP    (7'6

| 
0
   .226596::>(,26:>!%J
##d*J
 ((4/J
 !++d2	J

 !&t 3J
 e//047J
 J
 ((4/J
  %0047J
 $;J
 +,J
 
u||	1	1J
    J
r$   rG   zv
    The PLBART Model with a language modeling head. Can be used for code-to-text, text-to-code and code-to-code.
    )custom_introc                       e Zd ZdZdgZddiZdef fdZ	 dded	edz  d
e	de
j                  f fdZdeddfdZeee	 	 	 	 	 	 	 	 	 	 ddej$                  dz  dej$                  dz  dej$                  dz  dej&                  dz  deej*                     dz  dedz  dej*                  dz  dej*                  dz  dej&                  dz  de	dz  dee   deej&                     ez  fd                     Zdej&                  fdZ xZS )r2   r*   r5   zlm_head.weightzmodel.shared.weightr)   c                 x   t         |   |       t        |      | _        | j	                  dt        j                  d| j                  j                  j                  f             t        j                  |j                  | j                  j                  j                  d      | _        | j                          y )Nr5   r   F)bias)r/   rJ   rG   r*   register_bufferrv   zerosrQ   num_embeddingsr   LinearrP   lm_headrT   )r6   r)   r8   s     r%   rJ   z'PLBartForConditionalGeneration.__init__   s~      (
0%++q$**BSBSBbBb>c2deyy1B1B1Q1QX]^r$   Nnew_num_tokenspad_to_multiple_ofmean_resizingrf   c                 z    t         |   |||      }| j                  |j                  j                  d          |S )Nr   )r/   resize_token_embeddings_resize_final_logits_biasweightshape)r6   r   r   r   new_embeddingsr8   s        r%   r   z6PLBartForConditionalGeneration.resize_token_embeddings   s?     8I[]jk&&~'<'<'B'B1'EFr$   c                 6   | j                   j                  d   }||k  r| j                   d d d |f   }nSt        j                  d||z
  f| j                   j                        }t        j
                  | j                   |gd      }| j                  d|       y )Nr   )device)dimr5   )r5   r   rv   r   r   catr   )r6   r   old_num_tokensnew_bias
extra_biass        r%   r   z8PLBartForConditionalGeneration._resize_final_logits_bias   s    //55b9^+--a..@AHa.)H%IRVRhRhRoRopJyy$"8"8*!E1MH0(;r$   r\   r]   r^   r_   r`   ra   rb   rc   labelsrd   re   c                 T   |	$|"| t        |	| j                  j                        } | j                  |f||||||||
d|}| j	                  |j
                        }|| j                  j                  |j                        z   }d}|	Ft               } ||j                  d| j                  j                        |	j                  d            }t        |||j                  |j                  |j                  |j                   |j"                  |j$                  |j&                  	      S )a
  
        decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
            Indices of decoder input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`] or [`PLBartMultiTokenizer`] depending on the checkpoint.
            See [`PreTrainedTokenizer.encode`] and [`PreTrainedTokenizer.__call__`] for details.

            [What are decoder input IDs?](../glossary#decoder-input-ids)

            PLBart uses a specific language id token as the starting token for `decoder_input_ids` generation that
            varies according to source and target language, *e.g.* 50003 for *en_XX*, and 50001 for *java*. If
            `past_key_values` is used, optionally only the last `decoder_input_ids` have to be input (see
            `past_key_values`).

            For translation and summarization training, `decoder_input_ids` should be provided. If no
            `decoder_input_ids` is provided, the model will create this tensor by shifting the `input_ids` to the right
            for denoising pre-training following the paper.
        decoder_attention_mask (:
            obj:*torch.LongTensor* of shape `(batch_size, target_sequence_length)`, *optional*):
            Default behavior:
            generate a tensor that ignores pad tokens in `decoder_input_ids`. Causal mask will also be used by default.
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
            config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
            (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.

        Example Mask-filling:

        ```python
        >>> from transformers import AutoTokenizer, PLBartForConditionalGeneration

        >>> model = PLBartForConditionalGeneration.from_pretrained("uclanlp/plbart-base")
        >>> tokenizer = AutoTokenizer.from_pretrained("uclanlp/plbart-base")

        >>> # en_XX is the language symbol id <LID> for English
        >>> TXT = "<s> Is 0 the <mask> Fibonacci number ? </s> en_XX"
        >>> input_ids = tokenizer([TXT], add_special_tokens=False, return_tensors="pt").input_ids

        >>> logits = model(input_ids).logits
        >>> masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero().item()
        >>> probs = logits[0, masked_index].softmax(dim=0)
        >>> values, predictions = probs.topk(5)

        >>> tokenizer.decode(predictions).split()
        ['first', 'same', 'highest', 'result', 'number']
        ```
        N)r]   r^   r`   r_   ra   rb   rc   rd   r   )	losslogitsra   rm   rn   ro   rp   rk   rq   )r   r)   rK   r*   r   rh   r5   tor   r   viewrL   r
   ra   rm   rn   ro   rp   rk   rq   )r6   r\   r]   r^   r_   r`   ra   rb   rc   r   rd   re   outputs	lm_logitsmasked_lm_lossloss_fcts                   r%   rt   z&PLBartForConditionalGeneration.forward   s2   @  (-B-J$6vt{{?W?W$X!&0djj'
)/+#9+'"7'
 '
 LL!:!:;	 6 6 9 9):J:J KK	')H%innR9O9O&PRXR]R]^`RabN#33")"?"?&99$55&-&G&G")"?"?&99

 
	
r$   c                 B    t        || j                  j                        S r.   )r   r)   rK   )r6   r   s     r%   %prepare_decoder_input_ids_from_labelszDPLBartForConditionalGeneration.prepare_decoder_input_ids_from_labels>  s    !&$++*B*BCCr$   )NT)
NNNNNNNNNN)r    r!   r"   r:   _keys_to_ignore_on_load_missingru   r   rJ   intr{   r   	Embeddingr   r   r   r   r   rv   rw   rx   ry   rz   r   r   r   r|   r
   rt   r   r@   rA   s   @r%   r2   r2      s     ':&;#/|  ae!7:TzY]	< < <   .226596::>(,26:>&*!%_
##d*_
 ((4/_
 !++d2	_

 !&t 3_
 e//047_
 _
 ((4/_
  %0047_
 t#_
 $;_
 +,_
 
u||		._
    _
BDELL Dr$   r2   c                       e Zd Zy)PLBartClassificationHeadNr   r#   r$   r%   r   r   B  r&   r$   r   c                        e Zd Z fdZ xZS )PLBartForSequenceClassificationc                  :     t               j                  di |  y)a  
        decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
            Indices of decoder input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`] or [`PLBartMultiTokenizer`] depending on the checkpoint.
            See [`PreTrainedTokenizer.encode`] and [`PreTrainedTokenizer.__call__`] for details.

            [What are decoder input IDs?](../glossary#decoder-input-ids)

            PLBart uses a specific language id token as the starting token for `decoder_input_ids` generation that
            varies according to source and target language, *e.g.* 50003 for *en_XX*, and 50001 for *java*. If
            `past_key_values` is used, optionally only the last `decoder_input_ids` have to be input (see
            `past_key_values`).

            For translation and summarization training, `decoder_input_ids` should be provided. If no
            `decoder_input_ids` is provided, the model will create this tensor by shifting the `input_ids` to the right
            for denoising pre-training following the paper.
        decoder_attention_mask (:
            obj:*torch.LongTensor* of shape `(batch_size, target_sequence_length)`, *optional*):
            Default behavior:
            generate a tensor that ignores pad tokens in `decoder_input_ids`. Causal mask will also be used by default.
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr#   r/   rt   super_kwargsr8   s    r%   rt   z'PLBartForSequenceClassification.forwardG  s    4 	','r$   )r    r!   r"   rt   r@   rA   s   @r%   r   r   F  s    ( (r$   r   c                   2     e Zd Zee fd              Z xZS )PLBartForCausalLMc                  :     t               j                  di |  y)aF  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
            config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
            (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.

        Example:

        ```python
        >>> from transformers import AutoTokenizer, PLBartForCausalLM

        >>> tokenizer = AutoTokenizer.from_pretrained("uclanlp/plbart-base")
        >>> model = PLBartForCausalLM.from_pretrained("uclanlp/plbart-base")
        >>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder."
        >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
        >>> outputs = model(**inputs)

        >>> logits = outputs.logits
        >>> expected_shape = [1, inputs.input_ids.shape[-1], model.config.vocab_size]
        >>> list(logits.shape) == expected_shape
        True
        ```Nr#   r   r   s    r%   rt   zPLBartForCausalLM.forwarde  s    2 	','r$   )r    r!   r"   r   r   rt   r@   rA   s   @r%   r   r   d  s    (  (r$   r   )r   r2   r   rG   r(   )3__doc__rN   rv   r   torch.nnr    r   r3   cache_utilsr   
generationr   modeling_outputsr	   r
   r   modeling_utilsr   processing_utilsr   utilsr   r   r   utils.genericr   utils.output_capturingr   bart.modeling_bartr   r   r   r   r   (bigbird_pegasus.modeling_bigbird_pegasusr   mbart.modeling_mbartr   configuration_plbartr   r   r(   rC   rE   rG   r2   r   r   r   __all__r#   r$   r%   <module>r      s       % &   ) 
 . & I I 7 5  _ 5 .	 7 	 2O 2 2	K 		K 	 g
' g
 g
T 
DD%:O DD
DDN	5 	(&M (<( (:r$   