
    i                         d dl mZ d dlZddlmZ ddlmZ ddlmZ ddl	m
Z
mZ dd	lmZ d
dlmZ  ej                   e      Ze G d de             Ze
 G d de             ZdgZy)    )	dataclassN   )Cache)$ImageClassifierOutputWithNoAttention)PreTrainedModel)auto_docstringlogging   )AutoModelForImageTextToText   )ShieldGemma2Configc                   :    e Zd ZU dZdZej                  dz  ed<   y)0ShieldGemma2ImageClassifierOutputWithNoAttentionz^ShieldGemma2 classifies imags as violative or not relative to a specific policy
    Args:
    Nprobabilities)__name__
__module____qualname____doc__r   torchTensor__annotations__     /var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/shieldgemma2/modeling_shieldgemma2.pyr   r   !   s     *.M5<<$&-r   r   c                       e Zd ZU eed<   dZdZdef fdZd Zd Z	d Z
d Ze	 	 	 	 	 	 	 	 	 	 	 	 	 dd
ej                  d	z  dej                  d	z  dej                   d	z  dej                  d	z  ded	z  dej                  d	z  dej                  d	z  dej                  d	z  ded	z  ded	z  ded	z  ded	z  deej                   z  defd       Z xZS )"ShieldGemma2ForImageClassificationconfig)imagetextmodelc                     t         |   |       t        |dd      | _        t        |dd      | _        t        j                  |      | _        | j                          y )N)r   yes_token_indexi *  no_token_indexi  )	super__init__getattrr"   r#   r   from_configr    	post_init)selfr   	__class__s     r   r%   z+ShieldGemma2ForImageClassification.__init__0   sS    '&v/@&I%f.>E0<<FK
r   c                 R    | j                   j                         j                         S N)r    get_decoderget_input_embeddingsr)   s    r   r.   z7ShieldGemma2ForImageClassification.get_input_embeddings7   s    zz%%'<<>>r   c                 V    | j                   j                         j                  |       y r,   )r    r-   set_input_embeddings)r)   values     r   r1   z7ShieldGemma2ForImageClassification.set_input_embeddings:   s    

 55e<r   c                 R    | j                   j                         j                         S r,   )r    r-   get_output_embeddingsr/   s    r   r4   z8ShieldGemma2ForImageClassification.get_output_embeddings=   s    zz%%'==??r   c                 V    | j                   j                         j                  |       y r,   )r    r-   set_output_embeddings)r)   new_embeddingss     r   r6   z8ShieldGemma2ForImageClassification.set_output_embeddings@   s    

 66~Fr   N	input_idspixel_valuesattention_maskposition_idspast_key_valuestoken_type_idsinputs_embedslabels	use_cacheoutput_attentionsoutput_hidden_statesreturn_dictlogits_to_keepreturnc                      | j                   d|||||||||	|
|||d|}|j                  }|ddd| j                  | j                  gf   }t	        j
                  |d      }t        ||      S )aY  
        Returns:
            A `ShieldGemma2ImageClassifierOutputWithNoAttention` instance containing the logits and probabilities
            associated with the model predicting the `Yes` or `No` token as the response to that prompt, captured in the
            following properties.

                *   `logits` (`torch.Tensor` of shape `(batch_size, 2)`):
                    The first position along dim=1 is the logits for the `Yes` token and the second position along dim=1 is
                    the logits for the `No` token.
                *   `probabilities` (`torch.Tensor` of shape `(batch_size, 2)`):
                    The first position along dim=1 is the probability of predicting the `Yes` token and the second position
                    along dim=1 is the probability of predicting the `No` token.

            ShieldGemma prompts are constructed such that predicting the `Yes` token means the content *does violate* the
            policy as described. If you are only interested in the violative condition, use
            `violated = outputs.probabilities[:, 1]` to extract that slice from the output tensors.

            When used with the `ShieldGemma2Processor`, the `batch_size` will be equal to `len(images) * len(policies)`,
            and the order within the batch will be img1_policy1, ... img1_policyN, ... imgM_policyN.
        )r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   N)dim)logitsr   r   )r    rI   r"   r#   r   softmaxr   )r)   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   	lm_kwargsoutputsrI   selected_logitsr   s                      r   forwardz*ShieldGemma2ForImageClassification.forwardC   s    L $** 
%)%+)'/!5#)
 
   B)=)=t?R?R(S!STo2>?"'
 	
r   )NNNNNNNNNNNNr   )r   r   r   r   r   input_modalitiesbase_model_prefixr%   r.   r1   r4   r6   r   r   
LongTensorFloatTensorr   r   boolintr   rN   __classcell__)r*   s   @r   r   r   *   sg   (1 ?=@G  .215.204(,2626*.!%)-,0#'-.;
##d*;
 ''$.;
 t+	;

 &&-;
 ;
 ((4/;
 ((4/;
   4';
 $;;
  $;;
 #Tk;
 D[;
 ell*;
  
:!;
 ;
r   r   )dataclassesr   r   cache_utilsr   modeling_outputsr   modeling_utilsr   utilsr   r	   autor   configuration_shieldgemma2r   
get_loggerr   loggerr   r   __all__r   r   r   <module>r`      s~    "    D - / : 
		H	% .7[ . . T
 T
 T
p )r   