
    iD              	          d Z ddlZddlmZ ddlmZ ddlmZ ddlm	Z	m
Z
 ddlmZmZmZmZ dd	lmZ dd
lmZ ddlmZmZmZ ddlmZmZ ddlmZ ddlmZ  ej>                  e       Z!d/dejD                  de#de$dejD                  fdZ% G d dejL                        Z' G d dejL                        Z( G d dejR                        Z* G d dejL                        Z+ G d dejL                        Z, G d d ejL                        Z-e G d! d"e             Z. G d# d$e.      Z/e G d% d&e.             Z0 ed'(       G d) d*e.             Z1 ed+(       G d, d-e	e.             Z2g d.Z3y)0zPyTorch ConvNextV2 model.    N)nn   )initialization)ACT2FN)BackboneMixinfilter_output_hidden_states)BackboneOutputBaseModelOutputWithNoAttention(BaseModelOutputWithPoolingAndNoAttention$ImageClassifierOutputWithNoAttention)PreTrainedModel)Unpack)TransformersKwargsauto_docstringlogging)can_return_tuplemerge_with_config_defaults)capture_outputs   )ConvNextV2Configinput	drop_probtrainingreturnc                    |dk(  s|s| S d|z
  }| j                   d   fd| j                  dz
  z  z   }|t        j                  || j                  | j
                        z   }|j                          | j                  |      |z  }|S )zc
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

            r   r   )r   )dtypedevice)shapendimtorchrandr   r   floor_div)r   r   r   	keep_probr   random_tensoroutputs          /var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/convnextv2/modeling_convnextv2.py	drop_pathr)   (   s    
 CxII[[^

Q 77E

5ELL YYMYYy!M1FM    c                   x     e Zd ZdZd	dedz  ddf fdZdej                  dej                  fdZde	fdZ
 xZS )
ConvNextV2DropPathzXDrop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).Nr   r   c                 0    t         |           || _        y N)super__init__r   )selfr   	__class__s     r(   r0   zConvNextV2DropPath.__init__;   s    "r*   hidden_statesc                 D    t        || j                  | j                        S r.   )r)   r   r   )r1   r3   s     r(   forwardzConvNextV2DropPath.forward?   s    FFr*   c                      d| j                    S )Nzp=)r   )r1   s    r(   
extra_reprzConvNextV2DropPath.extra_reprB   s    DNN#$$r*   r.   )__name__
__module____qualname____doc__floatr0   r!   Tensorr5   strr7   __classcell__r2   s   @r(   r,   r,   8   sG    b#%$, #$ #GU\\ Gell G%C %r*   r,   c                   `     e Zd ZdZdef fdZdej                  dej                  fdZ xZ	S )ConvNextV2GRNz)GRN (Global Response Normalization) layerdimc                     t         |           t        j                  t	        j
                  ddd|            | _        t        j                  t	        j
                  ddd|            | _        y )Nr   )r/   r0   r   	Parameterr!   zerosweightbias)r1   rC   r2   s     r(   r0   zConvNextV2GRN.__init__I   sL    ll5;;q!Q#<=LLQ1c!:;	r*   r3   r   c                     t         j                  j                  |ddd      }||j                  dd      dz   z  }| j                  ||z  z  | j
                  z   |z   }|S )N   )r   rJ   T)ordrC   keepdim)rC   rL   ư>)r!   linalgvector_normmeanrG   rH   )r1   r3   global_featuresnorm_featuress       r(   r5   zConvNextV2GRN.forwardN   si    ,,22=aV]a2b'?+?+?BPT+?+UX\+\]}}'DE		QTaar*   )
r8   r9   r:   r;   intr0   r!   FloatTensorr5   r?   r@   s   @r(   rB   rB   F   s1    3<C <
U%6%6 5;L;L r*   rB   c                   f     e Zd ZdZddd fd
Zdej                  dej                  f fdZ xZS )	ConvNextV2LayerNormaA  LayerNorm that supports two data formats: channels_last (default) or channels_first.
    The ordering of the dimensions in the inputs. channels_last corresponds to inputs with shape (batch_size, height,
    width, channels) while channels_first corresponds to inputs with shape (batch_size, channels, height, width).
    rN   channels_lastepsdata_formatc                \    t        |   |fd|i| |dvrt        d|       || _        y )NrZ   )rX   channels_firstzUnsupported data format: )r/   r0   NotImplementedErrorr[   )r1   normalized_shaperZ   r[   kwargsr2   s        r(   r0   zConvNextV2LayerNorm.__init__^   s?    )=s=f=AA%(A+&OPP&r*   featuresr   c                     | j                   dk(  r9|j                  dddd      }t        |   |      }|j                  dddd      }|S t        |   |      }|S )z
        Args:
            features: Tensor of shape (batch_size, channels, height, width) OR (batch_size, height, width, channels)
        r]   r   rJ   r   r   )r[   permuter/   r5   )r1   ra   r2   s     r(   r5   zConvNextV2LayerNorm.forwardd   sj    
 //''1a3Hwx0H''1a3H  wx0Hr*   	r8   r9   r:   r;   r0   r!   r=   r5   r?   r@   s   @r(   rW   rW   X   s4    
 15/ '   r*   rW   c                   Z     e Zd ZdZ fdZdej                  dej                  fdZ xZ	S )ConvNextV2EmbeddingszThis class is comparable to (and inspired by) the SwinEmbeddings class
    found in src/transformers/models/swin/modeling_swin.py.
    c                    t         |           t        j                  |j                  |j
                  d   |j                  |j                        | _        t        |j
                  d   dd      | _	        |j                  | _        y )Nr   kernel_sizestriderN   r]   rY   )
r/   r0   r   Conv2dnum_channelshidden_sizes
patch_sizepatch_embeddingsrW   	layernormr1   configr2   s     r(   r0   zConvNextV2Embeddings.__init__x   sr     "		!4!4Q!7VEVEV_e_p_p!
 -V-@-@-C[kl"//r*   pixel_valuesr   c                     |j                   d   }|| j                  k7  rt        d      | j                  |      }| j	                  |      }|S )Nr   zeMake sure that the channel dimension of the pixel values match with the one set in the configuration.)r   rl   
ValueErrorro   rp   )r1   rs   rl   
embeddingss       r(   r5   zConvNextV2Embeddings.forward   sV    #))!,4,,,w  **<8
^^J/
r*   )
r8   r9   r:   r;   r0   r!   rU   r=   r5   r?   r@   s   @r(   rf   rf   s   s*    0E$5$5 %,, r*   rf   c                   \     e Zd ZdZd fd	Zdej                  dej                  fdZ xZS )ConvNextV2Layera5  This corresponds to the `Block` class in the original implementation.

    There are two equivalent implementations: [DwConv, LayerNorm (channels_first), Conv, GELU,1x1 Conv]; all in (N, C,
    H, W) (2) [DwConv, Permute to (N, H, W, C), LayerNorm (channels_last), Linear, GELU, Linear]; Permute back

    The authors used (2) as they find it slightly faster in PyTorch.

    Args:
        config ([`ConvNextV2Config`]): Model configuration class.
        dim (`int`): Number of input channels.
        drop_path (`float`): Stochastic depth rate. Default: 0.0.
    c                    t         |           t        j                  ||dd|      | _        t        |d      | _        t        j                  |d|z        | _        t        |j                     | _        t        d|z        | _        t        j                  d|z  |      | _        |dkD  rt        |      | _        y t        j                          | _        y )N   r   )ri   paddinggroupsrN   rZ      r   )r/   r0   r   rk   dwconvrW   rp   Linearpwconv1r   
hidden_actactrB   grnpwconv2r,   Identityr)   )r1   rr   rC   r)   r2   s       r(   r0   zConvNextV2Layer.__init__   s    iiSa3O,Sd;yya#g.&++, S)yyS#.:Cc/+I6r{{}r*   ra   r   c                 N   |}| j                  |      }|j                  dddd      }| j                  |      }| j                  |      }| j	                  |      }| j                  |      }| j                  |      }|j                  dddd      }|| j                  |      z   }|S )Nr   rJ   r   r   )r   rc   rp   r   r   r   r   r)   )r1   ra   residuals      r(   r5   zConvNextV2Layer.forward   s    ;;x(##Aq!Q/>>(+<<)88H%88H%<<)##Aq!Q/dnnX66r*   )r   rd   r@   s   @r(   rx   rx      s)    
]  r*   rx   c                   \     e Zd ZdZd fd	Zdej                  dej                  fdZ xZS )ConvNextV2Stagea  ConvNeXTV2 stage, consisting of an optional downsampling layer + multiple residual blocks.

    Args:
        config ([`ConvNextV2Config`]): Model configuration class.
        in_channels (`int`): Number of input channels.
        out_channels (`int`): Number of output channels.
        depth (`int`): Number of residual blocks.
        drop_path_rates(`list[float]`): Stochastic depth rates for each layer.
    c                    t         	|           ||k7  s|dkD  r@t        j                  t	        |dd      t        j
                  ||||      g      | _        nt        j                         | _        |xs dg|z  }t        j                  t        |      D cg c]  }t        ||||          c}      | _	        y c c}w )Nr   rN   r]   rY   rh   r   )rC   r)   )
r/   r0   r   
ModuleListrW   rk   downsampling_layerrangerx   layers)
r1   rr   in_channelsout_channelsri   rj   depthdrop_path_ratesjr2   s
            r(   r0   zConvNextV2Stage.__init__   s    ,&&1*&(mm'K[\IIk<[Y_`'D# ')mmoD#):cUU]mm^cdi^jkYZ_VQRASTk
ks   B>ra   r   c                 j    | j                   D ]
  } ||      } | j                  D ]
  } ||      } |S r.   )r   r   )r1   ra   layers      r(   r5   zConvNextV2Stage.forward   sA    ,, 	'EXH	'[[ 	'EXH	'r*   )rJ   rJ   rJ   Nrd   r@   s   @r(   r   r      s(    
"  r*   r   c                   d     e Zd ZU eed<   dZdZdZdgZ e	j                          fd       Z xZS )ConvNextV2PreTrainedModelrr   
convnextv2rs   )imagerx   c                     t         |   |       t        |t              r?t	        j
                  |j                         t	        j
                  |j                         yy)zInitialize the weightsN)r/   _init_weights
isinstancerB   initzeros_rG   rH   )r1   moduler2   s     r(   r   z'ConvNextV2PreTrainedModel._init_weights   sA     	f%fm,KK&KK$ -r*   )r8   r9   r:   r   __annotations__base_model_prefixmain_input_nameinput_modalities_no_split_modulesr!   no_gradr   r?   r@   s   @r(   r   r      s=    $$O!*+U]]_% %r*   r   c                   z     e Zd ZdZdeiZ fdZe ed      de	j                  dee   defd              Z xZS )ConvNextV2Encoderr3   c           
      N   t         |   |       t        j                         | _        t        j                  d|j                  t        |j                        d      j                  |j                        D cg c]  }|j                          }}|j                  d   }t        |j                        D ]V  }|j                  |   }t        ||||dkD  rdnd|j                  |   ||         }| j                  j!                  |       |}X | j#                          y c c}w )Nr   cpu)r   rJ   r   )r   r   rj   r   r   )r/   r0   r   r   stagesr!   linspacedrop_path_ratesumdepthssplittolistrm   r   
num_stagesr   append	post_init)	r1   rr   xr   prev_chsiout_chsstager2   s	           r(   r0   zConvNextV2Encoder.__init__   s    mmo ^^Av'<'<c&-->PY^_eeflfsfst
 HHJ
 
 &&q)v(() 	A))!,G#$$EqqmmA& / 2E KKu%H	 	%
s   ;D"F)tie_last_hidden_statesr`   r   c                 L    | j                   D ]
  } ||      } t        |      S )N)last_hidden_state)r   r
   )r1   r3   r`   layer_modules       r(   r5   zConvNextV2Encoder.forward  s.     !KK 	8L(7M	8 .NNr*   )r8   r9   r:   r   r   _can_record_outputsr0   r   r   r!   r=   r   r   r
   r5   r?   r@   s   @r(   r   r      sd    %O*O<.  E2O||O +,O 
(	O 3  Or*   r   c            	       j     e Zd Z fdZee	 ddej                  dz  dee	   de
fd              Z xZS )ConvNextV2Modelc                     t         |   |       || _        t        |      | _        t        |      | _        t        j                  |j                  d   |j                        | _        | j                          y )NrM   r}   )r/   r0   rr   rf   rv   r   encoderr   	LayerNormrm   layer_norm_epsrp   r   rq   s     r(   r0   zConvNextV2Model.__init__  s`     .v6(0 f&9&9"&=6CXCXY 	r*   Nrs   r`   r   c                     |t        d      | j                  |      } | j                  |fi |}|j                  }| j	                  |j                  ddg            }t        |||j                        S )Nz You have to specify pixel_valuesrM   )r   pooler_outputr3   )ru   rv   r   r   rp   rQ   r   r3   )r1   rs   r`   embedding_outputencoder_outputsr   pooled_outputs          r(   r5   zConvNextV2Model.forward%  s    
 ?@@??<8:F$,,GW:b[a:b+== '8'='=r2h'GH7/')77
 	
r*   r.   )r8   r9   r:   r0   r   r   r!   rU   r   r   r   r5   r?   r@   s   @r(   r   r     sP     7;
!--4
GMN`Ga
	1
  
r*   r   z
    ConvNextV2 Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    )custom_introc            	            e Zd ZdZ fdZee	 ddej                  dz  dej                  dz  de
fd              Z xZS )	 ConvNextV2ForImageClassificationFc                 <   t         |   |       |j                  | _        t        |      | _        |j                  dkD  r3t        j                  |j                  d   |j                        | _        nt        j                         | _        | j                          y )Nr   rM   )r/   r0   
num_labelsr   r   r   r   rm   
classifierr   r   rq   s     r(   r0   z)ConvNextV2ForImageClassification.__init__E  su      ++)&1 q  ii(;(;B(?ARARSDO kkmDO 	r*   Nrs   labelsr   c                      | j                   |fi |}|j                  }| j                  |      }d}|| j                  ||| j                        }t        |||j                        S )a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        N)r   pooled_logitsrr   )losslogitsr3   )r   r   r   loss_functionrr   r   r3   )r1   rs   r   r`   outputsr   r   r   s           r(   r5   z(ConvNextV2ForImageClassification.forwardT  sy     =LDOOL<c\b<c--/%%V6RVR]R]%^D3!//
 	
r*   )NN)r8   r9   r:   accepts_loss_kwargsr0   r   r   r!   rU   
LongTensorr   r5   r?   r@   s   @r(   r   r   ;  s^       _c
!--4
EJEUEUX\E\
	-
  
r*   r   zT
    ConvNeXT V2 backbone, to be used with frameworks like DETR and MaskFormer.
    c            	       n     e Zd ZdZ fdZeeedej                  de
e   defd                     Z xZS )ConvNextV2BackboneFc                 p   t         |   |       t        |      | _        t	        |      | _        |j                  d   g|j                  z   | _        i }t        | j                  | j                        D ]  \  }}t        |d      ||<    t        j                  |      | _        | j                          y )Nr   r]   )r[   )r/   r0   rf   rv   r   r   rm   num_featureszipout_featureschannelsrW   r   
ModuleDicthidden_states_normsr   )r1   rr   r   r   rl   r2   s        r(   r0   zConvNextV2Backbone.__init__w  s     .v6(0#0034v7J7JJ !#&t'8'8$--#H 	iE<)<\Wg)h&	i#%==1D#E  	r*   rs   r`   r   c                 B   d|d<   | j                  |      } | j                  |fi |}|j                  }g }t        | j                  |      D ]:  \  }}|| j
                  v s | j                  |   |      }|j                  |       < t        t        |      |      S )a  
        Examples:

        ```python
        >>> from transformers import AutoImageProcessor, AutoBackbone
        >>> import torch
        >>> from PIL import Image
        >>> import httpx
        >>> from io import BytesIO

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> with httpx.stream("GET", url) as response:
        ...     image = Image.open(BytesIO(response.read()))

        >>> processor = AutoImageProcessor.from_pretrained("facebook/convnextv2-tiny-1k-224")
        >>> model = AutoBackbone.from_pretrained("facebook/convnextv2-tiny-1k-224")

        >>> inputs = processor(image, return_tensors="pt")
        >>> outputs = model(**inputs)
        ```Toutput_hidden_states)feature_mapsr3   )
rv   r   r3   r   stage_namesr   r   r   r	   tuple)	r1   rs   r`   r   r   r3   r   r   hidden_states	            r(   r5   zConvNextV2Backbone.forward  s    8 *.%&??<8:F$,,GW:b[a:b'55#&t'7'7#G 	2E<)))>t77>|L##L1	2
 5+>m\\r*   )r8   r9   r:   has_attentionsr0   r   r   r   r!   r=   r   r   r	   r5   r?   r@   s   @r(   r   r   n  s^     N   %]ll%] +,%] 
	%]  ! %]r*   r   )r   r   r   r   )r   F)4r;   r!   r    r   r   activationsr   backbone_utilsr   r   modeling_outputsr	   r
   r   r   modeling_utilsr   processing_utilsr   utilsr   r   r   utils.genericr   r   utils.output_capturingr   configuration_convnextv2r   
get_loggerr8   loggerr=   r<   boolr)   Moduler,   rB   r   rW   rf   rx   r   r   r   r   r   r   __all__ r*   r(   <module>r      s       & ! H  . & @ @ I 5 6 
		H	%U\\ e T V[VbVb  % %BII $",, 6299 0(bii (X!bii !H % % %"%O1 %OP !
/ !
 !
H )
'@ )
)
X ;](A ;];]| ur*   