
    i>              	          d Z ddlZddlmZ ddlmZ ddlmZ ddlm	Z	m
Z
 ddlmZmZmZmZ dd	lmZ dd
lmZ ddlmZmZmZ ddlmZmZ ddlmZ ddlmZ  ej>                  e       Z!d-dejD                  de#de$dejD                  fdZ% G d dejL                        Z' G d dejP                        Z) G d dejL                        Z* G d dejL                        Z+ G d dejL                        Z,e G d d e             Z- G d! d"e-      Z.e G d# d$e-             Z/ ed%&       G d' d(e-             Z0 ed)&       G d* d+e	e-             Z1g d,Z2y).zPyTorch ConvNext model.    N)nn   )initialization)ACT2FN)BackboneMixinfilter_output_hidden_states)BackboneOutputBaseModelOutputWithNoAttention(BaseModelOutputWithPoolingAndNoAttention$ImageClassifierOutputWithNoAttention)PreTrainedModel)Unpack)TransformersKwargsauto_docstringlogging)can_return_tuplemerge_with_config_defaults)capture_outputs   )ConvNextConfiginput	drop_probtrainingreturnc                    |dk(  s|s| S d|z
  }| j                   d   fd| j                  dz
  z  z   }|t        j                  || j                  | j
                        z   }|j                          | j                  |      |z  }|S )zc
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

            r   r   )r   )dtypedevice)shapendimtorchrandr   r   floor_div)r   r   r   	keep_probr   random_tensoroutputs          /var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/convnext/modeling_convnext.py	drop_pathr)   (   s    
 CxII[[^

Q 77E

5ELL YYMYYy!M1FM    c                   x     e Zd ZdZd	dedz  ddf fdZdej                  dej                  fdZde	fdZ
 xZS )
ConvNextDropPathzXDrop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).Nr   r   c                 0    t         |           || _        y N)super__init__r   )selfr   	__class__s     r(   r0   zConvNextDropPath.__init__;   s    "r*   hidden_statesc                 D    t        || j                  | j                        S r.   )r)   r   r   )r1   r3   s     r(   forwardzConvNextDropPath.forward?   s    FFr*   c                      d| j                    S )Nzp=)r   )r1   s    r(   
extra_reprzConvNextDropPath.extra_reprB   s    DNN#$$r*   r.   )__name__
__module____qualname____doc__floatr0   r!   Tensorr5   strr7   __classcell__r2   s   @r(   r,   r,   8   sG    b#%$, #$ #GU\\ Gell G%C %r*   r,   c                   f     e Zd ZdZddd fd
Zdej                  dej                  f fdZ xZS )	ConvNextLayerNormaA  LayerNorm that supports two data formats: channels_last (default) or channels_first.
    The ordering of the dimensions in the inputs. channels_last corresponds to inputs with shape (batch_size, height,
    width, channels) while channels_first corresponds to inputs with shape (batch_size, channels, height, width).
    ư>channels_lastepsdata_formatc                \    t        |   |fd|i| |dvrt        d|       || _        y )NrF   )rD   channels_firstzUnsupported data format: )r/   r0   NotImplementedErrorrG   )r1   normalized_shaperF   rG   kwargsr2   s        r(   r0   zConvNextLayerNorm.__init__L   s?    )=s=f=AA%(A+&OPP&r*   featuresr   c                     | j                   dk(  r9|j                  dddd      }t        |   |      }|j                  dddd      }|S t        |   |      }|S )z
        Args:
            features: Tensor of shape (batch_size, channels, height, width) OR (batch_size, height, width, channels)
        rI   r      r   r   )rG   permuter/   r5   )r1   rM   r2   s     r(   r5   zConvNextLayerNorm.forwardR   sj    
 //''1a3Hwx0H''1a3H  wx0Hr*   	r8   r9   r:   r;   r0   r!   r=   r5   r?   r@   s   @r(   rB   rB   F   s4    
 15/ '   r*   rB   c                   Z     e Zd ZdZ fdZdej                  dej                  fdZ xZ	S )ConvNextEmbeddingszThis class is comparable to (and inspired by) the SwinEmbeddings class
    found in src/transformers/models/swin/modeling_swin.py.
    c                    t         |           t        j                  |j                  |j
                  d   |j                  |j                        | _        t        |j
                  d   dd      | _	        |j                  | _        y )Nr   kernel_sizestriderC   rI   rE   )
r/   r0   r   Conv2dnum_channelshidden_sizes
patch_sizepatch_embeddingsrB   	layernormr1   configr2   s     r(   r0   zConvNextEmbeddings.__init__e   sr     "		!4!4Q!7VEVEV_e_p_p!
 +6+>+>q+AtYij"//r*   pixel_valuesr   c                     |j                   d   }|| j                  k7  rt        d      | j                  |      }| j	                  |      }|S )Nr   zeMake sure that the channel dimension of the pixel values match with the one set in the configuration.)r   rY   
ValueErrorr\   r]   )r1   r`   rY   
embeddingss       r(   r5   zConvNextEmbeddings.forwardm   sV    #))!,4,,,w  **<8
^^J/
r*   )
r8   r9   r:   r;   r0   r!   FloatTensorr=   r5   r?   r@   s   @r(   rS   rS   `   s*    0E$5$5 %,, r*   rS   c                   \     e Zd ZdZd fd	Zdej                  dej                  fdZ xZS )ConvNextLayera3  This corresponds to the `Block` class in the original implementation.

    There are two equivalent implementations: [DwConv, LayerNorm (channels_first), Conv, GELU,1x1 Conv]; all in (N, C,
    H, W) (2) [DwConv, Permute to (N, H, W, C), LayerNorm (channels_last), Linear, GELU, Linear]; Permute back

    The authors used (2) as they find it slightly faster in PyTorch.

    Args:
        config ([`ConvNextConfig`]): Model configuration class.
        dim (`int`): Number of input channels.
        drop_path (`float`): Stochastic depth rate. Default: 0.0.
    c                 $   t         |           t        j                  ||dd|      | _        t        |d      | _        t        j                  |d|z        | _        t        |j                     | _        t        j                  d|z  |      | _        |j                  dkD  r7t        j                  |j                  t        j                   |      z  d	      nd | _        |d
kD  rt%        |      | _        y t        j&                         | _        y )N   r   )rV   paddinggroupsrC   rF      r   T)requires_gradr   )r/   r0   r   rX   dwconvrB   r]   Linearpwconv1r   
hidden_actactpwconv2layer_scale_init_value	Parameterr!   oneslayer_scale_parameterr,   Identityr)   )r1   r_   dimr)   r2   s       r(   r0   zConvNextLayer.__init__   s    iiSa3O*3D9yya#g.&++,yyS#. ,,q0 LL66CHX\] 	"
 9BC))4R[[]r*   rM   r   c                 b   |}| j                  |      }|j                  dddd      }| j                  |      }| j                  |      }| j	                  |      }| j                  |      }| j                  | j                  |z  }|j                  dddd      }|| j                  |      z   }|S )Nr   rO   r   r   )rn   rP   r]   rp   rr   rs   rw   r)   )r1   rM   residuals      r(   r5   zConvNextLayer.forward   s    ;;x(##Aq!Q/>>(+<<)88H%<<)%%111H<H##Aq!Q/dnnX66r*   )r   rQ   r@   s   @r(   rf   rf   x   s)    [  r*   rf   c                   \     e Zd ZdZd fd	Zdej                  dej                  fdZ xZS )ConvNextStagea  ConvNeXT stage, consisting of an optional downsampling layer + multiple residual blocks.

    Args:
        config ([`ConvNextConfig`]): Model configuration class.
        in_channels (`int`): Number of input channels.
        out_channels (`int`): Number of output channels.
        depth (`int`): Number of residual blocks.
        drop_path_rates(`list[float]`): Stochastic depth rates for each layer.
    c                    t         	|           ||k7  s|dkD  r@t        j                  t	        |dd      t        j
                  ||||      g      | _        nt        j                         | _        |xs dg|z  }t        j                  t        |      D cg c]  }t        ||||          c}      | _	        y c c}w )Nr   rC   rI   rE   rU   r   )ry   r)   )
r/   r0   r   
ModuleListrB   rX   downsampling_layerrangerf   layers)
r1   r_   in_channelsout_channelsrV   rW   depthdrop_path_ratesjr2   s
            r(   r0   zConvNextStage.__init__   s    ,&&1*&(mm%ktIYZIIk<[Y_`'D# ')mmoD#):cUU]mm\abg\hiWX]6|q?QRi
is   B>rM   r   c                 j    | j                   D ]
  } ||      } | j                  D ]
  } ||      } |S r.   )r   r   )r1   rM   layers      r(   r5   zConvNextStage.forward   sA    ,, 	'EXH	'[[ 	'EXH	'r*   )rO   rO   rO   NrQ   r@   s   @r(   r}   r}      s(    
"  r*   r}   c                   f     e Zd ZU eed<   dZdZdZddgZ e	j                          fd       Z xZS )ConvNextPreTrainedModelr_   convnextr`   )imagerf   r}   c                     t         |   |       t        |t              rB|j                  5t        j                  |j                  | j                  j                         yyy)zInitialize the weightsN)	r/   _init_weights
isinstancerf   rw   init	constant_r_   rt   )r1   moduler2   s     r(   r   z%ConvNextPreTrainedModel._init_weights   sP     	f%fm,++7v;;T[[=_=_` 8 -r*   )r8   r9   r:   r   __annotations__base_model_prefixmain_input_nameinput_modalities_no_split_modulesr!   no_gradr   r?   r@   s   @r(   r   r      sA    "$O!(/:U]]_a ar*   r   c                   z     e Zd ZdZdeiZ fdZe ed      de	j                  dee   defd              Z xZS )ConvNextEncoderr3   c           
      N   t         |   |       t        j                         | _        t        j                  d|j                  t        |j                        d      j                  |j                        D cg c]  }|j                          }}|j                  d   }t        |j                        D ]V  }|j                  |   }t        ||||dkD  rdnd|j                  |   ||         }| j                  j!                  |       |}X | j#                          y c c}w )Nr   cpu)r   rO   r   )r   r   rW   r   r   )r/   r0   r   r   stagesr!   linspacedrop_path_ratesumdepthssplittolistrZ   r   
num_stagesr}   append	post_init)	r1   r_   xr   prev_chsiout_chsstager2   s	           r(   r0   zConvNextEncoder.__init__   s    mmo ^^Av'<'<c&-->PY^_eeflfsfst
 HHJ
 
 &&q)v(() 	A))!,G!$$EqqmmA& / 2E KKu%H	 	%
s   ;D"F)tie_last_hidden_statesrL   r   c                 L    | j                   D ]
  } ||      } t        |      S )N)last_hidden_state)r   r
   )r1   r3   rL   layer_modules       r(   r5   zConvNextEncoder.forward   s.     !KK 	8L(7M	8 .NNr*   )r8   r9   r:   r   r}   _can_record_outputsr0   r   r   r!   r=   r   r   r
   r5   r?   r@   s   @r(   r   r      sd    %O*M:.  E2O||O +,O 
(	O 3  Or*   r   c            	       j     e Zd Z fdZee	 ddej                  dz  dee	   de
fd              Z xZS )ConvNextModelc                     t         |   |       || _        t        |      | _        t        |      | _        t        j                  |j                  d   |j                        | _        | j                          y )Nrk   )r/   r0   r_   rS   rc   r   encoderr   	LayerNormrZ   layer_norm_epsr]   r   r^   s     r(   r0   zConvNextModel.__init__  s`     ,V4&v. f&9&9"&=6CXCXY 	r*   Nr`   rL   r   c                     |t        d      | j                  |      } | j                  |fi |}|j                  }| j	                  |j                  ddg            }t        |||j                        S )Nz You have to specify pixel_valuesr   )r   pooler_outputr3   )rb   rc   r   r   r]   meanr   r3   )r1   r`   rL   embedding_outputencoder_outputsr   pooled_outputs          r(   r5   zConvNextModel.forward  s    
 ?@@??<8:F$,,GW:b[a:b+== '8'='=r2h'GH7/')77
 	
r*   r.   )r8   r9   r:   r0   r   r   r!   rd   r   r   r   r5   r?   r@   s   @r(   r   r      sP     7;
!--4
GMN`Ga
	1
  
r*   r   z
    ConvNext Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    )custom_introc            	            e Zd ZdZ fdZee	 ddej                  dz  dej                  dz  de
fd              Z xZS )	ConvNextForImageClassificationFc                 <   t         |   |       |j                  | _        t        |      | _        |j                  dkD  r3t        j                  |j                  d   |j                        | _        nt        j                         | _        | j                          y )Nr   r   )r/   r0   
num_labelsr   r   r   ro   rZ   
classifierrx   r   r^   s     r(   r0   z'ConvNextForImageClassification.__init__.  su      ++%f- q  ii(;(;B(?ARARSDO kkmDO 	r*   Nr`   labelsr   c                      | j                   |fi |}|j                  }| j                  |      }d}|| j                  ||| j                        }t        |||j                        S )a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        N)r   pooled_logitsr_   )losslogitsr3   )r   r   r   loss_functionr_   r   r3   )r1   r`   r   rL   outputsr   r   r   s           r(   r5   z&ConvNextForImageClassification.forward=  sy     =JDMM,<aZ`<a--/%%V6RVR]R]%^D3!//
 	
r*   )NN)r8   r9   r:   accepts_loss_kwargsr0   r   r   r!   rd   
LongTensorr   r5   r?   r@   s   @r(   r   r   %  s^       _c
!--4
EJEUEUX\E\
	-
  
r*   r   zQ
    ConvNeXt backbone, to be used with frameworks like DETR and MaskFormer.
    c            	       n     e Zd ZdZ fdZeeedej                  de
e   defd                     Z xZS )ConvNextBackboneFc                 p   t         |   |       t        |      | _        t	        |      | _        |j                  d   g|j                  z   | _        i }t        | j                  | j                        D ]  \  }}t        |d      ||<    t        j                  |      | _        | j                          y )Nr   rI   )rG   )r/   r0   rS   rc   r   r   rZ   num_featureszipout_featureschannelsrB   r   
ModuleDicthidden_states_normsr   )r1   r_   r   r   rY   r2   s        r(   r0   zConvNextBackbone.__init___  s     ,V4&v.#0034v7J7JJ !#&t'8'8$--#H 	gE<):<Ue)f&	g#%==1D#E  	r*   r`   rL   r   c                 B   d|d<   | j                  |      } | j                  |fi |}|j                  }g }t        | j                  |      D ]:  \  }}|| j
                  v s | j                  |   |      }|j                  |       < t        t        |      |      S )a  
        Examples:

        ```python
        >>> from transformers import AutoImageProcessor, AutoBackbone
        >>> import torch
        >>> from PIL import Image
        >>> import httpx
        >>> from io import BytesIO

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> with httpx.stream("GET", url) as response:
        ...     image = Image.open(BytesIO(response.read()))

        >>> processor = AutoImageProcessor.from_pretrained("facebook/convnext-tiny-224")
        >>> model = AutoBackbone.from_pretrained("facebook/convnext-tiny-224")

        >>> inputs = processor(image, return_tensors="pt")
        >>> outputs = model(**inputs)
        ```Toutput_hidden_states)feature_mapsr3   )
rc   r   r3   r   stage_namesr   r   r   r	   tuple)	r1   r`   rL   r   r   r3   r   r   hidden_states	            r(   r5   zConvNextBackbone.forwardo  s    8 *.%&??<8:F$,,GW:b[a:b'55#&t'7'7#G 	2E<)))>t77>|L##L1	2
 5+>m\\r*   )r8   r9   r:   has_attentionsr0   r   r   r   r!   r=   r   r   r	   r5   r?   r@   s   @r(   r   r   W  s^     N   %]ll%] +,%] 
	%]  ! %]r*   r   )r   r   r   r   )r   F)3r;   r!   r    r   r   activationsr   backbone_utilsr   r   modeling_outputsr	   r
   r   r   modeling_utilsr   processing_utilsr   utilsr   r   r   utils.genericr   r   utils.output_capturingr   configuration_convnextr   
get_loggerr8   loggerr=   r<   boolr)   Moduler,   r   rB   rS   rf   r}   r   r   r   r   r   __all__ r*   r(   <module>r      sr      & ! H  . & @ @ I 5 2 
		H	%U\\ e T V[VbVb  %ryy % 4 0(BII (V!BII !H ao a a %O- %OP !
+ !
 !
H )
%< )
)
X 
;]}&= ;]
;]| mr*   