
    io              	          d Z ddlZddlZddlZddlZddlmZmZ ddlm	Z
 ddlmZ ddlmZmZ ddlmZmZmZmZ dd	lmZ dd
lmZmZ ddlmZ ddlmZ  ej<                  e      Z d7de!e!e"f   fdZ# G d dejH                        Z% G d dejL                        Z' G d dejP                        Z) G d dejT                        Z+ G d dejP                        Z,d8dej                  de-de"dej                  fdZ. G d dejP                        Z/d9d Z0 G d! d"ejP                        Z1 G d# d$ejP                        Z2 G d% d&ejP                        Z3 G d' d(ejP                        Z4 G d) d*ejP                        Z5e G d+ d,e             Z6e G d- d.e6             Z7 ed/0       G d1 d2e6             Z8 ed30       G d4 d5ee6             Z9g d6Z:y):z9PyTorch BiT model. Also supports backbone for ViT hybrid.    N)Tensornn   )initialization)ACT2FN)BackboneMixinfilter_output_hidden_states)BackboneOutputBaseModelOutputWithNoAttention(BaseModelOutputWithPoolingAndNoAttention$ImageClassifierOutputWithNoAttention)PreTrainedModel)auto_docstringlogging)can_return_tuple   )	BitConfigreturnc                    d}| |dz
  ||dz
  z  z   dz  } | |fS t        | t              ra| j                         } | dk(  r0|dk(  r#||dz
  z  dz  dk(  r|dz
  ||dz
  z  z   dz  } | |fS d} d}| |fS | dk(  rd} | |fS |dz
  ||dz
  z  z   dz  } | |fS )al  
    Utility function to get the tuple padding value given the kernel_size and padding.

    Args:
        padding (Union[`str`, `int`], *optional*):
            Padding value, can be either `"same"`, `"valid"`. If a different value is provided the default padding from
            PyTorch is used.
        kernel_size (`int`, *optional*, defaults to 7):
            Kernel size of the convolution layers.
        stride (`int`, *optional*, defaults to 1):
            Stride value of the convolution layers.
        dilation (`int`, *optional*, defaults to 1):
            Dilation value of the convolution layers.
    Fr      samer   Tvalid)
isinstancestrlower)paddingkernel_sizestridedilationdynamics        u/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/bit/modeling_bit.pyget_padding_valuer"   )   s     GQJ(kAo">>1D'3--/f{K!O <AQF"QJ(kAo*FF1L G  G G G 
h+/&BBqHGG    c                   6     e Zd ZdZ	 	 	 	 	 	 d fd	Zd Z xZS )WeightStandardizedConv2dzConv2d with Weight Standardization. Used for ViT Hybrid model.

    Paper: [Micro-Batch Training with Batch-Channel Normalization and Weight
    Standardization](https://huggingface.co/papers/1903.10520)
    c
           
          t        ||||      \  }}
t        | 	  ||||||||       |
rt        |||      | _        |	| _        y d | _        |	| _        y )N)r   r   )r   r   r   groupsbias)r"   super__init__DynamicPad2dpadeps)self
in_channelout_channelsr   r   r   r   r'   r(   r-   
is_dynamic	__class__s              r!   r*   z!WeightStandardizedConv2d.__init__Y   ss     0V^fg 	 		
 #KBDH  DHr#   c           	         | j                   | j                  |      }t        j                  j                  | j                  j                  d| j                  d      d d dd| j                        j                  | j                        }t        j                  j                  ||| j                  | j                  | j                  | j                  | j                        }|S )Nr   T        )trainingmomentumr-   )r,   r   
functional
batch_normweightreshaper0   r-   
reshape_asconv2dr(   r   r   r   r'   )r.   hidden_stater:   s      r!   forwardz WeightStandardizedConv2d.forwardv   s    8888L1L))KK4#4#4b94PT_bhlhphp * 

*T[[
! 	 }}++&$))T[[$,,W[WbWb
 r#   )r   SAMEr   r   Fgư>__name__
__module____qualname____doc__r*   r?   __classcell__r2   s   @r!   r%   r%   R   s&     :	r#   r%   c                   *     e Zd ZdZd fd	Zd Z xZS )BitGroupNormActivationzQ
    A module that combines group normalization with an activation function.
    c                     t         |   |j                  |||       |rt        |j                     | _        y t        j                         | _        y )N)r-   affine)r)   r*   
num_groupsr   
hidden_act
activationr   Identity)r.   confignum_channelsr-   rK   apply_activationr2   s         r!   r*   zBitGroupNormActivation.__init__   sA    **Lc&Q$V%6%67DO kkmDOr#   c                     t         j                  j                  || j                  | j                  | j
                  | j                        }| j                  |      }|S N)r   r8   
group_normrL   r:   r(   r-   rN   )r.   r>   s     r!   r?   zBitGroupNormActivation.forward   sH    }}//doot{{\`\e\egkgogop|4r#   )gh㈵>TTrA   rG   s   @r!   rI   rI      s    ,r#   rI   c                   *     e Zd ZdZd fd	Zd Z xZS )r+   z
    A module that wraps dynamic padding of any input, given the parameters of the convolutional layer and the input
    hidden states.
    c                     t         |           t        |t              r||f}t        |t              r||f}t        |t              r||f}|| _        || _        || _        || _        d }|| _        y )Nc                 p    t        t        j                  | |z        dz
  |z  |dz
  |z  z   dz   | z
  d      S )Nr   r   )maxmathceil)xr   r   r   s       r!   compute_paddingz.DynamicPad2d.__init__.<locals>.compute_padding   sB    		!f*-1V;{QRZ>ZZ]^^abbdeffr#   )	r)   r*   r   intr   r   r   valuer]   )r.   r   r   r   r_   r]   r2   s         r!   r*   zDynamicPad2d.__init__   sw    k3'&4Kfc"f%Fh$ (+H& 
	g  /r#   c           	         |j                         dd  \  }}| j                  || j                  d   | j                  d   | j                  d         }| j                  || j                  d   | j                  d   | j                  d         }|dkD  s|dkD  rBt
        j                  j                  ||dz  ||dz  z
  |dz  ||dz  z
  g| j                        }|S )Nr   r   r   )r_   )	sizer]   r   r   r   r   r8   r,   r_   )r.   inputinput_heightinput_widthpadding_heightpadding_widths         r!   r?   zDynamicPad2d.forward   s    $)JJL$5!k --lD<L<LQ<OQUQ\Q\]^Q_aeananopaqr,,[$:J:J1:Mt{{[\~_c_l_lmn_op A!2MM%%!Q&!MQ$66"a'"^q%88	 jj & 	E r#   )r   rA   rG   s   @r!   r+   r+      s    
/,r#   r+   c                   8     e Zd Z	 	 	 	 	 	 ddef fdZd Z xZS )BitMaxPool2dr   c                    t        |t        j                  j                        r|n||f}t        |t        j                  j                        r|n||f}t        |t        j                  j                        r|n||f}t        |   |||||       |rt        ||||      | _        y t        j                         | _        y rT   )
r   collectionsabcIterabler)   r*   r+   r,   r   rO   )	r.   r   r   r   	ceil_moder   padding_valueuse_dynamic_paddingr2   s	           r!   r*   zBitMaxPool2d.__init__   s     &0[__=U=U%Vk]hju\v%fkoo.F.FGfV\M])(KOO4L4LM8T\^fSgfgxK#K=QDH{{}DHr#   c                     | j                  |      }t        j                  j                  || j                  | j
                  | j                  | j                  | j                        S rT   )	r,   r   r8   
max_pool2dr   r   r   r   rn   r.   hidden_statess     r!   r?   zBitMaxPool2d.forward   sM    /}}''4++T[[$,,W[WeWe
 	
r#   )Nr   F)r   r   r   T)rB   rC   rD   r^   r*   r?   rF   rG   s   @r!   ri   ri      s)      %%&
r#   ri   c                   8     e Zd ZdZdef fdZdedefdZ xZS )BitEmbeddingszL
    BiT Embeddings (stem) composed of a single aggressive convolution.
    rP   c                 .   t         |           t        |j                  |j                  ddd|j
                        | _        t        dd|j                        | _	        |j
                  7|j
                  j                         dk(  rt        j                         | _        nt        j                  dd	
      | _        |j                  dk7  rt!        ||j                        | _        nt        j                         | _        |j                  | _        y )N   r   :0yE>)r   r   r-   r   r   )r   r   rp   r@   )r   r   r   r   r5   )r   r_   preactivationrQ   )r)   r*   r%   rQ   embedding_sizeglobal_paddingconvolutionri   embedding_dynamic_paddingpoolerupperr   rO   r,   ConstantPad2d
layer_typerI   normr.   rP   r2   s     r!   r*   zBitEmbeddings.__init__   s    3!!))
 #qPVPpPpq   ,1F1F1L1L1NRX1X{{}DH''CHDH/.vFDYDYZDIDI"//r#   pixel_valuesr   c                     |j                   d   }|| j                  k7  rt        d      | j                  |      }| j	                  |      }| j                  |      }| j                  |      }|S )Nr   zeMake sure that the channel dimension of the pixel values match with the one set in the configuration.)shaperQ   
ValueErrorr~   r,   r   r   )r.   r   rQ   	embeddings       r!   r?   zBitEmbeddings.forward  sr    #))!,4,,,w  $$\2	HHY'	IIi(	KK	*	r#   )	rB   rC   rD   rE   r   r*   r   r?   rF   rG   s   @r!   rv   rv      s'    0y 06F v r#   rv   rc   	drop_probr6   c                    |dk(  s|s| S d|z
  }| j                   d   fd| j                  dz
  z  z   }|t        j                  || j                  | j
                        z   }|j                          | j                  |      |z  }|S )zc
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    r5   r   r   )r   )dtypedevice)r   ndimtorchrandr   r   floor_div)rc   r   r6   	keep_probr   random_tensoroutputs          r!   	drop_pathr     s    
 CxII[[^

Q 77E

5ELL YYMYYy!M1FMr#   c                   x     e Zd ZdZd	dedz  ddf fdZdej                  dej                  fdZde	fdZ
 xZS )
BitDropPathzXDrop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).Nr   r   c                 0    t         |           || _        y rT   )r)   r*   r   )r.   r   r2   s     r!   r*   zBitDropPath.__init__(  s    "r#   rt   c                 D    t        || j                  | j                        S rT   )r   r   r6   rs   s     r!   r?   zBitDropPath.forward,  s    FFr#   c                      d| j                    S )Nzp=)r   )r.   s    r!   
extra_reprzBitDropPath.extra_repr/  s    DNN#$$r#   rT   )rB   rC   rD   rE   floatr*   r   r   r?   r   r   rF   rG   s   @r!   r   r   %  sG    b#%$, #$ #GU\\ Gell G%C %r#   r   c                 f    |}t        |t        | |dz  z         |z  |z        }|d| z  k  r||z  }|S )Nr   g?)rY   r^   )r_   divisor	min_value	new_values       r!   make_divr   3  sG    IIs57Q;#677BWLMI3;W	r#   c                   :     e Zd ZdZ	 	 	 	 	 	 	 	 d fd	Zd Z xZS )BitPreActivationBottleneckLayera  Pre-activation (v2) bottleneck block.
    Follows the implementation of "Identity Mappings in Deep Residual Networks":
    https://github.com/KaimingHe/resnet-1k-layers/blob/master/resnet-pre-act.lua

    Except it puts the stride on 3x3 conv when available.
    c           	         t         |           |xs |}|xs |}t        ||z        }|
rt        ||||d      | _        nd | _        t        ||      | _        t        ||dd|j                        | _	        t        ||      | _
        t        ||d||d|j                        | _        t        ||      | _        t        ||dd|j                        | _        |	d	kD  rt        |	      | _        y t        j                          | _        y )
NTr   preactr   ry   r-   r   r{   r   )r   r'   r-   r   r   )r)   r*   r   BitDownsampleConv
downsamplerI   norm1r%   r}   conv1norm2conv2norm3conv3r   r   rO   r   )r.   rP   in_channelsr0   bottle_ratior   r   first_dilationr'   drop_path_rateis_first_layermid_channelsr2   s               r!   r*   z(BitPreActivationBottleneckLayer.__init__C  s    	'38#2{| ;</DO #DO+FK@
-k<PT^d^s^st
+FN
-,&T[a[p[p

 ,FLA
-lL!QU_e_t_tu
8F8J^4PRP[P[P]r#   c                 0   | j                  |      }|}| j                  | j                  |      }| j                  |      }| j                  | j	                  |            }| j                  | j                  |            }| j                  |      }||z   S rT   )r   r   r   r   r   r   r   r   )r.   rt   hidden_states_preactshortcuts       r!   r?   z'BitPreActivationBottleneckLayer.forwardo  s    #zz-8 !??&';<H 

#78

4::m#<=

4::m#<=}5x''r#   N      ?r   r   Nr   r5   FrA   rG   s   @r!   r   r   ;  s.     *^X(r#   r   c                   :     e Zd ZdZ	 	 	 	 	 	 	 	 d fd	Zd Z xZS )BitBottleneckLayerz\Non Pre-activation bottleneck block, equivalent to V1.5/V1b bottleneck. Used for ViT Hybrid.c           
      D   t         |           |xs |}|xs |}t        ||z        }|
rt        ||||d      | _        nd | _        t        ||dd|j                        | _        t        ||      | _	        t        ||d|||d|j                        | _
        t        ||      | _        t        ||dd|j                        | _        t        ||d	      | _        |	d
kD  rt        |	      nt        j                          | _        t$        |j&                     | _        y )NFr   r   ry   r   r{   r   )r   r   r'   r-   r   rQ   rR   r   )r)   r*   r   r   r   r%   r}   r   rI   r   r   r   r   r   r   r   rO   r   r   rM   rN   )r.   rP   r   r0   r   r   r   r   r'   r   r   mid_chsr2   s               r!   r*   zBitBottleneckLayer.__init__  s    	'38#2{<,67/DO #DO-k7A4Y_YnYno
+FI
-#))	

 ,FI
-g|QDZ`ZoZop
+F`ef
8F8J^4PRP[P[P] !2!23r#   c                 Z   |}| j                   | j                  |      }| j                  |      }| j                  |      }| j                  |      }| j	                  |      }| j                  |      }| j                  |      }| j                  |      }| j                  ||z         }|S rT   )	r   r   r   r   r   r   r   r   rN   )r.   rt   r   s      r!   r?   zBitBottleneckLayer.forward  s     ??&}5H 

=1

=1

=1

=1

=1

=1}5(@Ar#   r   rA   rG   s   @r!   r   r     s+    f /4br#   r   c                   *     e Zd Z	 	 d fd	Zd Z xZS )r   c                     t         |           t        ||d|d|j                        | _        |rt        j                         | _        y t        ||d      | _        y )Nr   ry   )r   r-   r   Fr   )	r)   r*   r%   r}   convr   rO   rI   r   )r.   rP   r   r0   r   r   r2   s         r!   r*   zBitDownsampleConv.__init__  s\     	,qT6K`K`
	
  KKM 		 (\\ab 		r#   c                 B    | j                  | j                  |            S rT   )r   r   )r.   r\   s     r!   r?   zBitDownsampleConv.forward  s    yy1&&r#   )r   T)rB   rC   rD   r*   r?   rF   rG   s   @r!   r   r     s     
$'r#   r   c                   >     e Zd ZdZ	 	 d fd	Zd ZdedefdZ xZS )BitStagez7
    A ResNet v2 stage composed by stacked layers.
    c	                 ^   t         |           |dv rdnd}	|j                  dk(  rt        }
nt        }
|}t        j                         | _        t        |      D ]Q  }| j                  |||      \  }}}| j                  j                  t        |       |
|||||||	||	             |}|}	S y )N)r   r   r   r   
bottleneck)r   r   r   r   r   r   )r)   r*   r   r   r   r   
Sequentiallayersrange_get_updated_hyperparameters
add_moduler   )r.   rP   r   r0   r   r   depthr   layer_dropoutr   	layer_clsprev_chs	layer_idxr   r   r2   s                  r!   r*   zBitStage.__init__  s     	&&0a ,*I7Immou 	&I595V5V6=62FNN KK""I !%!-#1#1#1
 $H%N+	&r#   c                 8    |r||   }nd}|dk7  rd}|dk(  }|||fS )zt
        Get the new hyper-parameters with respect to the previous ones and the index of the current layer.
        r5   r   r    )r.   r   r   r   r   r   s         r!   r   z%BitStage._get_updated_hyperparameters  s8     *95N N>F"a~~55r#   rc   r   c                 T    |}t        | j                        D ]  \  }} ||      } |S rT   )	enumerater   )r.   rc   r>   _layers        r!   r?   zBitStage.forward"  s3    !$++. 	/HAu .L	/r#   )r   N)	rB   rC   rD   rE   r*   r   r   r?   rF   rG   s   @r!   r   r     s.     ,&\6 V  r#   r   c            	       F     e Zd Zdef fdZd Z	 d	dedededefdZ	 xZ
S )

BitEncoderrP   c           
         t         |           t        j                  g       | _        |j
                  }d}d}t        j                  t        j                  d|j                  t        |j                                    j                  |j                        D cg c]  }|j                          }}t        t!        |j                  |j"                  |            D ]`  \  }\  }}	}
| j%                  |||	||      \  }}}t'        |||||||
      }|}||z  }| j                  j)                  t+        |      |       b y c c}w )N   r   r   )r   r   r   r   )r)   r*   r   
ModuleListstagesr|   r   r   nplinspacer   sumdepthssplittolistr   ziphidden_sizesr   r   r   r   )r.   rP   r   current_strider   r\   layer_dropouts	stage_idxcurrent_depthcurrent_hidden_sizer   r0   r   stager2   s                 r!   r*   zBitEncoder.__init__*  sA   mmB'((  \\"++a1F1FFMMHZ"[\bbcicpcpq
 HHJ
 

 OXv22NCO
 	:JIJ':M .2-N-N>+>&.*L&( !#+E $Hf$NKK""3y>59+	:
s   Ec                 z    t        ||j                  z        }|dk(  rdnd}||j                  k\  r||z  }d}|||fS )Nr   r   r   )r   width_factoroutput_stride)r.   r   r   r   r   rP   r0   r   s           r!   r   z'BitEncoder._get_updated_hyperparametersP  sO     3f6I6I IJ1n!V111HFVX--r#   r>   output_hidden_statesreturn_dictr   c                     |rdnd }| j                   D ]  }|r||fz   } ||      } |r||fz   }|st        d ||fD              S t        ||      S )Nr   c              3   &   K   | ]	  }||  y wrT   r   ).0vs     r!   	<genexpr>z%BitEncoder.forward.<locals>.<genexpr>g  s     SqQ]Ss   )last_hidden_statert   )r   tupler   )r.   r>   r   r   rt   stage_modules         r!   r?   zBitEncoder.forwardX  sv     3 KK 	6L# - ?'5L		6  )\O;MS\=$ASSS-*'
 	
r#   )FT)rB   rC   rD   r   r*   r   r   boolr   r?   rF   rG   s   @r!   r   r   )  sA    $:y $:L. ]a
"
:>
UY
	'
r#   r   c                   X    e Zd ZU eed<   dZdZdZdgZ e	j                         d        Zy)BitPreTrainedModelrP   bit)imager   rv   c                    t        |t        j                        r#t        j                  |j
                  dd       y t        |t        j                        rt        j                  |j
                  t        j                  d             |j                  xt        j                  j                  j                  |j
                        \  }}|dkD  rdt        j                  |      z  nd}t        j                  |j                  | |       y y t        |t        j                  t        j                  f      rt        j                   |j
                  d       t        j                   |j                  d       t#        |dd       ^t        j$                  |j&                         t        j(                  |j*                         t        j$                  |j,                         y y y )	Nfan_outrelu)modenonlinearity   )ar   r   running_mean)r   r   Conv2dinitkaiming_normal_r:   Linearkaiming_uniform_rZ   sqrtr(   r   _calculate_fan_in_and_fan_outuniform_BatchNorm2d	GroupNorm	constant_getattrzeros_r  ones_running_varnum_batches_tracked)r.   modulefan_inr   bounds        r!   _init_weightsz BitPreTrainedModel._init_weightsw  s>   fbii(  YVT		*!!&--499Q<@{{&!HHMMGGV	17!DIIf--fkkE659 '  >?NN6==!,NN6;;*v~t4@F//0

6--.F667 A @r#   N)rB   rC   rD   r   __annotations__base_model_prefixinput_modalitiesmain_input_name_no_split_modulesr   no_gradr  r   r#   r!   r   r   o  s<    !$O()U]]_8 8r#   r   c            
       R     e Zd Z fdZe	 	 ddededz  dedz  defd       Z xZ	S )	BitModelc                 J   t         |   |       || _        t        |      | _        t        |      | _        |j                  dk(  rt        ||j                  d         nt        j                         | _        t        j                  d      | _        | j                          y )Nrz   r4   r{   )r   r   )r)   r*   rP   rv   embedderr   encoderr   rI   r   r   rO   r   AdaptiveAvgPool2dr   	post_initr   s     r!   r*   zBitModel.__init__  s     %f-!&)   O3 #68K8KB8OP 		 **62r#   Nr   r   r   r   c                 J   ||n| j                   j                  }||n| j                   j                  }| j                  |      }| j	                  |||      }|d   }| j                  |      }| j                  |      }|s
||f|dd  z   S t        |||j                        S )Nr   r   r   r   )r   pooler_outputrt   )	rP   r   r   r&  r'  r   r   r   rt   )	r.   r   r   r   kwargsembedding_outputencoder_outputsr   pooled_outputs	            r!   r?   zBitModel.forward  s     %9$D $++JjJj 	 &1%<k$++BYBY==6,,3GU` ' 
 ,A. II&78$56%}58KKK7/')77
 	
r#   NN)
rB   rC   rD   r*   r   r   r   r   r?   rF   rG   s   @r!   r$  r$    sR    "  -1#'	

 #Tk
 D[	
 
2
 
r#   r$  z
    BiT Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    )custom_introc                        e Zd Z fdZe	 	 	 	 d	dej                  dz  dej                  dz  dedz  dedz  de	f
d       Z
 xZS )
BitForImageClassificationc                 |   t         |   |       |j                  | _        t        |      | _        t        j                  t        j                         |j                  dkD  r-t        j                  |j                  d   |j                        nt        j                               | _        | j                          y )Nr   r4   )r)   r*   
num_labelsr$  r   r   r   Flattenr  r   rO   
classifierr)  r   s     r!   r*   z"BitForImageClassification.__init__  s      ++F#--JJLEKEVEVYZEZBIIf))"-v/@/@A`b`k`k`m

 	r#   Nr   labelsr   r   r   c                 B   ||n| j                   j                  }| j                  |||      }|r|j                  n|d   }| j	                  |      }d}	|| j                  ||| j                         }	|s|f|dd z   }
|	|	f|
z   S |
S t        |	||j                        S )a0  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr+  r   r   )losslogitsrt   )rP   r   r   r,  r8  loss_functionr   rt   )r.   r   r9  r   r   r-  outputsr0  r<  r;  r   s              r!   r?   z!BitForImageClassification.forward  s     &1%<k$++BYBY((<>R`k(l1<--'!*/%%ffdkkBDY,F'+'7D7V#CVC3f\c\q\qrrr#   )NNNN)rB   rC   rD   r*   r   r   FloatTensor
LongTensorr   r   r?   rF   rG   s   @r!   r4  r4    s    
  26*.,0#'s''$.s   4's #Tk	s
 D[s 
.s sr#   r4  zL
    BiT backbone, to be used with frameworks like DETR and MaskFormer.
    c                   j     e Zd ZdZ fdZeee	 	 d	dede	dz  de	dz  de
fd                     Z xZS )
BitBackboneFc                     t         |   |       t        |      | _        |j                  g|j
                  z   | _        | j                          y rT   )r)   r*   r$  r   r|   r   num_featuresr)  r   s     r!   r*   zBitBackbone.__init__  sD     F##223f6I6II 	r#   Nr   r   r   r   c                    ||n| j                   j                  }||n| j                   j                  }| j                  |dd      }|j                  }d}t        | j                        D ]  \  }}	|	| j                  v s|||   fz  } |s|f}
|r|
|j                  fz  }
|
S t        ||r|j                  d      S dd      S )a  
        Examples:

        ```python
        >>> from transformers import AutoImageProcessor, AutoBackbone
        >>> import torch
        >>> from PIL import Image
        >>> import httpx
        >>> from io import BytesIO

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> with httpx.stream("GET", url) as response:
        ...     image = Image.open(BytesIO(response.read()))

        >>> processor = AutoImageProcessor.from_pretrained("google/bit-50")
        >>> model = AutoBackbone.from_pretrained("google/bit-50")

        >>> inputs = processor(image, return_tensors="pt")
        >>> outputs = model(**inputs)
        ```NTr+  r   )feature_mapsrt   
attentions)	rP   r   r   r   rt   r   stage_namesout_featuresr
   )r.   r   r   r   r-  r>  rt   rF  idxr   r   s              r!   r?   zBitBackbone.forward  s    < &1%<k$++BYBY$8$D $++JjJj 	 ((<dPT(U--#D$4$45 	6JC)))s!3 55	6 "_F#70022M%3G'//
 	
MQ
 	
r#   r1  )rB   rC   rD   has_attentionsr*   r   r	   r   r   r   r
   r?   rF   rG   s   @r!   rB  rB    si     N   -1#'	3
3
 #Tk3
 D[	3
 
3
  ! 3
r#   rB  )r4  r$  r   rB  )Nrx   r   r   )r5   F)   );rE   rk   rZ   numpyr   r   r   r    r   r
  activationsr   backbone_utilsr   r	   modeling_outputsr
   r   r   r   modeling_utilsr   utilsr   r   utils.genericr   configuration_bitr   
get_loggerrB   loggerr   r   r"   r	  r%   r  rI   Moduler+   	MaxPool2dri   rv   r   r   r   r   r   r   r   r   r   r   r$  r4  rB  __all__r   r#   r!   <module>r[     s   @      & ! H  . , - ( 
		H	%&ERWY]R]L^ &R-ryy -`R\\ $0299 0f
2<< 
6/BII /fU\\ e T V[VbVb  %")) %A(bii A(HF FR'		 '.Gryy GTC
 C
L 8 8 86 2
! 2
 2
j ,s 2 ,s,s^ 
B
-!3 B

B
J Yr#   