
    i2n                     
   d dl Z d dlZd dlmZ d dlZd dlmZ d dlmc mZ	 ddl
mZ ddlmZmZ ddlmZ ddlmZ ddlmZmZ dd	lmZ dd
lmZ ddlmZmZmZ ddlm Z  ddl!m"Z" ddl#m$Z$m%Z%  G d dejL                        Z' G d dejL                        Z( G d dejL                        Z) G d de      Z* G d dejL                        Z+ G d de      Z,e ed       G d de                    Z- G d  d!ejL                        Z. G d" d#ej^                        Z0 G d$ d%ejL                        Z1 G d& d'e*      Z2 G d( d)e*      Z3 G d* d+e*      Z4ee G d, d-e                    Z5 ed.       G d/ d0e*             Z6g d1Z7y)2    N)	dataclass   )initialization)ACT2CLSACT2FN)filter_output_hidden_states)GradientCheckpointingLayer)BaseModelOutputModelOutput)PreTrainedModel)Unpack)TransformersKwargsauto_docstringcan_return_tuple)merge_with_config_defaults)capture_outputs   )SLANeXtConfigSLANeXtVisionConfigc                   >    e Zd ZdZ fdZdededej                  dej                  fdZdej                  d	ej                  d
ej                  de	eef   de	eef   dej                  fdZ
ddej                  de	ej                  ej                  f   fdZ xZS )SLANeXtVisionAttentionz=Multi-head Attention block with relative position embeddings.c                 .   t         |           |dk(  r2|j                  |j                  z  |j                  |j                  z  fn||f}|j                  | _        |j
                  |j                  z  }|dz  | _        |j                  | _        t        j                  |j
                  |j
                  dz  |j                        | _        t        j                  |j
                  |j
                        | _        |j                  | _        | j                  r||t        d      t        j                   t#        j$                  d|d   z  dz
  |            | _        t        j                   t#        j$                  d|d   z  dz
  |            | _        y y )Nr   g      r   biaszBInput size must be provided if using relative positional encoding.   r   )super__init__
image_size
patch_sizenum_attention_headshidden_sizescaleattention_dropoutdropoutnnLinearqkv_biasqkvprojuse_rel_pos
ValueError	Parametertorchzeros	rel_pos_h	rel_pos_w)selfconfigwindow_size
input_sizehead_dim	__class__s        }/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/slanext/modeling_slanext.pyr   zSLANeXtVisionAttention.__init__.   sX    a &"3"33V5F5F&J[J[5[\{+ 	 $*#=#= %%)C)CCt^
//99V//1C1Ca1Gfoo^IIf00&2D2DE	!--! !eff  \\%++a*Q-6G!6KX*VWDN\\%++a*Q-6G!6KX*VWDN     q_sizek_sizerel_posreturnc                    t        dt        ||      z  dz
        }t        j                  |j	                  d|j
                  d   d      j                  ddd      |d      }|j	                  d|      j                  dd      }t        j                  |      dddf   t        ||z  d      z  }t        j                  |      dddf   t        ||z  d      z  }||z
  |dz
  t        ||z  d      z  z   }||j                            S )	a  
        Get relative positional embeddings according to the relative positions of
            query and key sizes.

        Args:
            q_size (int):
                size of the query.
            k_size (int):
                size of key k.
            rel_pos (`torch.Tensor`):
                relative position embeddings (L, channel).

        Returns:
            Extracted positional embeddings according to relative positions.
        r   r   r   linear)sizemodeN      ?)
intmaxFinterpolatereshapeshapepermuter-   arangelong)	r1   r9   r:   r;   max_rel_distrel_pos_resizedq_coordsk_coordsrelative_coordss	            r7   get_rel_posz"SLANeXtVisionAttention.get_rel_posG   s     1s6622Q67--OOAw}}Q/4<<Q1E

 *11"lCKKAqQ <<'403v3LL<<'a03v3LL#h.6A:Vf_VYAZ2ZZ33566r8   queryr/   r0   c                 T   |\  }}|\  }}	| j                  |||      }
| j                  ||	|      }|j                  \  }}}|j                  ||||      }t        j                  d||
      }t        j                  d||      }|dddddddddf   |dddddddddf   z   }|S )a  
        Calculate decomposed Relative Positional Embeddings from :paper:`mvitv2`.
        https://github.com/facebookresearch/mvit/blob/19786631e330df9f3622e5402b4a419a263a2c80/mvit/models/attention.py

        Args:
            query (`torch.Tensor`):
                query q in the attention layer with shape (batch_size, query_height * query_width, channel).
            rel_pos_h (`torch.Tensor`):
                relative position embeddings (Lh, channel) for height axis.
            rel_pos_w (`torch.Tensor`):
                relative position embeddings (Lw, channel) for width axis.
            q_size (tuple):
                spatial sequence size of query q with (query_height, query_width).
            k_size (tuple):
                spatial sequence size of key k with (key_height, key_width).

        Returns:
            decomposed_rel_pos (`torch.Tensor`):
                decomposed relative position embeddings.
        zbhwc,hkc->bhwkzbhwc,wkc->bhwkN)rQ   rH   rG   r-   einsum)r1   rR   r/   r0   r9   r:   query_heightquery_width
key_height	key_widthrelative_position_heightrelative_position_width
batch_size_dimreshaped_queryrel_hrel_wdecomposed_rel_poss                      r7   get_decomposed_rel_posz-SLANeXtVisionAttention.get_decomposed_rel_posg   s    8 %+!k &
I#'#3#3L*i#X "&"2"2;	9"U"[[
Asz<cR-~?WX-~?VW"1aAt#34uQ1dA=M7NN!!r8   hidden_statesc                    |j                   \  }}}}| j                  |      j                  |||z  d| j                  d      j	                  ddddd      }|j                  d|| j                  z  ||z  d      j                  d      \  }}	}
|| j                  z  |	j                  dd      z  }| j                  rC| j                  || j                  | j                  ||f||f      }|j                  |      }||z   }t        j                  j                  j!                  |t        j"                  d      j%                  |j&                        }t        j                  j)                  || j(                  | j*                  	      }||
z  j                  || j                  ||d      }|j	                  ddddd      j                  |||d      }| j-                  |      }||fS )
Nr   r>   r   r   r      )dtyper]   )ptraining)rH   r(   rG   r    rI   unbindr"   	transposer*   rb   r/   r0   
reshape_asr-   r%   
functionalsoftmaxfloat32torg   r$   ri   r)   )r1   rc   output_attentionsr[   heightwidthr\   r(   rR   keyvalueattn_weightsra   
attn_probsattn_outputs                  r7   forwardzSLANeXtVisionAttention.forward   s   '4':':$
FE1 HH]#WZ%D4L4LbQWQ1a# 	  KK:8P8P+PRX[`R`bdellmnosE

*cmmB.CC!%!<!<t~~t~~QV" "4!>!>|!L'*<<Lxx**22<u}}Z\2]``afalalm]]**<4<<RVR_R_*`
!E)22:t?W?WY_afhjk!))!Q1a8@@VUZ\^_ii,L((r8   N)__name__
__module____qualname____doc__r   rC   r-   TensorrQ   tuplerb   ry   __classcell__r6   s   @r7   r   r   +   s    GX27# 7s 7U\\ 7ell 7@("||(" <<(" <<	("
 c3h(" c3h(" 
("T)U\\ )eTYT`T`bgbnbnTnNo )r8   r   c            	       x     e Zd Z fdZdej
                  dej
                  dej
                  dee   fdZ xZ	S )SLANeXtAttentionGRUCellc                    t         |           t        j                  ||d      | _        t        j                  ||      | _        t        j                  |dd      | _        t        j                  ||z   |      | _        y )NFr   r   )	r   r   r%   r&   input_to_hiddenhidden_to_hiddenscoreGRUCellrnn)r1   r4   r!   num_embeddingsr6   s       r7   r   z SLANeXtAttentionGRUCell.__init__   sa    !yy[uM "		+{ CYY{AE:
::j>9;Gr8   prev_hiddenbatch_hiddenchar_onehotskwargsc                    | j                  |      }| j                  |      j                  d      }||z   }t        j                  |      }| j                  |      }t        j                  |dt        j                        j                  |j                        }|j                  dd      }t        j                  ||      j                  d      }	t        j                  |	|gd      }
| j                  |
|      }||fS )Nr   r]   rg   r   )r   r   	unsqueezer-   tanhr   rE   rn   ro   rp   rg   rk   matmulsqueezecatr   )r1   r   r   r   r   batch_hidden_projprev_hidden_projattention_scoresrv   contextconcat_contextrc   s               r7   ry   zSLANeXtAttentionGRUCell.forward   s     !00>00=GGJ,/?? ::&67::&67yy!1qNQQRbRhRhi#--a3,,|\:BB1EG\#:A>=l**r8   )
r{   r|   r}   r   r-   FloatTensorr   r   ry   r   r   s   @r7   r   r      sL    H+&&+ ''+ ''	+
 +,+r8   r   c                   &     e Zd Zd fd	Zd Z xZS )
SLANeXtMLPc                     t         |           t        j                  ||      | _        t        j                  ||      | _        |t        j                         | _        y t        |          | _        y rz   )	r   r   r%   r&   fc1fc2Identityr   act_fn)r1   r!   out_channels
activationr6   s       r7   r   zSLANeXtMLP.__init__   sR    99[+699[,7'1'9bkkmwz?R?Tr8   c                 l    | j                  |      }| j                  |      }| j                  |      }|S rz   )r   r   r   r1   rc   s     r7   ry   zSLANeXtMLP.forward   s2    //M2r8   rz   )r{   r|   r}   r   ry   r   r   s   @r7   r   r      s    Ur8   r   c                   j     e Zd ZU eed<   dZdZdZdZddgZ	 e
j                          fd       Z xZS )	SLANeXtPreTrainedModelr2   backbonepixel_valuesimageTstructure_attention_cellstructure_generatorc                    t         |   |       t        |t              r,|j                   t        j                  |j                  d       t        |t              rL|j                  r@t        j                  |j                  d       t        j                  |j                  d       t        |t        j                        r|j                  dkD  r"dt        j                  |j                        z  nd}t        j                   |j"                  | |       t        j                   |j$                  | |       |j&                  "t        j                   |j&                  | |       |j(                  "t        j                   |j(                  | |       t        |t*              rdt        j                  | j,                  j                  dz        z  }|j.                  fD ]  }|j1                         D ]n  }t        |t        j2                        st        j                   |j4                  | |       |j6                  Mt        j                   |j6                  | |       p  yy)zInitialize the weightsNg        r   rB   )r   _init_weights
isinstanceSLANeXtVisionEncoder	pos_embedinit	constant_r   r*   r/   r0   r%   r   r!   mathsqrtuniform_	weight_ih	weight_hhbias_ihbias_hhSLANeXtSLAHeadr2   r   childrenr&   weightr   )r1   modulestd	generatorlayerr6   s        r7   r   z$SLANeXtPreTrainedModel._init_weights   s    	f% f23+v//5 f45!!v//5v//5 fbjj)9?9K9Ka9O#		&"4"455UVCMM&**SD#6MM&**SD#6~~)fnnsdC8~~)fnnsdC8 fn-		$++"9"9C"?@@C$88: A	&//1 AE!%3ellSD#> ::1 MM%**sdC@	AA .r8   )r{   r|   r}   r   __annotations__base_model_prefixmain_input_nameinput_modalitiessupports_gradient_checkpointing_keep_in_fp32_modules_strictr-   no_gradr   r   r   s   @r7   r   r      sH    "$O!&*#$>@U#V U]]_"A "Ar8   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )SLANeXtMLPBlockc                    t         |           t        j                  |j                  |j
                        | _        t        j                  |j
                  |j                        | _        t        |j                     | _
        y rz   )r   r   r%   r&   r!   mlp_dimlin1lin2r   
hidden_actactr1   r2   r6   s     r7   r   zSLANeXtMLPBlock.__init__  sX    IIf00&..A	IIfnnf.@.@A	&++,r8   rc   r<   c                 l    | j                  |      }| j                  |      }| j                  |      }|S rz   )r   r   r   r   s     r7   ry   zSLANeXtMLPBlock.forward  s2    		-0/		-0r8   )r{   r|   r}   r   r-   r   ry   r   r   s   @r7   r   r     s#    -U\\ ell r8   r   c            
           e Zd Z fdZdej
                  dedeej
                  eeef   f   fdZdej
                  dedeeef   deeef   dej
                  f
d	Z	dej
                  deej                     fd
Z xZS )SLANeXtVisionLayerc                 2   t         |           t        j                  |j                  |j
                        | _        t        ||      | _        t        j                  |j                  |j
                        | _	        t        |      | _        || _        y )N)eps)r   r   r%   	LayerNormr!   layer_norm_epslayer_norm1r   attnlayer_norm2r   mlpr3   )r1   r2   r3   r6   s      r7   r   zSLANeXtVisionLayer.__init__  sn    <<(:(:@U@UV*6;?	<<(:(:@U@UV"6*&r8   rc   r3   r<   c           	      L   |j                   \  }}}}|||z  z
  |z  }|||z  z
  |z  }t        j                  |ddd|d|f      }||z   ||z   }
}	|j                  ||	|z  ||
|z  ||      }|j	                  dddddd      j                         j                  d|||      }||	|
ffS )a  
        Args:
        Partition into non-overlapping windows with padding if needed.
            hidden_states (tensor): input tokens with [batch_size, height, width, channel]. window_size (int): window
            size.

        Returns:
            windows: windows after partition with [batch_size * num_windows, window_size, window_size, channel].
            (pad_height, pad_width): padded height and width before partition
        r   r   r   r   re      r>   )rH   rE   padrG   rI   
contiguous)r1   rc   r3   r[   rr   rs   channelpad_hpad_w
pad_height	pad_widthwindowss               r7   window_partitionz#SLANeXtVisionLayer.window_partition$  s     .;-@-@*
FE7v33{Bu{22kAmaAua-GH &I
%--
k1;	[@XZegn
  ''1aAq9DDFNNrS^`kmtuY///r8   r   padding_shapeoriginal_shapec                 2   |\  }}|\  }}|j                   d   ||z  |z  |z  z  }	|j                  |	||z  ||z  ||d      }
|
j                  dddddd      j                         j                  |	||d      }
|
ddd|d|ddf   j                         }
|
S )	aS  
        Args:
        Window unpartition into original sequences and removing padding.
            hidden_states (tensor):
                input tokens with [batch_size * num_windows, window_size, window_size, channel].
            window_size (int):
                window size.
            padding_shape (Tuple):
                padded height and width (pad_height, pad_width).
            original_shape (Tuple): original height and width (height, width) before padding.

        Returns:
            hidden_states: unpartitioned sequences with [batch_size, height, width, channel].
        r   r>   r   r   r   re   r   N)rH   rG   rI   r   )r1   r   r3   r   r   r   r   rr   rs   r[   rc   s              r7   window_unpartitionz%SLANeXtVisionLayer.window_unpartition<  s    " !.
I&]]1%*y*@K*OS^*^_

k193K[Zegi
 !!!Q1a3>>@HHU_ajlno 	 &a&&5&!&;<GGIr8   c                    |}| j                  |      }| j                  dkD  r=|j                  d   |j                  d   }}| j                  || j                        \  }}| j	                  |      \  }}| j                  dkD  r | j                  || j                  f      }||z   }| j                  |      }|| j                  |      z   }|S )Nr   r   r   )rc   )r   r3   rH   r   r   r   r   r   )r1   rc   residualrr   rs   r   rv   layernorm_outputs           r7   ry   zSLANeXtVisionLayer.forwardZ  s     ((7a)//2M4G4G4JEF+/+@+@PTP`P`+a(M=&*ii' '0 '
#| a 33M4CSCSUbekmrdstM =0++M:%1A(BBr8   )r{   r|   r}   r   r-   r   rC   r   r   r   r   ry   r   r   s   @r7   r   r     s    '0ell 0 0QVW\WcWcejknpsksetWtQu 00||25FKCQTHoglmprumugv	<U\\ eE<M<M6N r8   r   z
    Base class for slanext vision model's outputs that also contains image embeddings obtained by applying the projection
    layer to the pooler_output.
    )custom_introc                       e Zd ZU dZdZej                  dz  ed<   dZej                  dz  ed<   dZ	e
ej                  df   dz  ed<   dZe
ej                  df   dz  ed<   y)SLANeXtVisionEncoderOutputz
    image_embeds (`torch.FloatTensor` of shape `(batch_size, output_dim)` *optional* returned when model is initialized with `with_projection=True`):
        The image embeddings obtained by applying the projection layer to the pooler_output.
    Nimage_embedslast_hidden_state.rc   
attentions)r{   r|   r}   r~   r   r-   r   r   r   rc   r   r    r8   r7   r   r   o  sr    
 .2L%##d*126u((4/6:>M5**C/047>7;Je'',-4;r8   r   c                   (     e Zd ZdZ fdZd Z xZS )SLANeXtPatchEmbeddingsz
    This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial
    `hidden_states` (patch embeddings) of shape `(batch_size, seq_length, hidden_size)` to be consumed by a
    Transformer.
    c                    t         |           |j                  |j                  }}|j                  |j
                  }}t        |t        j                  j                        r|n||f}t        |t        j                  j                        r|n||f}|d   |d   z  |d   |d   z  z  }|| _        || _        || _        || _
        t        j                  ||||      | _        y )Nr   r   )kernel_sizestride)r   r   r   r   num_channelsr!   r   collectionsabcIterablenum_patchesr%   Conv2d
projection)r1   r2   r   r   r   r!   r  r6   s          r7   r   zSLANeXtPatchEmbeddings.__init__  s    !'!2!2F4E4EJ
$*$7$79K9Kk#-j+//:R:R#SZZdfpYq
#-j+//:R:R#SZZdfpYq
!!}
15*Q-:VW=:XY$$(&))L+:^hir8   c                 N   |j                   \  }}}}|| j                  k7  rt        d      || j                  d   k7  s|| j                  d   k7  r2t        d| d| d| j                  d    d| j                  d    d	      | j	                  |      j                  ddd	d      }|S )
NzeMake sure that the channel dimension of the pixel values match with the one set in the configuration.r   r   zInput image size (*z) doesn't match model (z).r   r   )rH   r   r+   r   r  rI   )r1   r   r[   r   rr   rs   
embeddingss          r7   ry   zSLANeXtPatchEmbeddings.forward  s    2>2D2D/
L&%4,,,w  T__Q''5DOOA4F+F$VHAeW4KDOO\]L^K__`aeapapqras`ttvw  __\2::1aAF
r8   )r{   r|   r}   r~   r   ry   r   r   s   @r7   r   r     s    jr8   r   c                   f     e Zd ZdZddd fd
Zdej                  dej                  f fdZ xZS )	SLANeXtLayerNormaA  LayerNorm that supports two data formats: channels_last (default) or channels_first.
    The ordering of the dimensions in the inputs. channels_last corresponds to inputs with shape (batch_size, height,
    width, channels) while channels_first corresponds to inputs with shape (batch_size, channels, height, width).
    gư>channels_last)r   data_formatc                \    t        |   |fd|i| |dvrt        d|       || _        y )Nr   )r
  channels_firstzUnsupported data format: )r   r   NotImplementedErrorr  )r1   normalized_shaper   r  r   r6   s        r7   r   zSLANeXtLayerNorm.__init__  s?    )=s=f=AA%(A+&OPP&r8   featuresr<   c                     | j                   dk(  r9|j                  dddd      }t        |   |      }|j                  dddd      }|S t        |   |      }|S )z
        Args:
            features: Tensor of shape (batch_size, channels, height, width) OR (batch_size, height, width, channels)
        r  r   r   r   r   )r  rI   r   ry   )r1   r  r6   s     r7   ry   zSLANeXtLayerNorm.forward  sj    
 //''1a3Hwx0H''1a3H  wx0Hr8   )	r{   r|   r}   r~   r   r-   r   ry   r   r   s   @r7   r	  r	    s4    
 15/ '   r8   r	  c                   *     e Zd Zdef fdZd Z xZS )SLANeXtVisionNeckr2   c                 j   t         |           || _        t        j                  |j
                  |j                  dd      | _        t        |j                  d      | _	        t        j                  |j                  |j                  ddd      | _
        t        |j                  d      | _        y )Nr   F)r   r   r  )r  r   )r   paddingr   )r   r   r2   r%   r  r!   output_channelsconv1r	  r   conv2r   r   s     r7   r   zSLANeXtVisionNeck.__init__  s    YYv1163I3IWX_de
+F,B,BP`aYYv55v7M7M[\fgnst
+F,B,BP`ar8   c                     |j                  dddd      }| j                  |      }| j                  |      }| j                  |      }| j	                  |      }|S )Nr   r   r   r   )rI   r  r   r  r   r   s     r7   ry   zSLANeXtVisionNeck.forward  sZ    %--aAq9

=1((7

=1((7r8   )r{   r|   r}   r   r   ry   r   r   s   @r7   r  r    s    b2 br8   r  c            
            e Zd ZeedZdZdef fdZd Z	e
 ed      	 dd	ej                  dz  d
ee   deez  fd              Z xZS )r   )rc   r   r   r2   c                    t         |   |       || _        |j                  | _        t	        |      | _        d | _        |j                  rht        j                  t        j                  d|j                  |j                  z  |j                  |j                  z  |j                              | _        t        j                         | _        t!        |j"                        D ]D  }t%        |||j&                  vr|j(                  nd      }| j                  j+                  |       F t-        |      | _        d| _        | j3                          y )Nr   r   )r3   F)r   r   r2   r   r   patch_embedr   use_abs_posr%   r,   r-   r.   r   r!   
ModuleListlayersrangenum_hidden_layersr   global_attn_indexesr3   appendr  neckgradient_checkpointing	post_init)r1   r2   ir   r6   s       r7   r   zSLANeXtVisionEncoder.__init__  s     ++1&9\\%%):)::%%):)::&&	DN mmov//0 	&A&236;U;U2UF..[\E KKu%	& &f-	&+#r8   c                     | j                   S rz   )r  )r1   s    r7   get_input_embeddingsz)SLANeXtVisionEncoder.get_input_embeddings  s    r8   F)tie_last_hidden_statesNr   r   r<   c                     |t        d      | j                  |      }| j                  || j                  z   }| j                  D ]
  } ||      } | j	                  |      }t        |      S )Nz You have to specify pixel_values)r   )r+   r  r   r  r$  r   )r1   r   r   rc   layer_modules        r7   ry   zSLANeXtVisionEncoder.forward  sy    
 ?@@((6>>%)DNN:M KK 	8L(7M	8		-0)+
 	
r8   rz   )r{   r|   r}   r   r   _can_record_outputsr   r   r   r)  r   r   r-   r   r   r   r   r   ry   r   r   s   @r7   r   r     sz    ,>Nde!2 >   E27;
!--4
GMN`Ga
	+	+
 3  
r8   r   c                   X     e Zd Z	 ddedz  f fdZdej                  dee   fdZ	 xZ
S )SLANeXtBackboneNr2   c                     t         |   |       t        |j                        | _        t        j                  |j                  |j                  dddd      | _	        | j                          y )Nr   r   r   F)r   r   r  r   )r   r   r   vision_configvision_towerr%   r  post_conv_in_channelspost_conv_out_channels	post_convr&  r1   r2   r   r6   s      r7   r   zSLANeXtBackbone.__init__  s^    
 	 01E1EF((&*G*GUV_`jkrw
 	r8   rc   r   c                      | j                   |fi |}| j                  |j                        }|j                  d      j	                  dd      }t        ||j                  |j                        S )Nr   r   )r   rc   r   )r2  r5  r   flattenrk   r
   rc   r   )r1   rc   r   vision_outputs       r7   ry   zSLANeXtBackbone.forward  sl    )))-B6B}'F'FG%--a0::1a@+'55$//
 	
r8   rz   )r{   r|   r}   dictr   r-   r   r   r   ry   r   r   s   @r7   r/  r/    s6     #
t

U\\ 
VDV=W 
r8   r/  c                        e Zd ZdeiZ	 d	dedz  f fdZeee		 d	de
j                  de
j                  dz  dee   fd                     Z xZS )
r   r   Nr2   c                     t         |   |       t        |j                  |j                  |j
                        | _        t        |j                  |j
                        | _        | j                          y rz   )
r   r   r   r4  r!   r   r   r   r   r&  r6  s      r7   r   zSLANeXtSLAHead.__init__)  s_    
 	 (?))6+=+=v?R?R)
% $.f.@.@&BUBU#V r8   rc   targetsr   c                 6   t        j                  |j                  d   | j                  j                  ft         j
                  |j                        }t        j                  |j                  d   gt         j                  |j                        }g }g }t        | j                  j                  dz         D ]  }t        j                  || j                  j                        j                         }	| j                  ||j                         |	      \  }}| j                  |      }
|
j!                  d      }|j#                  |
       |j#                  |       t        j$                  |d      j'                  | j                  j                  dz
        j)                  d      j+                         s n t        j,                  t        j$                  |d      dt         j
                        j/                  |j0                        }t3        ||      S )	Nr   )rg   device)r@   rg   r?  r   )r]   r>   r   )r   rc   )r-   r.   rH   r2   r!   ro   r?  rK   r   max_text_lengthrE   one_hotr   floatr   r   argmaxr#  stackeqanyallrn   rp   rg   r
   )r1   rc   r=  r   r  predicted_charsstructure_preds_liststructure_ids_listr\   embedding_featurestructure_stepstructure_predss               r7   ry   zSLANeXtSLAHead.forward7  s    ;;  #T[[%<%<=U]][h[o[o
  ++M,?,?,B+C5::^k^r^rs!t{{22Q67 		A !		/4;;;S;S T Z Z \77-BUBUBWYjkKHa!55h?N,333:O ''7%%o6{{-15889Q9QTU9UVZZ[]^bbd		 ))EKK0D!$LRT\a\i\ijmm
 Pdeer8   rz   )r{   r|   r}   r   r-  r:  r   r   r   r   r-   r   r   r   r   ry   r   r   s   @r7   r   r   $  s    - #t    (,f((f $f +,	f !   fr8   r   c                   b    e Zd ZU dZdZej                  dz  ed<   dZej                  dz  ed<   y) SLANeXtForTableRecognitionOutputam  
    head_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Hidden-states of the SLANeXtSLAHead at each prediction step, varies up to max `self.config.max_text_length` states (depending on early exits).
    head_attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Attentions of the SLANeXtSLAHead at each prediction step, varies up to max `self.config.max_text_length` attentions (depending on early exits).
    Nhead_hidden_stateshead_attentions)	r{   r|   r}   r~   rP  r-   r   r   rQ  r   r8   r7   rO  rO  X  s4     48))D0704OU&&-4r8   rO  z
    SLANeXt Table Recognition model for table recognition tasks. Wraps the core SLANeXtPreTrainedModel
    and returns outputs compatible with the Transformers table recognition API.
    c            	            e Zd Zdef fdZeedej                  de	e
   deej                     ez  fd              Z xZS )SLANeXtForTableRecognitionr2   c                     t         |   |       t        |      | _        t	        |      | _        | j                          y )N)r2   )r   r   r/  r   r   headr&  r   s     r7   r   z#SLANeXtForTableRecognition.__init__m  s2     'v6"&1	r8   r   r   r<   c                      | j                   |fi |} | j                  |j                  fi |}t        |j                  |j                  |j
                  |j                  |j
                        S )N)r   rc   r   rP  rQ  )r   rU  r   rO  rc   r   )r1   r   r   backbone_outputshead_outputss        r7   ry   z"SLANeXtForTableRecognition.forwards  sp    
 )4==@@ tyy!1!C!CNvN/*<<*88'22+99(33
 	
r8   )r{   r|   r}   r   r   r   r   r-   r   r   r   r   rO  ry   r   r   s   @r7   rS  rS  f  s`    }  
!--
9?@R9S
	u  	!$D	D
  
r8   rS  )r   r/  rS  r   )8r   r   dataclassesr   r-   torch.nnr%   torch.nn.functionalrm   rE    r   r   activationsr   r   backbone_utilsr   modeling_layersr	   modeling_outputsr
   r   modeling_utilsr   processing_utilsr   utilsr   r   r   utils.genericr   utils.output_capturingr   configuration_slanextr   r   Moduler   r   r   r   r   r   r   r   r   r	  r  r   r/  r   rO  rS  __all__r   r8   r7   <module>ri     s}  ,   !     & * 9 9 < - & I I 7 5 EB)RYY B)J+bii +B +A_ +A\bii Q3 Qh 	< 	< 	< RYY  Fr|| 4		 (6
1 6
r
, 
01f+ 1fh 	5 	5  	5 
!7 

. hr8   