
    i[                     D   d dl Z d dlmZ d dlZd dlmZ d dlmc mZ d dl	m
c mc mZ d dlmZ ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZmZ ddlm Z m!Z!m"Z" ddl#m$Z$ ddl%m&Z& ddl'm(Z(m)Z) ddl*m+Z+m,Z,m-Z-m.Z.m/Z/ ddl0m1Z1m2Z2 ddl3m4Z4 ddl5m6Z6 ddl7m8Z8 ddl9m:Z:m;Z;  e/jx                  e=      Z> e,d      e G d de8                    Z? G d de:      Z@ e,d      e G d de                    ZA G d d ej                        ZC G d! d"ej                        ZD G d# d$e&      ZE G d% d&e;      ZF G d' d(eE      ZG G d) d*eE      ZHee, G d+ d,e$                    ZI e,d-.       G d/ d0eE             ZJe, e4d12       G d3 d4e                    ZKg d5ZLy)6    N)	dataclass)strict   )initialization)ACT2CLS)filter_output_hidden_states)PreTrainedConfig)TorchvisionBackend)BatchFeature)group_images_by_shapereorder_images)IMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STDSizeDict)BaseModelOutput)PreTrainedModel)ImagesKwargsUnpack)TransformersKwargsauto_docstringcan_return_tupleis_torchdynamo_compilinglogging)
TensorTypemerge_with_config_defaults)requires)capture_outputs   )GotOcr2VisionConfig)GotOcr2VisionAttentionGotOcr2VisionEncoderz&PaddlePaddle/SLANeXt_wired_safetensors)
checkpointc                       e Zd ZU dZeed<   y)SLANeXtVisionConfig   
image_sizeN)__name__
__module____qualname__r&   int__annotations__     |/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/slanext/modular_slanext.pyr$   r$   2   s     Jr-   r$   c                       e Zd Zy)SLANeXtVisionAttentionNr'   r(   r)   r,   r-   r.   r0   r0   8       r-   r0   c                        e Zd ZU dZdZdeiZdZeez  dz  e	d<   dZ
ee	d<   dZee	d<   d	Zee	d
<   dZee	d<   dZee	d<    fdZ xZS )SLANeXtConfiga  
    vision_config (`dict` or [`SLANeXtVisionConfig`], *optional*):
        Configuration for the vision encoder. If `None`, a default [`SLANeXtVisionConfig`] is used.
    post_conv_in_channels (`int`, *optional*, defaults to 256):
        Number of input channels for the post-encoder convolution layer.
    post_conv_out_channels (`int`, *optional*, defaults to 512):
        Number of output channels for the post-encoder convolution layer.
    out_channels (`int`, *optional*, defaults to 50):
        Vocabulary size for the table structure token prediction head, i.e., the number of distinct structure
        tokens the model can predict.
    hidden_size (`int`, *optional*, defaults to 512):
        Dimensionality of the hidden states in the attention GRU cell and the structure/location prediction heads.
    max_text_length (`int`, *optional*, defaults to 500):
        Maximum number of autoregressive decoding steps (tokens) for the structure and location decoder.
    slanextvision_configN   post_conv_in_channelsr%   post_conv_out_channels2   out_channelshidden_sizei  max_text_lengthc                     | j                   t               | _         n4t        | j                   t              rt        di | j                   | _         t	        |   di | y Nr,   )r6   r$   
isinstancedictsuper__post_init__selfkwargs	__class__s     r.   rC   zSLANeXtConfig.__post_init__Y   sP    %!4!6D**D1!4!Jt7I7I!JD''r-   )r'   r(   r)   __doc__
model_typer$   sub_configsr6   rA   r+   r8   r*   r9   r;   r<   r=   rC   __classcell__rG   s   @r.   r4   r4   <   sm      J"$78K7;M4--4;!$3$"%C%L#KOS( (r-   r4   c            	       x     e Zd Z fdZdej
                  dej
                  dej
                  dee   fdZ xZ	S )SLANeXtAttentionGRUCellc                    t         |           t        j                  ||d      | _        t        j                  ||      | _        t        j                  |dd      | _        t        j                  ||z   |      | _        y )NF)bias   )	rB   __init__nnLinearinput_to_hiddenhidden_to_hiddenscoreGRUCellrnn)rE   
input_sizer<   num_embeddingsrG   s       r.   rR   z SLANeXtAttentionGRUCell.__init__b   sa    !yy[uM "		+{ CYY{AE:
::j>9;Gr-   prev_hiddenbatch_hiddenchar_onehotsrF   c                    | j                  |      }| j                  |      j                  d      }||z   }t        j                  |      }| j                  |      }t        j                  |dt        j                        j                  |j                        }|j                  dd      }t        j                  ||      j                  d      }	t        j                  |	|gd      }
| j                  |
|      }||fS )NrQ   dimdtyper   )rU   rV   	unsqueezetorchtanhrW   Fsoftmaxfloat32torb   	transposematmulsqueezecatrY   )rE   r\   r]   r^   rF   batch_hidden_projprev_hidden_projattention_scoresattn_weightscontextconcat_contexthidden_statess               r.   forwardzSLANeXtAttentionGRUCell.forwardk   s     !00>00=GGJ,/?? ::&67::&67yy!1qNQQRbRhRhi#--a3,,|\:BB1EG\#:A>=l**r-   )
r'   r(   r)   rR   rd   FloatTensorr   r   ru   rK   rL   s   @r.   rN   rN   a   sL    H+&&+ ''+ ''	+
 +,+r-   rN   c                   &     e Zd Zd fd	Zd Z xZS )
SLANeXtMLPc                     t         |           t        j                  ||      | _        t        j                  ||      | _        |t        j                         | _        y t        |          | _        y N)	rB   rR   rS   rT   fc1fc2Identityr   act_fn)rE   r<   r;   
activationrG   s       r.   rR   zSLANeXtMLP.__init__   sR    99[+699[,7'1'9bkkmwz?R?Tr-   c                 l    | j                  |      }| j                  |      }| j                  |      }|S rz   )r{   r|   r~   )rE   rt   s     r.   ru   zSLANeXtMLP.forward   s2    //M2r-   rz   )r'   r(   r)   rR   ru   rK   rL   s   @r.   rx   rx      s    Ur-   rx   c                   j     e Zd ZU eed<   dZdZdZdZddgZ	 e
j                          fd       Z xZS )	SLANeXtPreTrainedModelconfigbackbonepixel_values)imageTstructure_attention_cellstructure_generatorc                    t         |   |       t        |t              r,|j                   t        j                  |j                  d       t        |t              rL|j                  r@t        j                  |j                  d       t        j                  |j                  d       t        |t        j                        r|j                  dkD  r"dt        j                  |j                        z  nd}t        j                   |j"                  | |       t        j                   |j$                  | |       |j&                  "t        j                   |j&                  | |       |j(                  "t        j                   |j(                  | |       t        |t*              rdt        j                  | j,                  j                  dz        z  }|j.                  fD ]  }|j1                         D ]n  }t        |t        j2                        st        j                   |j4                  | |       |j6                  Mt        j                   |j6                  | |       p  yy)zInitialize the weightsNg        r   g      ?)rB   _init_weightsr@   SLANeXtVisionEncoder	pos_embedinit	constant_r0   use_rel_pos	rel_pos_h	rel_pos_wrS   rX   r<   mathsqrtuniform_	weight_ih	weight_hhbias_ihbias_hhSLANeXtSLAHeadr   r   childrenrT   weightrP   )rE   modulestd	generatorlayerrG   s        r.   r   z$SLANeXtPreTrainedModel._init_weights   s    	f% f23+v//5 f45!!v//5v//5 fbjj)9?9K9Ka9O#		&"4"455UVCMM&**SD#6MM&**SD#6~~)fnnsdC8~~)fnnsdC8 fn-		$++"9"9C"?@@C$88: A	&//1 AE!%3ellSD#> ::1 MM%**sdC@	AA .r-   )r'   r(   r)   r4   r+   base_model_prefixmain_input_nameinput_modalitiessupports_gradient_checkpointing_keep_in_fp32_modules_strictrd   no_gradr   rK   rL   s   @r.   r   r      sH    "$O!&*#$>@U#V U]]_"A "Ar-   r   c                       e Zd Zy)r   Nr1   r,   r-   r.   r   r      r2   r-   r   c                   X     e Zd Z	 ddedz  f fdZdej                  dee   fdZ	 xZ
S )SLANeXtBackboneNr   c                     t         |   |       t        |j                        | _        t        j                  |j                  |j                  dddd      | _	        | j                          y )Nr   r   rQ   F)kernel_sizestridepaddingrP   )rB   rR   r   r6   vision_towerrS   Conv2dr8   r9   	post_conv	post_initrE   r   rF   rG   s      r.   rR   zSLANeXtBackbone.__init__   s^    
 	 01E1EF((&*G*GUV_`jkrw
 	r-   rt   rF   c                      | j                   |fi |}| j                  |j                        }|j                  d      j	                  dd      }t        ||j                  |j                        S )Nr   rQ   )last_hidden_statert   
attentions)r   r   r   flattenrj   r   rt   r   )rE   rt   rF   vision_outputs       r.   ru   zSLANeXtBackbone.forward   sl    )))-B6B}'F'FG%--a0::1a@+'55$//
 	
r-   rz   )r'   r(   r)   rA   rR   rd   Tensorr   r   ru   rK   rL   s   @r.   r   r      s6     #
t

U\\ 
VDV=W 
r-   r   c                        e Zd ZdeiZ	 d	dedz  f fdZeee		 d	de
j                  de
j                  dz  dee   fd                     Z xZS )
r   r   Nr   c                     t         |   |       t        |j                  |j                  |j
                        | _        t        |j                  |j
                        | _        | j                          y rz   )
rB   rR   rN   r9   r<   r;   r   rx   r   r   r   s      r.   rR   zSLANeXtSLAHead.__init__   s_    
 	 (?))6+=+=v?R?R)
% $.f.@.@&BUBU#V r-   rt   targetsrF   c                 6   t        j                  |j                  d   | j                  j                  ft         j
                  |j                        }t        j                  |j                  d   gt         j                  |j                        }g }g }t        | j                  j                  dz         D ]  }t        j                  || j                  j                        j                         }	| j                  ||j                         |	      \  }}| j                  |      }
|
j!                  d      }|j#                  |
       |j#                  |       t        j$                  |d      j'                  | j                  j                  dz
        j)                  d      j+                         s n t        j,                  t        j$                  |d      dt         j
                        j/                  |j0                        }t3        ||      S )	Nr   rb   device)sizerb   r   rQ   ra   r`   )r   rt   )rd   zerosshaper   r<   rh   r   longranger=   rf   one_hotr;   floatr   r   argmaxappendstackeqanyallrg   ri   rb   r   )rE   rt   r   rF   featurespredicted_charsstructure_preds_liststructure_ids_list_embedding_featurestructure_stepstructure_predss               r.   ru   zSLANeXtSLAHead.forward   s    ;;  #T[[%<%<=U]][h[o[o
  ++M,?,?,B+C5::^k^r^rs!t{{22Q67 		A !		/4;;;S;S T Z Z \77-BUBUBWYjkKHa!55h?N,333:O ''7%%o6{{-15889Q9QTU9UVZZ[]^bbd		 ))EKK0D!$LRT\a\i\ijmm
 Pdeer-   rz   )r'   r(   r)   rN   _can_record_outputsrA   rR   r   r   r   rd   rv   r   r   r   ru   rK   rL   s   @r.   r   r      s    - #t    (,f((f $f +,	f !   fr-   r   c                   b    e Zd ZU dZdZej                  dz  ed<   dZej                  dz  ed<   y) SLANeXtForTableRecognitionOutputam  
    head_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Hidden-states of the SLANeXtSLAHead at each prediction step, varies up to max `self.config.max_text_length` states (depending on early exits).
    head_attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Attentions of the SLANeXtSLAHead at each prediction step, varies up to max `self.config.max_text_length` attentions (depending on early exits).
    Nhead_hidden_stateshead_attentions)	r'   r(   r)   rH   r   rd   rv   r+   r   r,   r-   r.   r   r     s4     48))D0704OU&&-4r-   r   z
    SLANeXt Table Recognition model for table recognition tasks. Wraps the core SLANeXtPreTrainedModel
    and returns outputs compatible with the Transformers table recognition API.
    )custom_introc            	            e Zd Zdef fdZeedej                  de	e
   deej                     ez  fd              Z xZS )SLANeXtForTableRecognitionr   c                     t         |   |       t        |      | _        t	        |      | _        | j                          y )N)r   )rB   rR   r   r   r   headr   )rE   r   rG   s     r.   rR   z#SLANeXtForTableRecognition.__init__#  s2     'v6"&1	r-   r   rF   returnc                      | j                   |fi |} | j                  |j                  fi |}t        |j                  |j                  |j
                  |j                  |j
                        S )N)r   rt   r   r   r   )r   r   r   r   rt   r   )rE   r   rF   backbone_outputshead_outputss        r.   ru   z"SLANeXtForTableRecognition.forward)  sp    
 )4==@@ tyy!1!C!CNvN/*<<*88'22+99(33
 	
r-   )r'   r(   r)   r4   rR   r   r   rd   rv   r   r   tupler   ru   rK   rL   s   @r.   r   r     s`    }  
!--
9?@R9S
	u  	!$D	D
  
r-   r   )rd   )backendsc                        e Zd ZdZeZeZdddZdddZ	dZ
dZdZdZdZdddeddfd	Zd
ed   dededddedededededeee   z  dz  deee   z  dz  dedz  dedz  dedz  deez  dz  def dZdee   f fdZd Zd Z xZS )SLANeXtImageProcessorr   r%   )heightwidthTr   ztorch.Tensorr   r   c                 n
   |j                   \  }}}}|j                  ||z  ||      }|j                  }t        |j                  |j
                        t        ||      z  }t        ||z        }	t        ||z        }
t        j                  |
t        j                  |      }|dz   t        |      t        |
      z  z  dz
  }|j                         j                  t        j                        }||j                         z
  }t        j                  |dk  t        j                  |      |      }t        j                  |dk  t        j                  |      |      }t        j                  ||dz
  k\  t        j                   |      |      }t        j                  ||dz
  k\  t        j"                  ||dz
        |      }|dz  dz   j                         j                  t        j                        }d|z
  }t        j                  |	t        j                  |      }|dz   t        |      t        |	      z  z  dz
  }|j                         j                  t        j                        }||j                         z
  }t        j                  |dk  t        j                  |      |      }t        j                  |dk  t        j                  |      |      }t        j                  ||dz
  k\  t        j                   |      |      }t        j                  ||dz
  k\  t        j"                  ||dz
        |      }|dz  dz   j                         j                  t        j                        }d|z
  }|j%                  dd      j                  t        j&                        }|j                  t        j                        }|j)                         }|dz   j)                         }|j)                         }|dz   j)                         }|d d |d d d f   |d d d f   f   }|d d |d d d f   |d d d f   f   }|d d |d d d f   |d d d f   f   }|d d |d d d f   |d d d f   f   } |j                  d|	d      }!|j                  d|	d      }"|j                  dd|
      }#|j                  dd|
      }$|"|$|z  |#|z  z   z  |!|$|z  |#| z  z   z  z   }%|%dz   d	z	  }%|%j%                  dd      j                  t        j&                        }&|&j                  |||	|
      j                  |j*                  
      S )Nr   g      ?r   rQ   r   i      i       )rb   )r   viewr   maxr   r   roundrd   arangerh   r   floorri   int32where
zeros_like	ones_like	full_likeclampuint8r   rb   )'rE   r   r   
batch_sizechannelsr   r   r   scaletarget_heighttarget_width
target_colsrc_colsrc_col_floorsrc_col_fracweight_rightweight_left
target_rowsrc_rowsrc_row_floorsrc_row_fracweight_bottom
weight_topimage_uint8image_int32col_left	col_rightrow_top
row_bottompixel_top_leftpixel_top_rightpixel_bottom_leftpixel_bottom_rightweight_bottom_3dweight_top_3dweight_right_3dweight_left_3dinterpresults'                                          r.   _resizezSLANeXtImageProcessor._resizeG  s   
 /4kk+
Hfe

:0&%@DKK,s65/AAfun-UU]+\\,emmFS
#eu\7J(JKcQ**5;;7!4!4!66{{=1#4e6F6F|6TVbcMA$5u7G7G7VXef{{=EAI#=u|?\^jkUQY&uqy(QS`
 %t+c188:==ekkJ\)\\-u}}VT
#fm8L(LMPSS**5;;7!4!4!66{{=1#4e6F6F|6TVbcMA$5u7G7G7VXef{{=FQJ#>P\@]_klVaZ'QR
)SUb
 &,s299;>>u{{KM)
kk!S),,U[[9!nnU[[1 %%'"Q&,,.	$$&#a'--/
$Q4(8(47:K%KL%aD)99T1W;M&MN':ag+>q@Q(QR(Jq$w,?4QR7AS)ST(--aB"=!<&++Aq,?$))!Q=^+o.OO
1B B_WiEi ijk G$+a%((5{{:xMPPW\WbWbPccr-   images	do_resizeresamplez"tvF.InterpolationMode | int | Nonedo_center_crop	crop_size
do_rescalerescale_factordo_normalize
image_meanN	image_stddo_padpad_sizedisable_groupingreturn_tensorsc           	         |t               st        j                  d       t        ||      \  }}i }|j	                         D ]  \  }}|r| j                  ||      }|||<   ! t        ||      }t        ||      \  }}i }|j	                         D ]4  \  }}|r| j                  ||      }| j                  ||||	|
|      }|||<   6 t        ||      }|r| j                  |||      }t        d|i|      S )Nz&Resampling is not supported in SLANeXt)r&  )r   r   )r%  r&  r   )datatensor_type)r   loggerwarning_oncer   itemsr  r   center_croprescale_and_normalizepadr   )rE   r  r  r   r  r  r  r  r   r!  r"  r#  r$  r%  r&  r'  rF   grouped_imagesgrouped_images_indexresized_images_groupedr   stacked_imagesresized_imagesprocessed_images_groupedprocessed_imagess                            r.   _preprocessz!SLANeXtImageProcessor._preprocess  s@   & (@(B HI 0EV^n/o,,!#%3%9%9%; 	;!E>!%N!N,:"5)	; ((>@TU 0E^fv/w,,#% %3%9%9%; 	=!E>!%!1!1.)!L!77
NL*V_N /=$U+	= **BDXY#xx(88^nxo.2B!CQ_``r-   rF   c                 D    t        |   di | | j                          y r?   )rB   rR   init_decoderrD   s     r.   rR   zSLANeXtImageProcessor.__init__  s    "6"r-   c                    g d}|t        d      D cg c]  }d|dz    d c}z  }|t        d      D cg c]  }d|dz    d c}z  }d|vr|j                  d       d|v r|j                  d       d	g|z   d
gz   }t        |      D ci c]  \  }}||
 c}}| _        || _        g d| _        | j                  d	   | _        | j                  d
   | _        yc c}w c c}w c c}}w )a  
        Initialize the decoder vocabulary for table structure recognition.

        Builds a character dictionary mapping HTML table structure tokens (e.g., `<thead>`, `<tr>`, `<td>`, colspan/
        rowspan attributes) to integer indices. The dictionary includes special `"sos"` (start-of-sequence) and
        `"eos"` (end-of-sequence) tokens. Merged `<td></td>` tokens are used in place of standalone `<td>` tokens
        when applicable.
        )
z<thead>z</thead>z<tbody>z</tbody>z<tr>z</tr><td><td>z</td>   z
 colspan="r   "z
 rowspan="	<td></td>r<  soseos)r<  r=  rA  N)	r   r   remove	enumeraterA   	charactertd_tokenbos_ideos_id)rE   dict_characterichars       r.   r:  z"SLANeXtImageProcessor.init_decoder  s    
 	%)DQZAwa0DD%)DQZAwa0DDn,!!+.^#!!&)>1UG;,5n,EFDT1WF	'4ii&ii& ED Gs   CC!C&c                    |j                   | _        | j                  dd }t        | j                        t        | j                        g}t        | j                        }|j                  d      }|j                  d      j                  }g }|j                  d   }t        |      D ]  }g }	g }
t        |j                  d         D ]Y  }t        |||f         }|dkD  r||k(  r n=||v r$| j                  |   }|	j                  |       |
j                  |||f          [ |j                  |	       t        j                  |
      j                         j                         } g d|d   z   g dz   }|dS )aO  
        Post-process the raw model outputs to decode the predicted table structure into an HTML token sequence.

        Converts the model's predicted probability distributions over the structure vocabulary into a sequence of
        HTML tokens representing the table structure. The decoded tokens are wrapped with `<html>`, `<body>`, and
        `<table>` tags to form a complete HTML table structure.

        Args:
            outputs ([`SLANeXtForTableRecognitionOutput`]):
                Raw outputs from the SLANeXt model. The `last_hidden_state` field contains the predicted probability
                distributions over the structure vocabulary at each decoding step, with shape
                `(batch_size, max_text_length, num_classes)`.

        Returns:
            `dict`: A dictionary containing:
                - **structure** (`list[str]`): The predicted HTML table structure as a list of tokens, wrapped with
                  `<html>`, `<body>`, and `<table>` tags.
                - **structure_score** (`float`): The mean confidence score across all predicted tokens.
        r   rQ   r   r   )z<html>z<body>z<table>)z</table>z</body>z</html>)	structurestructure_score)r   predr*   rH  rI  r   r   valuesr   r   rF  r   rd   r   meanitem)rE   outputsstructure_probsignored_tokensend_idxstructure_idxstructure_str_listr   batch_indexstructure_list
score_listpositionchar_idxtextrO  rN  s                   r.   post_process_table_recognitionz4SLANeXtImageProcessor.post_process_table_recognition  s   ( --	))Aa.dkk*C,<=dkk"'..1.5)--!-4;;"((+
 , 	DKNJ!-"5"5a"89 J}[(-BCDa<H$7~-~~h/%%d+!!/+x2G"HIJ %%n5#kk*5::<AACO	D 46H6KKNpp	&?KKr-   )r'   r(   r)   r  r   r"  r   r#  r   r%  do_convert_rgbr  r  r!  r$  r   r  listboolr   strr   r   r8  r   r   rR   r:  r`  rK   rL   s   @r.   r   r   9  st    H&J$IC(D,HNIJLF@d@d @d 
	@dD0a^$0a 0a 	0a
 70a 0a 0a 0a 0a 0a DK'$.0a 4;&-0a t0a T/0a +0a  j(4/!0a$ 
%0ad!5 "'H.Lr-   r   )r   r4   r   r   r   r   )Mr   dataclassesr   rd   torch.nnrS   torch.nn.functional
functionalrf   $torchvision.transforms.v2.functional
transformsv2tvFhuggingface_hub.dataclassesr    r   r   activationsr   backbone_utilsr   configuration_utilsr	   image_processing_backendsr
   image_processing_utilsr   image_transformsr   r   image_utilsr   r   r   modeling_outputsr   modeling_utilsr   processing_utilsr   r   utilsr   r   r   r   r   utils.genericr   r   utils.import_utilsr   utils.output_capturingr   got_ocr2.configuration_got_ocr2r   got_ocr2.modeling_got_ocr2r    r!   
get_loggerr'   r+  r$   r0   r4   ModulerN   rx   r   r   r   r   r   r   r   __all__r,   r-   r.   <module>r     s     !     2 2 . & " 9 3 ; 2 E P P / - 4 l l C * 5 A 
		H	% CD-   E	3 	 CD ($  (  E (F+bii +B +A_ +A\	/ 	
, 
01f+ 1fh 	5 	5  	5 
!7 

. 	:VL. VL  VLrr-   