
    iU                        d dl mZ d dlZd dlmZ d dlmc mZ d dlm	Z	 ddl
mZ ddlmZmZmZmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZmZ ddlmZ ddlm Z m!Z! ddl"m#Z# ddl$m%Z% ddl&m'Z'm(Z(m)Z) ddl*m+Z+m,Z, ddl-m.Z. ddl/m0Z0 ddl1m2Z2 ddl3m4Z4 ddl5m6Z6  e(d      e	 G d dee                    Z7 e(d      e	 G d de                    Z8e( e.d       G d  d!e                    Z9 G d" d#e4      Z: G d$ d%ejv                        Z< G d& d'ejv                        Z= G d( d)ejv                        Z> G d* d+e      Z? G d, d-ejv                        Z@e( G d. d/e6             ZA G d0 d1eA      ZB e(d23       G d4 d5eeA             ZC G d6 d7ejv                        ZD e(d83       G d9 d:eA             ZEg d;ZFy)<    )SequenceN)strict   )ACT2FN)BackboneConfigMixinBackboneMixin%consolidate_backbone_kwargs_to_configfilter_output_hidden_states)PreTrainedConfig)BatchFeature)TorchvisionBackend)group_images_by_shapereorder_images)PILImageResamplingSizeDict)GradientCheckpointingLayer)BackboneOutputBaseModelOutputWithNoAttention)PreTrainedModel)Unpack)TransformersKwargsauto_docstringcan_return_tuple)
TensorTypemerge_with_config_defaults)requires)capture_outputs   )
AutoConfig)PPLCNetConvLayer)PPOCRV5ServerDetPreTrainedModelzPaddlePaddle/UVDoc_safetensors)
checkpointc                        e Zd ZU dZdZdZee   dz  ed<   dZ	ee
   dz  ed<   dZeee
   ee
df   z     ed<   d	Zeeee
e
e
ef   ee
ez     z        ed
<   dZeeee
df   ee
   z        ed<   dZe
ed<    fdZ xZS )UVDocBackboneConfiga  
    resnet_head (`Sequence[list[int] | tuple[int, ...]]`, *optional*, defaults to `((3, 32), (32, 32))`):
        Configuration for the ResNet head layers in format [in_channels, out_channels].
    resnet_configs (`Sequence[Sequence[tuple[int, int, int, bool] | list[int | bool]]]`, *optional*, defaults to `(((32, 32, 1, False),
        (32, 32, 3, False), (32, 32, 3, False)), ((32, 64, 1, True), (64, 64, 3, False), (64, 64, 3, False), (64, 64, 3, False)), ((64, 128, 1, True),
        (128, 128, 3, False), (128, 128, 3, False), (128, 128, 3, False), (128, 128, 3, False), (128, 128, 3, False)))`):
        Configuration for the ResNet stages in format [in_channels, out_channels, dilation_value, downsample].
    stage_configs (Sequence[Sequence[tuple[int, ...] | list[int]]], *optional*, defaults to `(((128, 1),), ((128, 2),),
        ((128, 5),), ((128, 8),(128, 3),(128, 2),), ((128, 12), (128, 7), (128, 4),), ((128, 18), (128, 12), (128, 6),),)`):
        Configuration for the bridge module stages in format [in_channels, dilation_value].
        Each inner sequence corresponds to a single bridge block, and the outer sequence groups blocks by bridge stage.
    uvdoc_backboneN_out_features_out_indices))r       )r(   r(   .resnet_head)))r(   r(      Fr(   r(   r   Fr+   ))r(   @   r*   Tr,   r,   r   Fr-   r-   ))r,      r*   Tr.   r.   r   Fr/   r/   r/   r/   resnet_configs)))r.   r*   )r.   r   ))r.      ))r.      )r.   r   r1   )r.      )r.      )r.      ))r.      r4   )r.      stage_configsr2   kernel_sizec                 f   | j                   D cg c]  }t        |       c}| _        dgt        dt        | j                         dz         D cg c]  }d| 	 c}z   | _        | j                  |j                  dd       |j                  dd              t        |    di | y c c}w c c}w )Nstemr*   stageout_indicesout_features)r?   r@    )	r:   lendepthsrangestage_names"set_output_features_output_indicespopsuper__post_init__)selfkwargsstagesidx	__class__s       x/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/uvdoc/modular_uvdoc.pyrI   z!UVDocBackboneConfig.__post_init__w   s    151C1CDvs6{D"8aTM_M_I`cdId@e&fse}&ff//

=$7fjjQ_aeFf 	0 	
 	'' E&fs   B)B.)__name__
__module____qualname____doc__
model_typer&   liststr__annotations__r'   intr)   r   tupler0   boolr:   r;   rI   __classcell__rN   s   @rO   r$   r$   0   s     "J&*M49t#*%)L$s)d"):K$s)eCHo56 
YNHXeCc4,?&@4d
CS&STU ,FM8HU38_tCy%@AB * K( (    r$   c                        e Zd ZU dZdZdeiZdZee	z  dz  e
d<   dZee
d<   dZee
d<   d	Zee
d
<   dZee   eedf   z  e
d<   dZeee   eedf   z     e
d<    fdZ xZS )UVDocConfiga  
    padding_mode (`str`, *optional*, defaults to `"reflect"`):
        Padding mode for convolutional layers. Supported modes are `"reflect"`, `"constant"`, and `"replicate"`.
    kernel_size (`int`, *optional*, defaults to 5):
        Kernel size for convolutional layers in the backbone network.
    bridge_connector (`list[int] | tuple[int, ...]`, *optional*, defaults to `(128, 128)`):
        Configuration for the bridge connector in format [in_channels, out_channels].
    out_point_positions2D (`Sequence[list[int] | tuple[int, ...]]`, *optional*, defaults to `((128, 32), (32, 2))`):
        Configuration for the output point positions 2D layer in format [in_channels, out_channels].
    uvdocbackbone_configNprelu
hidden_actreflectpadding_moder2   r;   )r.   r.   .bridge_connector))r.   r(   )r(   r   out_point_positions2Dc                 d    t        d| j                  dd|\  | _        }t        |   di | y )Nr%   )ra   default_config_typerA   )r	   ra   rH   rI   )rJ   rK   rN   s     rO   rI   zUVDocConfig.__post_init__   sB    'L (
 00 0(
 (
$f
 	''r]   )rP   rQ   rR   rS   rT   r   sub_configsra   dictr   rW   rc   rV   re   r;   rX   rf   rU   rY   rg   r   rI   r[   r\   s   @rO   r_   r_      s    	 J$j1K6:OT,,t3:J!L#!K4>d3i%S/1>CW8DIc3h$?@W( (r]   r_   )torch)backendsc                      e Zd ZdZdZdddZej                  Zde	d   de
ded	e
d
ede
dee	e   z  dz  dee	e   z  dz  de
dz  deez  dz  defdZ	 ddej$                  de	ej$                     dede	eeej$                  f      fdZy)UVDocImageProcessorTi  i  )heightwidthimagesztorch.Tensor	do_resizesize
do_rescalerescale_factordo_normalize
image_meanN	image_stddisable_groupingreturn_tensorsreturnc           	         t        ||	      \  }}i }|j                         D ]1  \  }}| j                  ||||||      }|d d g dd d d d f   }|||<   3 t        ||      }|j	                         }t        ||	      \  }}i }|j                         D ];  \  }}|r/t        j                  ||j                  |j                  fdd      }|||<   = t        ||      }t        ||d|
dg      S )	N)rz   )r   r*   r   bilinearTrt   modealign_corners)pixel_valuesoriginal_imagesr   )datatensor_typeskip_tensor_conversion)
r   itemsrescale_and_normalizer   copyFinterpolaterp   rq   r   )rJ   rr   rs   rt   ru   rv   rw   rx   ry   rz   r{   rK   grouped_imagesgrouped_images_indexprocessed_images_groupedshapestacked_imagesrescale_and_normalize_imagesr   interpolated_images_groupedr   s                        rO   _preprocesszUVDocImageProcessor._preprocess   s9    0EV^n/o,,#% %3%9%9%; 	=!E>!77
NL*V_N ,Ay!Q,>?N.<$U+	= (66NPd'e$6;;=/D(;K0
,, ')#%3%9%9%; 	@!E>!""$++tzz)Bcg" 2@'.	@ &&ACWX".?S&$5#6
 	
r]   
predictionr   scalec                    t        |      }t        j                  t        |      |j                        }g }t        |      D ]  \  }}|j                  dk(  r|j                  d      }|j                  |j                        }|j                  dd \  }}	t        j                  |||dz    ||	fdd	      }
|
j                  dddd      }t        j                  ||d
      }|j                  d      j                  ddd      }||z  }|j                  dg      j                  t        j                   dd      }|j#                  d|i        |S )a  
        Post-process document rectification predictions to convert them into rectified images.

        Args:
            prediction: Predicted 2D Bezier mesh coordinates, shape (B, 2, H, W)
            original_images: List of original input tensors, each of shape (C, H_i, W_i). Images may have different sizes.
            scale: Scaling factor for output images (default: 255.0)

        Returns:
            List of dictionaries containing rectified images. Each dictionary has:
                - "images": Rectified image tensor of shape (H, W, 3) with dtype torch.uint8
                          and BGR channel order (suitable for OpenCV visualization)
        )devicer   r   r   Nr*   r~   Tr   )r   )dimsF)dtypenon_blockingr   rr   )rU   rl   tensorfloatr   	enumeratendim	unsqueezetor   r   r   permutegrid_samplesqueezeflipuint8append)rJ   r   r   r   
image_listresultsioriginal_imageoriginal_heightoriginal_widthupsampled_meshrearranged_mesh	rectifiedimages                 rO   #post_process_document_rectificationz7UVDocImageProcessor.post_process_document_rectification   sW   & /*
U5\*2C2CD!*:!6 	.A~""a'!/!9!9!!<+..z/@/@AN.<.B.B12.F+O^ ]]1q1u%%~6"	N -44Q1a@O noUYZI %%a(00Aq9E EMEJJRDJ),,5;;TX],^ENNHe,-9	.< r]   )g     o@)rP   rQ   rR   ru   rs   rt   r   BILINEARresamplerU   rZ   r   r   rV   r   r   r   rl   Tensorrk   r   rA   r]   rO   ro   ro      s    JIC(D!**H/
^$/
 /
 	/

 /
 /
 /
 DK'$./
 4;&-/
 +/
 j(4//
 
/
j 	5LL5 ell+5 	5
 
d3$%	&5r]   ro   c                   X     e Zd ZdZ	 	 	 	 	 	 	 ddededededededed	ed
ef fdZ xZS )UVDocConvLayerz<Convolutional layer with batch normalization and activation.in_channelsout_channelsr;   stridepaddingre   biasdilation
activationc
           
      f    t         
|           t        j                  ||||||||      | _        y )N)r   r;   r   r   re   r   )rH   __init__nnConv2dconvolution)rJ   r   r   r;   r   r   re   r   r   r   rN   s             rO   r   zUVDocConvLayer.__init__  s9     	99#%	
r]   )r   r*   r   zerosFr*   relu)	rP   rQ   rR   rS   rX   rV   rZ   r   r[   r\   s   @rO   r   r     s    F # 

 
 	

 
 
 
 
 
 
 
r]   r   c                        e Zd ZdZ	 	 	 	 	 ddededededededed	ef fd
Zdej                  dej                  fdZ
 xZS )UVDocResidualBlockz*Base residual block with dilation support.r   r   r;   r   r   r   
downsampler   c	           
      4   t         	|           |rt        |||||dz  dd       nt        j                         | _        t        ||||||d      | _        t        |||d|d|d       | _        |t        |   | _	        y t        j                         | _	        y )Nr   T)r   r   r;   r   r   r   r   )r   r   r;   r   r   r   r   r*   )r   r   r;   r   r   r   r   r   )
rH   r   r   r   Identity	conv_down
conv_start
conv_finalr   act_fn)
rJ   r   r   r;   r   r   r   r   r   rN   s
            rO   r   zUVDocResidualBlock.__init__2  s     	  ')'#q(  	 )#%#
 )$%#	
 -7,BfZ(r]   hidden_statesr|   c                     | j                  |      }| j                  |      }| j                  |      }||z   }| j                  |      }|S N)r   r   r   r   )rJ   r   residuals      rO   forwardzUVDocResidualBlock.forwardd  sJ    >>-066%0M2r]   )r*   r   r*   Fr   )rP   rQ   rR   rS   rX   rZ   rV   r   rl   r   r   r[   r\   s   @rO   r   r   /  s    4   0V0V 0V 	0V
 0V 0V 0V 0V 0VdU\\ ell r]   r   c                   Z     e Zd ZdZ fdZdej                  dej                  fdZ xZS )UVDocResNetStagez3A ResNet stage containing multiple residual blocks.c                    t         |           |j                  |   }t        j                  g       | _        |D ]C  \  }}}}| j
                  j                  t        |||rdnd|dz  |||j                               E y )Nr   r*   )r   r   r   r   r   r   r;   )	rH   r   r0   r   
ModuleListlayersr   r   r;   )	rJ   configstage_indexrL   r   r   r   r   rN   s	           rO   r   zUVDocResNetStage.__init__p  s    &&{3mmB'?E 	;KxKK" +!- *1$qL%) & 2 2
	r]   r   r|   c                 8    | j                   D ]
  } ||      } |S r   )r   )rJ   r   layers      rO   r   zUVDocResNetStage.forward  s%    [[ 	1E!-0M	1r]   	rP   rQ   rR   rS   r   rl   r   r   r[   r\   s   @rO   r   r   m  s&    =$U\\ ell r]   r   c                   Z     e Zd ZdZ fdZdej                  dej                  fdZ xZS )UVDocResNetz$Initial resnet_head and resnet_down.c                 $   t         |           t        j                  g       | _        t        t        |j                              D ]b  }| j                  j                  t        |j                  |   d   |j                  |   d   |j                  d|j                  dz               d t        j                  g       | _
        t        t        |j                              D ])  }t        ||      }| j                  j                  |       + y )Nr   r*   r   )r   r   r;   r   r   )rH   r   r   r   r)   rD   rB   r   r   r;   resnet_downr0   r   )rJ   r   r   r   r>   rN   s        rO   r   zUVDocResNet.__init__  s    ==,s6--./ 		A## & 2 21 5a 8!'!3!3A!6q!9 & 2 2"..!3		 ==, V%:%:!;< 	+K$V[9E##E*	+r]   r   r|   c                 j    | j                   D ]
  } ||      } | j                  D ]
  } ||      } |S r   )r)   r   )rJ   r   headr>   s       rO   r   zUVDocResNet.forward  sF    $$ 	0D /M	0%% 	1E!-0M	1r]   r   r\   s   @rO   r   r     s&    .+&U\\ ell r]   r   c                   d     e Zd ZdZ fdZdej                  dee   dej                  fdZ	 xZ
S )UVDocBridgeBlockzDBridge module with dilated convolutions for long-range dependencies.c           	          t         |           t        j                  g       | _        |j
                  |   }|D ]-  \  }}| j                  j                  t        ||||             / y )N)r   r   )rH   r   r   r   blocksr:   r   r   )rJ   r   bridge_indexbridger   r   rN   s         rO   r   zUVDocBridgeBlock.__init__  sc    mmB'%%l3%+ 	n!KKK~k;PXcklm	nr]   r   rK   r|   c                 8    | j                   D ]
  } ||      } |S r   )r   )rJ   r   rK   blocks       rO   r   zUVDocBridgeBlock.forward  s'    
 [[ 	1E!-0M	1r]   )rP   rQ   rR   rS   r   rl   r   r   r   r   r[   r\   s   @rO   r   r     s;    Nn|| +, 
	r]   r   c                   Z     e Zd ZdZ fdZdej                  dej                  fdZ xZS )UVDocPointPositions2DzDModule for predicting 2D point positions for document rectification.c           	         t         |           t        |j                  d   d   |j                  d   d   |j                  d|j                  dz  |j
                  |j                        | _        t        j                  |j                  d   d   |j                  d   d   |j                  d|j                  dz  |j
                        | _
        y )Nr   r*   r   )r   r   r;   r   r   re   r   )r   r   r;   r   r   re   )rH   r   r   rg   r;   re   rc   r   r   r   conv_uprJ   r   rN   s     rO   r   zUVDocPointPositions2D.__init__  s    '44Q7:55a8;**&&!+,,((
 yy44Q7:55a8;**&&!+,,
r]   r   r|   c                 J    | j                  |      }| j                  |      }|S r   )r   r   )rJ   r   s     rO   r   zUVDocPointPositions2D.forward  s$    }5]3r]   r   r\   s   @rO   r   r     s&    N
,U\\ ell r]   r   c                   F    e Zd ZdZdeiZ ej                         d        Zy)UVDocPreTrainedModelTr   c                     t        j                  |       	 t        |t        j                        r|j                          y y r   )r   _init_weights
isinstancer   PReLUreset_parameters)rJ   modules     rO   r   z"UVDocPreTrainedModel._init_weights  s2    %%f-%fbhh'##% (r]   N)	rP   rQ   rR   supports_gradient_checkpointingr   _can_record_outputsrl   no_gradr   rA   r]   rO   r   r     s2    &*#) U]]_& &r]   r   c                   t     e Zd Z fdZeedej                  dee	   dej                  fd              Z
 xZS )UVDocBridgec                    t         |   |       t        j                  g       | _        t        t        |j                              D ]'  }| j                  j                  t        ||             ) | j                          y r   )rH   r   r   r   r   rD   rB   r:   r   r   	post_init)rJ   r   r   rN   s      rO   r   zUVDocBridge.__init__  se     mmB'!#f&:&:";< 	GLKK/EF	Gr]   r   rK   r|   c                 L    | j                   D ]
  } ||      } t              S )N)last_hidden_state)r   r   )rJ   r   rK   r   features        rO   r   zUVDocBridge.forward  s,     [[ 	+EM*G	+-HHr]   )rP   rQ   rR   r   r   r   rl   r   r   r   r   r[   r\   s   @rO   r   r     sS      I||I +,I 
	I   Ir]   r   z6
    UVDoc backbone model for feature extraction.
    )custom_introc            	       x     e Zd ZdZdZdef fdZeee	de
j                  dee   defd                     Z xZS )	UVDocBackboneFbackboner   c                    t         |   |       |j                  d   d   g}|j                  D ]  }|j	                  |d   d           || _        t        |      | _        t        |      | _	        | j                          y )Nr   r   r*   )rH   r   r)   r:   r   num_featuresr   resnetr   r   r   )rJ   r   r	  r>   rN   s       rO   r   zUVDocBackbone.__init__  s}     **2.r23)) 	-Ea,	-(!&)!&)r]   r   rK   r|   c                    d|d<   | j                  |      } | j                  |fi |}d}t        | j                        D ]'  \  }}|| j                  v s||j
                  |   fz  }) t        ||j
                        S )NToutput_hidden_statesrA   )feature_mapsr   )r
  r   r   rE   r@   r   r   )rJ   r   rK   r   outputsr  rM   r>   s           rO   r   zUVDocBackbone.forward  s     *.%&L1$++m6v6#D$4$45 	>JC)))!6!6s!; ==	> %!//
 	
r]   )rP   rQ   rR   has_attentionsbase_model_prefixr$   r   r   r
   r   rl   FloatTensorr   r   r   r   r[   r\   s   @rO   r  r    si     N"2   
''
 +,
 
	
  ! 
r]   r  c                   t     e Zd Z fdZdej
                  dee   dej                  j
                  fdZ xZ	S )	UVDocHeadc                    t         |           t        |j                  j                        | _        t        |j                  d   | j
                  z  |j                  d   dddd      | _        t        |      | _	        y )Nr   r*   )r   r   r;   r   r   r   )
rH   r   rB   ra   r:   num_bridge_layersr   rf   r   rg   r   s     rO   r   zUVDocHead.__init__*  sv    !$V%;%;%I%I!J .//2T5K5KK003!
 &;6%B"r]   r   rK   r|   c                 J    | j                  |      }| j                  |      }|S r   )rf   rg   )rJ   r   rK   s      rO   r   zUVDocHead.forward9  s*    
 --m<22=Ar]   )
rP   rQ   rR   r   rl   r   r   r   r   r[   r\   s   @rO   r  r  )  s>    C|| +, 
			r]   r  z
    The model takes raw document images (pixel values) as input, processes them through the UVDoc backbone to predict spatial transformation parameters,
    and outputs the rectified (corrected) document image tensor.
    c            	            e Zd Zdef fdZeedej                  de	e
   deej                     ez  fd              Z xZS )
UVDocModelr   c                     t         |   |       t        |j                        | _        t        |      | _        | j                          y r   )rH   r   r  ra   r  r  r   r   r   s     rO   r   zUVDocModel.__init__J  s8     %f&<&<=f%	r]   r   rK   r|   c                      | j                   |fi |}t        j                  |j                  d      } | j                  |fi |}t        ||j                        S )Nr*   )dim)r  r   )r  rl   catr  r   r   r   )rJ   r   rK   backbone_outputsfused_outputsr  s         rO   r   zUVDocModel.forwardQ  s`     )4==@@		"2"?"?QG%DIIm>v>-/*88
 	
r]   )rP   rQ   rR   r_   r   r   r   rl   r  r   r   rY   r   r   r[   r\   s   @rO   r  r  C  sd    {  
''
 +,
 
u  	!$B	B	
  
r]   r  )r   r  r$   ro   r_   r  r   )Gcollections.abcr   rl   torch.nnr   torch.nn.functional
functionalr   huggingface_hub.dataclassesr   activationsr   backbone_utilsr   r   r	   r
   configuration_utilsr   feature_extraction_utilsr   image_processing_backendsr   image_transformsr   r   image_utilsr   r   modeling_layersr   modeling_outputsr   r   modeling_utilsr   processing_utilsr   utilsr   r   r   utils.genericr   r   utils.import_utilsr   utils.output_capturingr   autor   pp_lcnet.modeling_pp_lcnetr    0pp_ocrv5_server_det.modeling_pp_ocrv5_server_detr!   r$   r_   ro   r   Moduler   r   r   r   r   r   r   r  r  r  __all__rA   r]   rO   <module>r8     s    %     . !  4 4 ; E 7 9 N - & I I C * 5  9 ^ ;<K(-/? K(  =K(\ ;<(" (  =(> 	:l, l  l^
% 
:; ;|ryy 6")) <1 (BII > &: & &I& I( 
%
M#7 %

%
P		 4 
% 

2r]   