
    i                     6   d dl mZ d dlmZ d dlZd dlZd dlmZ d dlm	Z	 d dl
mZ ddlmZ dd	lmZ dd
lmZmZ ddlmZmZ ddlmZmZmZmZmZ ddlmZ ddlmZm Z  ddl!m"Z" ddl#m$Z$ ddl%m&Z& ddl'm(Z(m)Z) ddl*m+Z+ ddl,m-Z- ddl.m/Z/  ej`                  e1      Z2 ed      e G d de                    Z3e ed       G d de                    Z4 G d d ed!"      Z5 G d# d$e-      Z6 ed%&       G d' d(e+             Z7 G d) d*e	jp                        Z9 G d+ d,e(      Z: G d- d.e$      Z; G d/ d0e	jp                        Z<d1ejz                  d2ejz                  d3ejz                  d4ejz                  fd5Z> G d6 d7e	jp                        Z? G d8 d9e	jp                        Z@e G d: d;e             ZAd<ejz                  d=eBd4eCejz                  ejz                  f   fd>ZDd?ejz                  d@eEdAeEd4ejz                  fdBZF edC       G dD dEeA             ZGg dFZHy)G    )Callable)	dataclassN)strict)nnpad_sequence   )PreTrainedConfig)FlashAttentionKwargs)ALL_ATTENTION_FUNCTIONSPreTrainedModel)ImagesKwargsUnpack)ModelOutput
TensorTypeauto_docstringcan_return_tuplelogging)requires   )CONFIG_MAPPING
AutoConfig)AutoModelForKeypointDetection)CLIPMLP)apply_rotary_pos_emb)LlamaAttentioneager_attention_forward)SuperGlueImageProcessorPil)SuperGlueImageProcessor)SuperPointConfigzETH-CVG/lightglue_superpoint)
checkpointc                        e Zd ZU dZdZdeiZdZee	z  dz  e
d<   dZee
d<   dZee
d<   d	Zee
d
<   dZedz  e
d<   dZee
d<   dZee
d<   dZee
d<   dZee
d<   dZee
d<   dZeez  e
d<   dZee
d<    fdZd Z xZS )LightGlueConfigaG  
    keypoint_detector_config (`Union[AutoConfig, dict]`,  *optional*, defaults to `SuperPointConfig`):
        The config object or dictionary of the keypoint detector.
    descriptor_dim (`int`, *optional*, defaults to 256):
        The dimension of the descriptors.
    depth_confidence (`float`, *optional*, defaults to 0.95):
        The confidence threshold used to perform early stopping
    width_confidence (`float`, *optional*, defaults to 0.99):
        The confidence threshold used to prune points
    filter_threshold (`float`, *optional*, defaults to 0.1):
        The confidence threshold used to filter matches

    Examples:
        ```python
        >>> from transformers import LightGlueConfig, LightGlueForKeypointMatching

        >>> # Initializing a LightGlue style configuration
        >>> configuration = LightGlueConfig()

        >>> # Initializing a model from the LightGlue style configuration
        >>> model = LightGlueForKeypointMatching(configuration)

        >>> # Accessing the model configuration
        >>> configuration = model.config
        ```
    	lightgluekeypoint_detector_configN   descriptor_dim	   num_hidden_layers   num_attention_headsnum_key_value_headsgffffff?depth_confidencegGz?width_confidence皙?filter_thresholdg{Gz?initializer_rangegelu
hidden_act        attention_dropoutTattention_biasc                    | j                   | j                  | _         t        | j                  t              rX| j                  j                  dd      | j                  d<   t        | j                  d      di | j                  ddi| _        n | j                  t        d   d      | _        | j                  dz  | _        | j                  | _	        t        | ,  di | y )N
model_type
superpointattn_implementationeager)r:   r    )r,   r+   
isinstancer%   dictgetr   r'   intermediate_sizehidden_sizesuper__post_init__)selfkwargs	__class__s     /var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/lightglue/modular_lightglue.pyrC   zLightGlueConfig.__post_init__X   s    ##+'+'?'?D$ d33T::>:W:W:[:[\hjv:wD)),7,:4;X;XYe;f,g -//-EL-D) **2,:<,H]d,eD)!%!4!4q!8..''    c                 R    | j                   | j                  z  dk7  rt        d      y)zOPart of `@strict`-powered validation. Validates the architecture of the config.r   z1descriptor_dim % num_heads is different from zeroN)r'   r+   
ValueError)rD   s    rG   validate_architecturez%LightGlueConfig.validate_architecturej   s,    !9!99Q>PQQ ?rH   )__name__
__module____qualname____doc__r8   r   sub_configsr%   r>   r    __annotations__r'   intr)   r+   r,   r-   floatr.   r0   r1   r3   strr5   r6   boolrC   rK   __classcell__rF   s   @rG   r#   r#   *   s    6 J-z:K?Cd%55<CNCs  &*t*"e""e"!e!#u#J%(us{(ND($RrH   r#   a  
    Base class for outputs of LightGlue keypoint matching models. Due to the nature of keypoint detection and matching,
    the number of keypoints is not fixed and can vary from image to image, which makes batching non-trivial. In the
    batch of images, the maximum number of matches is set as the dimension of the matches and matching scores. The mask
    tensor is used to indicate which values in the keypoints, matches, matching_scores and prune tensors are keypoint
    matching information.
    )custom_introc                   ^   e Zd ZU dZdZej                  dz  ed<   dZej                  dz  ed<   dZ	ej                  dz  ed<   dZ
ej                  dz  ed<   dZej                  dz  ed<   dZej                  dz  ed<   dZeej                     dz  ed	<   dZeej                     dz  ed
<   y)LightGlueKeypointMatchingOutputa  
    loss (`torch.FloatTensor` of shape `(1,)`, *optional*):
        Loss computed during training.
    matches (`torch.FloatTensor` of shape `(batch_size, 2, num_matches)`):
        Index of keypoint matched in the other image.
    matching_scores (`torch.FloatTensor` of shape `(batch_size, 2, num_matches)`):
        Scores of predicted matches.
    keypoints (`torch.FloatTensor` of shape `(batch_size, num_keypoints, 2)`):
        Absolute (x, y) coordinates of predicted keypoints in a given image.
    prune (`torch.IntTensor` of shape `(batch_size, num_keypoints)`):
        Pruning mask indicating which keypoints are removed and at which layer.
    mask (`torch.BoolTensor` of shape `(batch_size, num_keypoints)`):
        Mask indicating which values in matches, matching_scores, keypoints and prune are keypoint matching
        information.
    hidden_states (`Tuple[torch.FloatTensor, ...]`, *optional*):
        Tuple of `torch.FloatTensor` (one for the output of each stage) of shape `(batch_size, 2, num_channels,
        num_keypoints)` returned when `output_hidden_states=True` is passed or when
        `config.output_hidden_states=True`
    attentions (`Tuple[torch.FloatTensor, ...]`, *optional*):
        Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, 2, num_heads, num_keypoints,
        num_keypoints)` returned when `output_attentions=True` is passed or when
        `config.output_attentions=True`
    Nlossmatchesmatching_scores	keypointsprunemaskhidden_states
attentions)rL   rM   rN   rO   r[   torchFloatTensorrQ   r\   r]   r^   r_   	IntTensorr`   ra   tuplerb   r<   rH   rG   rZ   rZ   p   s    0 &*D%

d
")(,GU%,04OU&&-4*.Iu  4'.$(E5??T!(%)D%

d
")59M5**+d2926Je''(4/6rH   rZ   c                       e Zd ZU dZeed<   y)LightGlueImageProcessorKwargsz
    do_grayscale (`bool`, *optional*, defaults to `self.do_grayscale`):
        Whether to convert the image to grayscale. Can be overridden by `do_grayscale` in the `preprocess` method.
    do_grayscaleN)rL   rM   rN   rO   rU   rQ   r<   rH   rG   rh   rh      s    
 rH   rh   F)totalc                   d     e Zd Z	 ddddeee   z  dedeeee	j                  f      f fdZ xZS )LightGlueImageProcessoroutputsrZ   target_sizes	thresholdreturnc                 &    t         |   |||      S NrB   post_process_keypoint_matchingrD   rm   rn   ro   rF   s       rG   rt   z6LightGlueImageProcessor.post_process_keypoint_matching   s     w5g|YWWrH   r4   )rL   rM   rN   r   listrf   rS   r>   rT   rc   Tensorrt   rV   rW   s   @rG   rl   rl      sZ    
 	X2X !4;.X 	X
 
d3$%	&X XrH   rl   rc   backendsc                   h     e Zd Z ed      	 d
dddeee   z  dedeee	df      f fd	       Z
 xZS )LightGlueImageProcessorPilry   rz   rm   rZ   rn   ro   rp   ztorch.Tensorc                 &    t         |   |||      S rr   rs   ru   s       rG   rt   z9LightGlueImageProcessorPil.post_process_keypoint_matching   s     w5g|YWWrH   rv   )rL   rM   rN   r   r   rw   rf   rS   r>   rT   rt   rV   rW   s   @rG   r}   r}      sc    z"
 	X2X !4;.X 	X
 
d3&'	(X #XrH   r}   c            
            e Zd Zdef fdZ	 ddej                  dedz  deej                     eej                  ej                  f   z  fdZ	 xZ
S )	LightGluePositionalEncoderconfigc                     t         |           t        j                  d|j                  |j
                  z  dz  d      | _        y )Nr   Fbias)rB   __init__r   Linearr'   r+   	projectorrD   r   rF   s     rG   r   z#LightGluePositionalEncoder.__init__   s:    1f&;&;v?Y?Y&Y]^&^ejkrH   r^   output_hidden_statesNrp   c                     | j                  |      }|j                  dd      }t        j                  |      }t        j                  |      }||f}|r||f}|S |f}|S )Nr   dim)r   repeat_interleaverc   cossin)rD   r^   r   projected_keypoints
embeddingscosinessinesoutputs           rG   forwardz"LightGluePositionalEncoder.forward   sq     #nnY7(::1":E
))J'		*%u%
6J*12 R\P]rH   F)rL   rM   rN   r#   r   rc   rx   rU   rf   r   rV   rW   s   @rG   r   r      s]    l l
 LQ		=AD[		u||	uU\\5<<%?@	@	rH   r   c                   .    e Zd Zdedef fdZ	 	 	 	 ddej                  deej                  ej                  f   dz  dej                  dz  dej                  dz  d	ej                  dz  d
e	e
   deej                  ej                  dz  f   fdZ xZS )LightGlueAttentionr   	layer_idxc                 &    t         |           | `y rr   )rB   r   
rotary_embrD   r   r   rF   s      rG   r   zLightGlueAttention.__init__   s    OrH   Nra   position_embeddingsattention_maskencoder_hidden_statesencoder_attention_maskrE   rp   c                    |j                   d d }g |d| j                  }| j                  |      j                  |      j	                  dd      }	|d u}
|
r|n|}|
r|n|}| j                  |      j                  |      j	                  dd      }| j                  |      j                  |      j	                  dd      }||\  }}t        |	|||      \  }	}t        j                  | j                  j                  t              } || |	|||f| j                  sdn| j                  | j                  d|\  }} |j                   g |d j#                         }| j%                  |      }||fS )Nr      r   r4   )dropoutscaling)shapehead_dimq_projview	transposek_projv_projr   r   get_interfacer   _attn_implementationr   trainingr5   r   reshape
contiguouso_proj)rD   ra   r   r   r   r   rE   input_shapehidden_shapequery_statesis_cross_attentioncurrent_statescurrent_attention_mask
key_statesvalue_statesr   r   attention_interfaceattn_outputattn_weightss                       rG   r   zLightGlueAttention.forward   s    $))#2.88b8$--8{{=166|DNNqRST2$>2D.-;M!7Sa[[055lCMMaQRS
{{>277EOOPQSTU**HC';L*VY[^'_$L*(?(M(MKK,,.E)
 %8"	%
  $}}C$2H2HLL	%
 	%
!\ *k));;;;FFHkk+.L((rH   )NNNN)rL   rM   rN   r#   rR   r   rc   rx   rf   r   r   r   rV   rW   s   @rG   r   r      s     3  IM.2596:*)||*) #5<<#=>E*) t+	*)
  %||d2*) !&t 3*) -.*) 
u||U\\D00	1*)rH   r   c                   \     e Zd Zdef fdZdej                  dej                  fdZ xZS )LightGlueMLPr   c                     t         |   |       t        j                  |j                  |j                        | _        t        j                  |j                  d      | _        y )NT)elementwise_affine)rB   r   r   r   r@   fc1	LayerNorm
layer_normr   s     rG   r   zLightGlueMLP.__init__   sG     99V55v7O7OP,,v'?'?TXYrH   ra   rp   c                     | j                  |      }| j                  |      }| j                  |      }| j                  |      }|S rr   )r   r   activation_fnfc2)rD   ra   s     rG   r   zLightGlueMLP.forward  sB    /6**=9/rH   	rL   rM   rN   r#   r   rc   rx   r   rV   rW   s   @rG   r   r      s,    Z Z
U\\ ell rH   r   c                        e Zd Zdedef fdZ	 	 ddej                  dej                  dej                  dedz  d	edz  d
e	ej                  e	ej                     dz  e	ej                     dz  f   fdZ
 xZS )LightGlueTransformerLayerr   r   c                     t         |           t        ||      | _        t	        |      | _        t        ||      | _        t	        |      | _        y rr   )rB   r   r   self_attentionr   self_mlpcross_attention	cross_mlpr   s      rG   r   z"LightGlueTransformerLayer.__init__  sD    0C$V,1&)D%f-rH   descriptorsr^   r   r   Noutput_attentionsrp   c                    |rdnd }|rdnd }|r||fz   }|j                   \  }}	}
| j                  ||||      \  }}t        j                  ||gd      }| j	                  |      }||z   }|r||f}|j                  dd|	|
      j                  d      j                  ||	|
      }|6|j                  dddd|	      j                  d      j                  |dd|	      nd }| j                  ||||      \  }}t        j                  ||gd      }| j                  |      }||z   }|r6||f}||j                  ||	|
      fz   z   |j                  ||	|
      fz   |z   }|r
||fz   |fz   }|||fS )Nr<   )r   r   r   r   r   r   r   )r   r   r   )	r   r   rc   catr   r   flipr   r   )rD   r   r^   r   r   r   all_hidden_statesall_attentions
batch_sizenum_keypointsr'   attention_outputself_attentionsintermediate_statesoutput_statesself_attention_descriptorsself_attention_hidden_statesr   r   cross_attention_outputcross_attentionscross_intermediate_statescross_output_statescross_attention_hidden_statess                           rG   r   z!LightGlueTransformerLayer.forward  s	    #7BD0d 1[N B4?4E4E1
M> -1,?,? ))/	 -@ -
)/ $ii6F(GRP&9:%0=%@",?+O( '..r1m^TT!WWZ? 	 ) ""2q!Q>CCAFNNz[\^_ano 	 483G3G&"7#9/	 4H 4
0 0 %*II/IKa.bhj$k!"nn-FG03FF-FH[,\)!-55j-Q_`bc./ &&z=.QST 0	0  +.@@DTCVVN-~==rH   )FF)rL   rM   rN   r#   rR   r   rc   rx   rU   rf   r   rV   rW   s   @rG   r   r     s    . .3 . -2).H>\\H> <<H> 	H>
 #TkH>  $;H> 
u||U5<<047u||9Lt9SS	TH>rH   r   
similaritymatchability0matchability1rp   c                    | j                   \  }}}t        j                  j                  |      t        j                  j                  |      j	                  dd      z   }t        j                  j                  | d      }t        j                  j                  | j	                  dd      j                         d      j	                  dd      }| j                  ||dz   |dz   fd      }	||z   |z   |	ddd|d|f<   t        j                  j                  |j                  d             |	dddddf<   t        j                  j                  |j                  d             |	dddddf<   |	S )z;create the log assignment matrix from logits and similarityr   r   r   r   N)	r   r   
functional
logsigmoidr   log_softmaxr   new_fullsqueeze)
r   r   r   r   num_keypoints_0num_keypoints_1certaintiesscores0scores1scoress
             rG   sigmoid_log_double_softmaxr   `  sS    4>3C3C0J--**=9BMM<T<TUb<c<m<mnoqr<ssKmm''
A6Gmm''
(<(<R(D(O(O(QSTU__`bdfgG  *o.A?UVCV!WYZ[F4;g4E4SF1 0 00111=3H3H3L2LMF1crc2:11=3H3H3L2LMF1b#2#:MrH   c                        e Zd Zdef fdZdej                  dej                  dej                  fdZdej                  dej                  fdZ xZ	S )LightGlueMatchAssignmentLayerr   c                     t         |           |j                  | _        t        j                  | j                  | j                  d      | _        t        j                  | j                  dd      | _        y )NTr   r   )rB   r   r'   r   r   final_projectionmatchabilityr   s     rG   r   z&LightGlueMatchAssignmentLayer.__init__p  sY    $33 "		$*=*=t?R?RY] ^IId&9&914HrH   r   r`   rp   c                    |j                   \  }}}| j                  |      }|t        j                  | j                  |j
                        dz  z  }|j                  |dz  d||      }|d d df   }|d d df   }||j                  dd      z  }	||j                  |dz  d|      }|d d df   j                  d      }
|d d df   j                  d      j                  dd      }|
|z  }|	j                  |dk(  t        j                  |	j                        j                        }	| j                  |      }|j                  |dz  d|d      }|d d df   }|d d df   }t        |	||      }|S )Ndeviceg      ?r   r   r   r   r   )r   r   rc   tensorr'   r   r   r   	unsqueezemasked_fillfinfodtypeminr   r   )rD   r   r`   r   r   r'   m_descriptorsm_descriptors0m_descriptors1r   mask0mask1r   matchability_0matchability_1r   s                   rG   r   z%LightGlueMatchAssignmentLayer.forwardw  s   4?4E4E1
M>--k:%T5H5HQ^QeQe(fjn(nn%--jAoq-Q_`&q!t,&q!t,#n&>&>r2&FF
<<
aMBDAJ((,EAJ((,66r2>E5=D#//	5;;zGWGW;X;\;\]J ((5#++J!OQqQ%ad+%ad+ ,JWrH   c                     | j                  |      }t        j                  j                  |      j	                  d      }|S )z0Get matchability of descriptors as a probabilityr   )r   r   r   sigmoidr   )rD   r   r   s      rG   get_matchabilityz.LightGlueMatchAssignmentLayer.get_matchability  s7    ((5}},,\:BB2FrH   )
rL   rM   rN   r#   r   rc   rx   r   r  rV   rW   s   @rG   r   r   o  sR    I I5<< u||  4ELL U\\ rH   r   c                   \     e Zd Zdef fdZdej                  dej                  fdZ xZS )LightGlueTokenConfidenceLayerr   c                 l    t         |           t        j                  |j                  d      | _        y )Nr   )rB   r   r   r   r'   tokenr   s     rG   r   z&LightGlueTokenConfidenceLayer.__init__  s&    YYv44a8
rH   r   rp   c                     | j                  |j                               }t        j                  j	                  |      j                  d      }|S )Nr   )r  detachr   r   r  r   )rD   r   r  s      rG   r   z%LightGlueTokenConfidenceLayer.forward  s=    

;--/0%%e,44R8rH   r   rW   s   @rG   r  r    s*    9 9
5<< ELL rH   r  c                   4    e Zd ZU dZeed<   dZdZdZdZ	dZ
dZy)	LightGluePreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    r   r$   pixel_values)imageFTN)rL   rM   rN   rO   r#   rQ   base_model_prefixmain_input_nameinput_modalitiessupports_gradient_checkpointing_supports_flash_attn_supports_sdpar<   rH   rG   r  r    s1    
 #$O!&+#NrH   r  r   ro   c                 6   | j                   \  }}}| ddddddf   j                  d      }| ddddddf   j                  d      }|j                  }|j                  }t        j                  |j                   d   |j
                        d   }t        j                  |j                   d   |j
                        d   }	||j                  d|      k(  }
|	|j                  d|      k(  }|j                  j                         }|j                  d      }t        j                  |
||      }t        j                  ||j                  d|      |      }|
||kD  z  }||j                  d|      z  }t        j                  ||d      }t        j                  ||d      }t        j                  ||g      j                  dd      j                  |dz  d      }t        j                  ||g      j                  dd      j                  |dz  d      }||fS )z1obtain matches from a score matrix [Bx M+1 x N+1]Nr   r   r   r   r   )r   maxindicesrc   aranger   gathervaluesexp
new_tensorwherestackr   r   )r   ro   r   _max0max1matches0matches1indices0indices1mutual0mutual1zeromatching_scores0matching_scores1valid0valid1r\   r]   s                      rG   get_matches_from_scoresr5    s   ||J1!SbS#2#+""1%D!SbS#2#+""1%D||H||H ||HNN1-hooFtLH||HNN1-hooFtLH(//!X66G(//!X66G ;;??D??1D{{7D$7{{7,<,C,CAx,PRVW(945Fv}}Q11F {{68R0H{{68R0Hkk8X./99!Q?GG
UVXZ[Gkk#35E"FGQQRSUVW__`jmn`nprsOO##rH   r^   heightwidthc                     t        j                  ||g| j                  | j                        d   }|dz  }|j	                  d      j
                  dz  }| |ddddf   z
  |d   z  } | S )a  
    Normalize keypoints locations based on image image_shape

    Args:
        keypoints (`torch.Tensor` of shape `(batch_size, num_keypoints, 2)`):
            Keypoints locations in (x, y) format.
        height (`int`):
            Image height.
        width (`int`):
            Image width.

    Returns:
        Normalized keypoints locations of shape (`torch.Tensor` of shape `(batch_size, num_keypoints, 2)`).
    r   r  Nr   r   .).NN)rc   r   r   r  r  r"  )r^   r6  r7  sizeshiftscales         rG   normalize_keypointsr=    sp     <<	0@0@	XY]^D1HEHHRL!#EU3a<00E/4JJIrH   zV
    LightGlue model taking images as inputs and outputting the matching of them.
    c                       e Zd ZdZdef fdZdedefdZ	 d#de	j                  de	j                  d	ed
z  dee	j                  ee	j                  e	j                  f   f   fdZde	j                  dede	j                  de	j                  de	j                  f
dZd$dZde	j                  de	j                  dede	j                  fdZde	j                  de	j                  de	j                  de	j                  de	j                  de	j                  defdZd Zde	j                  de	j                  de	j                  de	j                  dee	j                  e	j                  f   f
dZ	 	 	 d%de	j                  de	j                  dedede	j                  d
z  ded
z  d	ed
z  dee	j                  e	j                  e	j                  eef   fdZee	 	 	 d%d e	j.                  d!e	j0                  d
z  ded
z  d	ed
z  deez  f
d"              Z xZS )&LightGlueForKeypointMatchingar  
    LightGlue is a model matching keypoints in images by leveraging detections from a keypoint detector such as
    SuperPoint. It is based on the SuperGlue architecture and is designed to be lightweight and efficient.
    It consists of :
        1. Keypoint Encoder
        2. A Graph Neural Network with self and cross attention layers
        3. Matching Assignment layers

    The correspondence ids use -1 to indicate non-matching points.

    Philipp Lindenberger, Paul-Edouard Sarlin and Marc Pollefeys. LightGlue: Local Feature Matching at Light Speed.
    In ICCV 2023. https://huggingface.co/papers/2306.13643
    r   c           	         t         |   |       t        j                  |j                        | _        |j                  j                  | _        |j                  | _        |j                  | _
        |j                  | _        |j                  | _        |j                  | _        | j                  | j                  k7  r2t        j                  | j                  | j                  d      | _        nt        j"                         | _        t%        |      | _        t        j(                  t+        |j                        D cg c]  }t-        ||       c}      | _        t        j(                  t+        |j                        D cg c]  }t1        |       c}      | _        t        j(                  t+        |j                  dz
        D cg c]  }t5        |       c}      | _        | j9                          y c c}w c c}w c c}w )NTr   )r   r   )rB   r   r   from_configr%   keypoint_detectordescriptor_decoder_dim keypoint_detector_descriptor_dimr'   r)   
num_layersr0   r-   r.   r   r   input_projectionIdentityr   positional_encoder
ModuleListranger   transformer_layersr   match_assignment_layersr  token_confidence	post_init)rD   r   ir'  rF   s       rG   r   z%LightGlueForKeypointMatching.__init__  s    !>!J!J6KjKj!k060O0O0f0f-$33 22 & 7 7 & 7 7 & 7 7$"G"GG$&IId.S.SUYUhUhos$tD!$&KKMD!"<V"D"$--EJ6KcKcEde&v;e#
 (*}}<A&BZBZ<[\q*62\(
$ !#<A&BZBZ]^B^<_`q*62`!
 	 f ] as   ?G;H Hlayer_indexrp   c                     ddt        j                  d|z  | j                  z        z  z   }t        j                  |dd      S )z-scaled confidence threshold for a given layerg?r/   g      r   r   )npr#  rE  clip)rD   rP  ro   s      rG   _get_confidence_thresholdz6LightGlueForKeypointMatching._get_confidence_threshold  s;    #tk'9DOO'K LLL	wwy!Q''rH   r   r^   r   Nc                     |j                         j                         }| j                  |      }| j                  ||      }||fS )Nr   )r  r   rF  rH  )rD   r   r^   r   projected_descriptorskeypoint_encoding_outputs         rG   _keypoint_processingz1LightGlueForKeypointMatching._keypoint_processing!  sO     "((*557 $ 5 5k B#'#:#:9[o#:#p $&>>>rH   keypoint_confidencesr`   
num_pointsc                 |   |j                   \  }}|| j                  dz
  k  ru|j                  |dk(  d      }|j                  |dz  d      }| j	                  |      }d||k  j                         j                  d      |z  z
  }|| j                  kD  }	|	S t        j                  |t        j                        }	|	S )zRevaluate whether we should stop inference based on the confidence of the keypointsr   r   r   r   g      ?r   r  )r   rE  r   r   rT  rS   sumr-   rc   onesrU   )
rD   rZ  rP  r`   r[  r   r'  ro   ratio_confidentearly_stopped_pairss
             rG   _get_early_stopped_image_pairsz;LightGlueForKeypointMatching._get_early_stopped_image_pairs)  s     


A1,, $8#C#CDAIq#Q #7#?#?
aQS#T 66{CI!%9I%E$L$L$N$R$RWX$R$Y\f$ffO"1D4I4I"I
 #" #(**Zuzz"J""rH   c                     |
||   }||   } | j                   |   ||      }t        || j                        \  }}||fS rr   )rL  r5  r0   )rD   r   r`   rP  early_stopsr   r\   r]   s           rG   _get_keypoint_matchingz3LightGlueForKeypointMatching._get_keypoint_matching<  sW    "%k2K$D:--k:;M#:64CXCX#Y ''rH   confidencesr   c                 \    |d| j                   z
  kD  }|||| j                  |      k  z  }|S )z#mask points which should be removedr   )r.   rT  )rD   rf  r   rP  keeps        rG   _get_pruning_maskz.LightGlueForKeypointMatching._get_pruning_maskD  s<    T2223"K4#A#A+#NNNDrH   r  prune_outputc                    |j                   \  }}	}	| j                  |   j                  |      }
| j                  ||
|      j	                  |dk(  t        j                  d            fd||d   |d   |fD        \  }}}}}t        |      D ]  }||||   fxx   dz  cc<    d ||||fD        \  }}}}||f}t        |dd      }|||||fS )	z
        For a given layer, prune keypoints based on the confidence of the keypoints and the matchability of the
        descriptors.
        r   Fc              3   n   K   | ]&  }t        |      D cg c]
  \  }}||    c}} ( y c c}}w wrr   )zip).0r   tr`   pruned_keypoints_masks       rG   	<genexpr>zJLightGlueForKeypointMatching._do_layer_keypoint_pruning.<locals>.<genexpr>_  s9      c
 %(0E$FGDQtWGc
Gs   5/5r   c              3   6   K   | ]  }t        |d         yw)T)batch_firstNr   )rn  pruned_tensors     rG   rq  zJLightGlueForKeypointMatching._do_layer_keypoint_pruning.<locals>.<genexpr>g  s$      S
 D99S
s   Tr   rs  padding_value)	r   rL  r  ri  r   rc   r   rJ  r   )rD   r   r^   r`   r  rj  rZ  rP  r   r'  descriptors_matchabilitypruned_descriptorspruned_keypoints_0pruned_keypoints_1pruned_maskpruned_indicesrO  pruned_keypointsrp  s                     @rG   _do_layer_keypoint_pruningz7LightGlueForKeypointMatching._do_layer_keypoint_pruningK  s+    ',,
Aq#'#?#?#L#]#]^i#j  $ 6 67KMegr s 5 A A$!)U\\Z_M` ac
&	!ilDY[bcc
_.0BKQ_ z" 	4AN1--.!3.	4S
"46HJ\^i!jS
O.0BK /0BC%n$VXY!#3^[R^^^rH   c                     t        j                        t        j                  j                  d         }|   }|   d ||fD        \  }}d ||fD        \  }}fd||||fD        \  }}}}||||fS )Nr   c              3   8   K   | ]  }t        |d d        yw)Tr   ru  Nr   rn  r   s     rG   rq  zMLightGlueForKeypointMatching._concat_early_stopped_outputs.<locals>.<genexpr>}  s$      3
 TDD3
   c              3   8   K   | ]  }t        |d d        yw)Tr   ru  Nr   r  s     rG   rq  zMLightGlueForKeypointMatching._concat_early_stopped_outputs.<locals>.<genexpr>  s$      >
 TCC>
r  c              3   (   K   | ]	  }|     y wrr   r<   )rn  r   early_stops_indicess     rG   rq  zMLightGlueForKeypointMatching._concat_early_stopped_outputs.<locals>.<genexpr>  s!      g
 &'g
s   )rc   r&  r   r   )rD   r  final_pruned_keypoints_indices!final_pruned_keypoints_iterationsr\   r]   idsorder_indicess    `      rG   _concat_early_stopped_outputsz:LightGlueForKeypointMatching._concat_early_stopped_outputsp  s     $kk*=>ll.44Q78+C01-@3
"$BC3
//>
*,MN>
::g
 .1	g
c"@Bc ./PRY[jjjrH   r\   r]   r   c                    |j                   \  }fd|||fD        \  }}}|d d df   }|d d df   }|d d df   }|d d df   }	|d d df   }
|d d df   }t        j                  dz  d|fd|j                  |j                        }t        j
                  dz  d|f|j                  |j                        }t        dz        D ]  }t        j                  ||   dk(  d||   j                  d||   j                  d                  ||d||   f<   t        j                  |	|   dk(  d||   j                  d|	|   j                  d                  ||d||   f<   |
|   ||d||   f<   ||   ||d||   f<    ||fS )Nc              3   J   K   | ]  }|j                  d z  d d        yw)r   r   N)r   )rn  r   r   s     rG   rq  zJLightGlueForKeypointMatching._do_final_keypoint_pruning.<locals>.<genexpr>  s'      -
7=FNN:?Ar2-
s    #r   r   r   r   r9  )r  )
r   rc   fullr   r  zerosrJ  r%  r!  clamp)rD   r  r\   r]   r   r'  r,  r-  r*  r+  r1  r2  _matches_matching_scoresrO  r   s                  @rG   _do_final_keypoint_pruningz7LightGlueForKeypointMatching._do_final_keypoint_pruning  s     
A-
BI7TcAd-
)/ 1a4=1a4=1a4=1a4=*1a40*1a40 ::zQ=A2gnndkdqdqr ;;1_a/oNcNc
 zQ' 	FA*/++r!2x{'9'9!Xa[=N=NST=N=U'V+HQ8A;&' +0++r!2x{'9'9!Xa[=N=NST=N=U'V+HQ8A;&' 3C12EQ8A;./2B12EQ8A;./	F )))rH   r6  r7  r   c           
      &	  ( |rdnd }|rdnd }	|j                   d   dk(  rT|j                   d d }
|j                  |
dt        j                        |j	                  |
      |j	                  |
      ||	fS |j
                  }|j                   \  }}}}t        j                  |j                  |d      d      }|j                  |dz  |d      }||j                  |dz  |      nd }|j                  |dz  || j                        }t        j                  |dz  |      }t        |||      }| j                  |||	      \  }}|d   }| j                  dkD  }| j                  dkD  }g }g }g }g }g }t        j                  d||      j                  |dz  d      }t        j                  |      }t!        | j"                        D ]3  }|j%                         }|| j'                  ||      }n&t        j(                  ||d
   f|j
                        } | j*                  |   |||||      }|\  }}} |r||z   }|r|	| z   }	|r|| j"                  dz
  k  r+ | j,                  |   |      }!| j/                  |!|||      }"n%t        j(                  |t        j0                        }"t        j2                  |"      r|"j5                  d      (|(   }#| j7                  |||(      \  }$}%|j9                  t;        |#             |j9                  t;        |$             |j9                  t;        |%             |r:|j9                  t;        |(                |j9                  t;        |(                ||"    }t=        (fd||d   |d   ||fD              \  }}&}'}}|&|'f}|rt=        (fd||!fD              \  }}}!t        j>                  |"      r n$|s| jA                  |||||!|      \  }}}}}6 |r4|r2| jC                  |||||      \  }}}}| jE                  ||||      \  }}nE| j7                  ||| j"                  dz
        \  }}t        j                  |      | j"                  z  }|j                  |d|      }|||||	fS )Nr<   r   r   r   r]  r   r   r   rV  r   )r   r   r   )r[  )rd  c              3   *   K   | ]
  }|      y wrr   r<   rn  r   rd  s     rG   rq  zALightGlueForKeypointMatching._match_image_pair.<locals>.<genexpr>  s"      V" |,V   c              3   *   K   | ]
  }|      y wrr   r<   r  s     rG   rq  zALightGlueForKeypointMatching._match_image_pair.<locals>.<genexpr>%  s"      l & #K<0lr  )#r   r   rc   rR   	new_zerosr   r^  r   rD  r   r=  rY  r-   r.   expand	ones_likerJ  rE  r:  get_extended_attention_maskr_  rK  rM  rb  rU   anyr   re  extendrw   rf   allr~  r  r  ))rD   r^   r   r6  r7  r`   r   r   r   r   r   r   r   r'  initial_num_keypointsnum_points_per_pairimage_indicesrX  do_early_stopdo_keypoint_pruningr  r\   r]   r  r  pruned_keypoints_indicespruned_keypoints_iterationsrP  r   extended_attention_masklayer_outputra   	attentionrZ  ra  early_stopped_image_indicesearly_stopped_matchesearly_stopped_matching_scoreskeypoints_0
keypoint_1rd  s)                                           @rG   _match_image_pairz.LightGlueForKeypointMatching._match_image_pair  sg    #7BD0d??1"OOCR(E""5"EII">##E*##E*!  !!2;///
A,a#iiZ(D!L%%j1n6KQO	FJFVt||JN,AB\`!))*q.:OQUQvQvwZ!^FC'	65A	040I0I9M 1J 1
-- -Q/	 --1 #33a7 )+&,.)#(<<3HQW#X#_#_`jmn`npr#s &+oo6N&O# 1 R	K%**,K*.*J*J4Q\*]'*/**j+b/5R[d[k[k*l'?422;?6%9"3L 5A1K	#$5$E! !/)!;1!44+M4+@+@+Mk+Z( +/*M*M,k4L_ +N +'
 +0**Zuzz*R'9901 #6"G"G"JK2?2L/KOKfKf#T;K Lg LH)+H (..t4O/PQNN4(=#>?#**40M+NO*6==dC[\gCh>ij9@@FabmFnAop +>?R>R*S'PU V'2IaL)A,PTVc&dV QMKj$ "-j 9I*fk l !9 ; 4+l gc02MOc 9901" 33#!03,# dY(@$HcQR	h 0 22'25# h*,MwXg (,'F'F.%	($G_ (,'B'B;PTVZVeVehiVi'j$G_050PSWSbSb0b-,M,U,U0-
)
 -
 	
rH   r  labelsc           
      0   d }|t        d      ||n| j                  j                  }||n| j                  j                  }|j                  dk7  s|j                  d      dk7  rt        d      |j                  \  }}}	}
}|j                  |dz  |	|
|      }| j                  |      }|d d \  }}}}|j                  |ddd      j                  |      }|j                  |dd| j                        j                  |      }|j                  |dd      }|j                         }|d d d d d d df   |z  |d d d d d d df<   |d d d d d d df   |
z  |d d d d d d df<   | j                  |||
||||	      \  }}}}}t        ||||||||
      S )Nz9LightGlue is not trainable, no labels should be provided.   r   r   zOInput must be a 5D tensor of shape (batch_size, 2, num_channels, height, width)r*   r   r   )r`   r   r   )r[   r\   r]   r^   r_   r`   ra   rb   )rJ   r   r   r   ndimr:  r   r   rB  torD  cloner  rZ   )rD   r  r  r   r   rE   r[   r   r'  channelsr6  r7  keypoint_detectionsr^   r   r`   absolute_keypointsr\   r]   r_   ra   rb   s                         rG   r   z$LightGlueForKeypointMatching.forwardb  s    XYY1B1N-TXT_T_TqTq$8$D $++JjJj 	 !\%6%6q%9Q%>noo1=1C1C.
Ax#++JNHfeT"44\B*=bq*A'	1k4%%j!R;>>|L	!))*aT=b=bcffgst||J2.&__.);Aq!QJ)G%)O1aA:&);Aq!QJ)G&)P1aA:&EIE[E[/!5 F\ F
B%
 /+'!	
 		
rH   r   rr   )NNN)rL   rM   rN   rO   r#   r   rR   rS   rT  rc   rx   rU   rf   rY  rb  re  ri  r~  r  r  r  r   r   rd   
LongTensorrZ   r   rV   rW   s   @rG   r?  r?    s    <(S (U ( gl? <<?49LL?X\_cXc?	u||U5<<#=>>	??#$)LL#?B#JO,,#didpdp#	#&(U\\ 5<< ^a fkfrfr #_\\#_ <<#_ ll	#_
 #_ ll#_ $ll#_ #_Jk@#*#* #* 	#*
 ||#* 
u||U\\)	*#*V %))-,0k
<<k
 \\k
 	k

 k
 llT!k
  $;k
 #Tkk
 
u||U\\5<<E	Fk
Z  +/)-,04
''4
   4'4
  $;	4

 #Tk4
 
0	04
  4
rH   r?  )r  r?  r#   rl   r}   )Icollections.abcr   dataclassesr   numpyrR  rc   huggingface_hub.dataclassesr   r   torch.nn.utils.rnnr   configuration_utilsr
   modeling_flash_attention_utilsr   modeling_utilsr   r   processing_utilsr   r   utilsr   r   r   r   r   utils.import_utilsr   autor   r   auto.modeling_autor   clip.modeling_clipr   cohere.modeling_coherer   llama.modeling_llamar   r   (superglue.image_processing_pil_supergluer   $superglue.image_processing_supergluer   r9   r    
get_loggerrL   loggerr#   rZ   rh   rl   r}   Moduler   r   r   r   rx   r   r   r  r  rS   rf   r5  rR   r=  r?  __all__r<   rH   rG   <module>r     s8   % !   .  + 3 B F 4 W W * - > ( 9 J Q J ) 
		H	% 9:AR& AR  ;ARH  7k  7  7FL X5 X 
:X!; X X "/) /)d7 P>		 P>f-2\\JO,,
\\&BII &R	BII 	   $ELL $U $uU\\[`[g[gMgGh $@5<<  S U\\ , 
i
#; i

i
XrH   