
    hK                     6   d dl mZ d dlmZmZmZ d dlZd dlZd dlm	Z	 d dl
mZ ddlmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZmZmZ ddlmZ ddlmZ ddlmZ ddlm Z  e ed       G d de                    Z! G d de	jD                        Z#d Z$dAdZ%dejL                  de'dejL                  fdZ(	 dBde	jD                  dejL                  d ejL                  d!ejL                  d"eejL                     d#e)d$e)d%ee   fd&Z* G d' d(e	jD                        Z+ G d) d*e	jD                        Z, G d+ d,e	jD                        Z-d-ejL                  d.ejL                  d/ejL                  dejL                  fd0Z. G d1 d2e	jD                        Z/ G d3 d4e	jD                        Z0e G d5 d6e             Z1d7ejL                  d8e)de2ejL                  ejL                  f   fd9Z3d:ejL                  d;e'd<e'dejL                  fd=Z4 ed>       G d? d@e1             Z5d6d@gZ6y)C    )	dataclass)CallableOptionalUnionN)nnpad_sequence   )ACT2FN)FlashAttentionKwargs)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)ModelOutputTransformersKwargsauto_docstring)deprecate_kwarg)can_return_tuple   )AutoModelForKeypointDetection   )LightGlueConfiga  
    Base class for outputs of LightGlue keypoint matching models. Due to the nature of keypoint detection and matching,
    the number of keypoints is not fixed and can vary from image to image, which makes batching non-trivial. In the
    batch of images, the maximum number of matches is set as the dimension of the matches and matching scores. The mask
    tensor is used to indicate which values in the keypoints, matches, matching_scores and prune tensors are keypoint
    matching information.
    )custom_introc                   ^   e Zd ZU dZdZeej                     ed<   dZ	eej                     ed<   dZ
eej                     ed<   dZeej                     ed<   dZeej                     ed<   dZeej                     ed<   dZeeej                        ed	<   dZeeej                        ed
<   y)LightGlueKeypointMatchingOutputa  
    loss (`torch.FloatTensor` of shape `(1,)`, *optional*):
        Loss computed during training.
    matches (`torch.FloatTensor` of shape `(batch_size, 2, num_matches)`):
        Index of keypoint matched in the other image.
    matching_scores (`torch.FloatTensor` of shape `(batch_size, 2, num_matches)`):
        Scores of predicted matches.
    keypoints (`torch.FloatTensor` of shape `(batch_size, num_keypoints, 2)`):
        Absolute (x, y) coordinates of predicted keypoints in a given image.
    prune (`torch.IntTensor` of shape `(batch_size, num_keypoints)`):
        Pruning mask indicating which keypoints are removed and at which layer.
    mask (`torch.BoolTensor` of shape `(batch_size, num_keypoints)`):
        Mask indicating which values in matches, matching_scores, keypoints and prune are keypoint matching
        information.
    hidden_states (`Tuple[torch.FloatTensor, ...]`, *optional*):
        Tuple of `torch.FloatTensor` (one for the output of each stage) of shape `(batch_size, 2, num_channels,
        num_keypoints)` returned when `output_hidden_states=True` is passed or when
        `config.output_hidden_states=True`
    attentions (`Tuple[torch.FloatTensor, ...]`, *optional*):
        Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, 2, num_heads, num_keypoints,
        num_keypoints)` returned when `output_attentions=True` is passed or when
        `config.output_attentions=True`
    Nlossmatchesmatching_scores	keypointsprunemaskhidden_states
attentions)__name__
__module____qualname____doc__r   r   torchFloatTensor__annotations__r   r   r   r    	IntTensorr!   r"   tupler#        n/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/transformers/models/lightglue/modeling_lightglue.pyr   r   '   s    0 )-D(5$$
%,+/GXe''(/37OXe//07-1Ix))*1'+E8EOO$+(,D(5$$
%,8<M8E%"3"345<59Ju00129r.   r   c                        e Zd Zdef fdZ	 ddej                  dee   de	e
ej                     e
ej                  ej                  f   f   fdZ xZS )LightGluePositionalEncoderconfigc                     t         |           t        j                  d|j                  |j
                  z  dz  d      | _        y )Nr   Fbias)super__init__r   Lineardescriptor_dimnum_attention_heads	projectorselfr2   	__class__s     r/   r7   z#LightGluePositionalEncoder.__init__U   s:    1f&;&;v?Y?Y&Y]^&^ejkr.   r   output_hidden_statesreturnc                     | j                  |      }|j                  dd      }t        j                  |      }t        j                  |      }||f}|r||f}|S |f}|S )Nr   dim)r;   repeat_interleaver(   cossin)r=   r   r?   projected_keypoints
embeddingscosinessinesoutputs           r/   forwardz"LightGluePositionalEncoder.forwardY   sq     #nnY7(::1":E
))J'		*%u%
6J*12 R\P]r.   F)r$   r%   r&   r   r7   r(   Tensorr   boolr   r,   rM   __classcell__r>   s   @r/   r1   r1   T   sb    l l
 OT		=Ed^		uU\\"E%,,*D$EE	F	r.   r1   c                     | dd d df   }| ddd df   }t        j                  | |gd      j                  d      }|S )N.r   r   rB   rC   )r(   stackflatten)xx1x2rot_xs       r/   rotate_halfr[   e   sL    	
3!8B	
319BKK"b	r*2226ELr.   c                 6   | j                   }| j                         } |j                         }|j                  |      }|j                  |      }| |z  t        |       |z  z   }||z  t        |      |z  z   }|j	                  |      |j	                  |      fS )a  Applies Rotary Position Embedding to the query and key tensors.

    Args:
        q (`torch.Tensor`): The query tensor.
        k (`torch.Tensor`): The key tensor.
        cos (`torch.Tensor`): The cosine part of the rotary embedding.
        sin (`torch.Tensor`): The sine part of the rotary embedding.
        position_ids (`torch.Tensor`, *optional*):
            Deprecated and unused.
        unsqueeze_dim (`int`, *optional*, defaults to 1):
            The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
            sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
            that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
            k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
            cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
            the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
    Returns:
        `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
    dtype)r^   float	unsqueezer[   to)	qkrF   rG   position_idsunsqueeze_dimr^   q_embedk_embeds	            r/   apply_rotary_pos_embrh   m   s    ( GGE		A		A
--
&C
--
&C3w;q>C/0G3w;q>C/0G::E:"GJJUJ$;;;r.   r"   n_repr@   c                     | j                   \  }}}}|dk(  r| S | dddddddddf   j                  |||||      } | j                  |||z  ||      S )z
    This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
    num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
    r   N)shapeexpandreshape)r"   ri   batchnum_key_value_headsslenhead_dims         r/   	repeat_kvrr      so    
 2?1D1D.Ehz!!Qa"23::5BUW\^bdlmM  (;e(CT8TTr.   modulequerykeyvalueattention_maskscalingdropoutkwargsc                 T   t        || j                        }t        || j                        }	t        j                  ||j	                  dd            |z  }
|#|d d d d d d d |j
                  d   f   }|
|z   }
t        j                  j                  |
dt        j                        j                  |j                        }
t        j                  j                  |
|| j                        }
t        j                  |
|	      }|j	                  dd      j                         }||
fS )Nr   r
   rT   rB   )rD   r^   )ptrainingr   )rr   num_key_value_groupsr(   matmul	transposerk   r   
functionalsoftmaxfloat32ra   r^   ry   r}   
contiguous)rs   rt   ru   rv   rw   rx   ry   rz   
key_statesvalue_statesattn_weightscausal_maskattn_outputs                r/   eager_attention_forwardr      s    3 ; ;<JUF$?$?@L<<z';';Aq'ABWLL!$Q1.D
0@0@0D.D%DE#k1==((2U]](SVVW\WbWbcL==((6??([L,,|\:K''1-88:K$$r.   c                   N    e Zd ZdZdedef fdZ eddd      	 	 	 	 dd	ej                  d
e
eej                  ej                  f      de
ej                     de
ej                     de
ej                     dee   deej                  e
ej                     f   fd       Z xZS )LightGlueAttentionz=Multi-headed attention from 'Attention Is All You Need' paperr2   	layer_idxc                 d   t         |           || _        || _        t	        |d|j
                  |j                  z        | _        |j                  |j                  z  | _	        | j                  dz  | _
        |j                  | _        d| _        t        j                  |j
                  |j                  | j                  z  |j                        | _        t        j                  |j
                  |j                  | j                  z  |j                        | _        t        j                  |j
                  |j                  | j                  z  |j                        | _        t        j                  |j                  | j                  z  |j
                  |j                        | _        y )Nrq   g      Tr4   )r6   r7   r2   r   getattrhidden_sizer:   rq   ro   r~   rx   attention_dropout	is_causalr   r8   attention_biasq_projk_projv_projo_projr=   r2   r   r>   s      r/   r7   zLightGlueAttention.__init__   sM   "
F4F4F&JdJd4de$*$>$>&B\B\$\!}}d*!'!9!9ii : :T]] JQWQfQf
 ii : :T]] JQWQfQf
 ii : :T]] JQWQfQf
 ii&&68J8JQWQfQf
r.   past_key_valuepast_key_valuesz4.58)new_nameversionr"   position_embeddingsrw   encoder_hidden_statesencoder_attention_maskrz   r@   c                    |j                   d d }g |d| j                  }| j                  |      j                  |      j	                  dd      }	|d u}
|
r|n|}|
r|n|}| j                  |      j                  |      j	                  dd      }| j                  |      j                  |      j	                  dd      }||\  }}t        |	|||      \  }	}t        }| j                  j                  dk7  rt        | j                  j                     } || |	|||f| j                  sdn| j                  | j                  d|\  }} |j                  g |d j!                         }| j#                  |      }||fS )NrB   r   r   eager        )ry   rx   )rk   rq   r   viewr   r   r   rh   r   r2   _attn_implementationr   r}   r   rx   rm   r   r   )r=   r"   r   rw   r   r   rz   input_shapehidden_shapequery_statesis_cross_attentioncurrent_statescurrent_attention_maskr   r   rF   rG   attention_interfacer   r   s                       r/   rM   zLightGlueAttention.forward   s    $))#2.88b8$--8{{=166|DNNqRST2$>2D.-;M!7Sa[[055lCMMaQRS
{{>277EOOPQSTU**HC';L*VY[^'_$L*(?;;++w6"9$++:Z:Z"[$7"	%
  $}}C$2H2HLL	%
 	%
!\ *k));;;;FFHkk+.L((r.   )NNNN)r$   r%   r&   r'   r   intr7   r   r(   rO   r   r,   r   r   rM   rQ   rR   s   @r/   r   r      s    G
 
3 
. %0A6R LP158<9=*)||*) &eELL%,,,F&GH*) !.	*)
  (5*) !) 6*) -.*) 
u||Xell33	4*) S*)r.   r   c                   \     e Zd Zdef fdZdej                  dej                  fdZ xZS )LightGlueMLPr2   c                 h   t         |           || _        t        |j                     | _        t        j                  |j                  |j                        | _	        t        j                  |j                  |j                        | _        t        j                  |j                  d      | _        y )NT)elementwise_affine)r6   r7   r2   r   
hidden_actactivation_fnr   r8   intermediate_sizefc1r   fc2	LayerNorm
layer_normr<   s     r/   r7   zLightGlueMLP.__init__   s}    #F$5$5699V55v7O7OP99V55v7I7IJ,,v'?'?TXYr.   r"   r@   c                     | j                  |      }| j                  |      }| j                  |      }| j                  |      }|S N)r   r   r   r   )r=   r"   s     r/   rM   zLightGlueMLP.forward  sB    /6**=9/r.   	r$   r%   r&   r   r7   r(   rO   rM   rQ   rR   s   @r/   r   r      s,    Z ZU\\ ell r.   r   c                        e Zd Zdedef fdZ	 	 ddej                  dej                  dej                  dee	   dee	   d	e
ej                  ee
ej                        ee
ej                        f   fd
Z xZS )LightGlueTransformerLayerr2   r   c                     t         |           t        ||      | _        t	        |      | _        t        ||      | _        t	        |      | _        y r   )r6   r7   r   self_attentionr   self_mlpcross_attention	cross_mlpr   s      r/   r7   z"LightGlueTransformerLayer.__init__  sD    0C$V,1&)D%f-r.   descriptorsr   rw   r?   output_attentionsr@   c                    |rdnd }|rdnd }|r||fz   }|j                   \  }}	}
| j                  ||||      \  }}t        j                  ||gd      }| j	                  |      }||z   }|r||f}|j                  dd|	|
      j                  d      j                  ||	|
      }|6|j                  dddd|	      j                  d      j                  |dd|	      nd }| j                  ||||      \  }}t        j                  ||gd      }| j                  |      }||z   }|r6||f}||j                  ||	|
      fz   z   |j                  ||	|
      fz   |z   }|r
||fz   |fz   }|||fS )Nr-   )r   rw   r   rB   rC   r   r   )r   r   r   )	rk   r   r(   catr   rm   flipr   r   )r=   r   r   rw   r?   r   all_hidden_statesall_attentions
batch_sizenum_keypointsr9   attention_outputself_attentionsintermediate_statesoutput_statesself_attention_descriptorsself_attention_hidden_statesr   r   cross_attention_outputcross_attentionscross_intermediate_statescross_output_statescross_attention_hidden_statess                           r/   rM   z!LightGlueTransformerLayer.forward  s	    #7BD0d 1[N B4?4E4E1
M> -1,?,? ))/	 -@ -
)/ $ii6F(GRP&9:%0=%@",?+O( '..r1m^TT!WWZ? 	 ) ""2q!Q>CCAFNNz[\^_ano 	 483G3G&"7#9/	 4H 4
0 0 %*II/IKa.bhj$k!"nn-FG03FF-FH[,\)!-55j-Q_`bc./ &&z=.QST 0	0  +.@@DTCVVN-~==r.   )FF)r$   r%   r&   r   r   r7   r(   rO   r   rP   r,   rM   rQ   rR   s   @r/   r   r   
  s    . .3 . 05,1H>\\H> <<H> 	H>
 'tnH> $D>H> 
u||XeELL&9:HU5<<EX<YY	ZH>r.   r   
similaritymatchability0matchability1c                    | j                   \  }}}t        j                  j                  |      t        j                  j                  |      j	                  dd      z   }t        j                  j                  | d      }t        j                  j                  | j	                  dd      j                         d      j	                  dd      }| j                  ||dz   |dz   fd      }	||z   |z   |	ddd|d|f<   t        j                  j                  |j                  d             |	dddddf<   t        j                  j                  |j                  d             |	dddddf<   |	S )z;create the log assignment matrix from logits and similarityr   r   rB   rT   r   N)	rk   r   r   
logsigmoidr   log_softmaxr   new_fullsqueeze)
r   r   r   r   num_keypoints_0num_keypoints_1certaintiesscores0scores1scoress
             r/   sigmoid_log_double_softmaxr   ]  sS    4>3C3C0J--**=9BMM<T<TUb<c<m<mnoqr<ssKmm''
A6Gmm''
(<(<R(D(O(O(QSTU__`bdfgG  *o.A?UVCV!WYZ[F4;g4E4SF1 0 00111=3H3H3L2LMF1crc2:11=3H3H3L2LMF1b#2#:Mr.   c                        e Zd Zdef fdZdej                  dej                  dej                  fdZdej                  dej                  fdZ xZ	S )LightGlueMatchAssignmentLayerr2   c                     t         |           |j                  | _        t        j                  | j                  | j                  d      | _        t        j                  | j                  dd      | _        y )NTr4   r   )r6   r7   r9   r   r8   final_projectionmatchabilityr<   s     r/   r7   z&LightGlueMatchAssignmentLayer.__init__m  sY    $33 "		$*=*=t?R?RY] ^IId&9&914Hr.   r   r!   r@   c                    |j                   \  }}}| j                  |      }|t        j                  | j                  |j
                        dz  z  }|j                  |dz  d||      }|d d df   }|d d df   }||j                  dd      z  }	||j                  |dz  d|      }|d d df   j                  d      }
|d d df   j                  d      j                  dd      }|
|z  }|	j                  |dk(  t        j                  |	j                        j                        }	| j                  |      }|j                  |dz  d|d      }|d d df   }|d d df   }t        |	||      }|S )Ndeviceg      ?r   r   r   rB   rT   )rk   r   r(   tensorr9   r   rm   r   r`   masked_fillfinfor^   minr   r   )r=   r   r!   r   r   r9   m_descriptorsm_descriptors0m_descriptors1r   mask0mask1r   matchability_0matchability_1r   s                   r/   rM   z%LightGlueMatchAssignmentLayer.forwardt  s   4?4E4E1
M>--k:%T5H5HQ^QeQe(fjn(nn%--jAoq-Q_`&q!t,&q!t,#n&>&>r2&FF
<<
aMBDAJ((,EAJ((,66r2>E5=D#//	5;;zGWGW;X;\;\]J ((5#++J!OQqQ%ad+%ad+ ,JWr.   c                     | j                  |      }t        j                  j                  |      j	                  d      }|S )z0Get matchability of descriptors as a probabilityrB   )r   r   r   sigmoidr   )r=   r   r   s      r/   get_matchabilityz.LightGlueMatchAssignmentLayer.get_matchability  s7    ((5}},,\:BB2Fr.   )
r$   r%   r&   r   r7   r(   rO   rM   r   rQ   rR   s   @r/   r   r   l  sR    I I5<< u||  4ELL U\\ r.   r   c                   \     e Zd Zdef fdZdej                  dej                  fdZ xZS )LightGlueTokenConfidenceLayerr2   c                 l    t         |           t        j                  |j                  d      | _        y Nr   )r6   r7   r   r8   r9   tokenr<   s     r/   r7   z&LightGlueTokenConfidenceLayer.__init__  s&    YYv44a8
r.   r   r@   c                     | j                  |j                               }t        j                  j	                  |      j                  d      }|S )NrB   )r   detachr   r   r   r   )r=   r   r   s      r/   rM   z%LightGlueTokenConfidenceLayer.forward  s=    

;--/0%%e,44R8r.   r   rR   s   @r/   r   r     s*    9 9
5<< ELL r.   r   c                   0    e Zd ZU dZeed<   dZdZdZdZ	dZ
y)LightGluePreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    r2   	lightgluepixel_valuesFTN)r$   r%   r&   r'   r   r*   base_model_prefixmain_input_namesupports_gradient_checkpointing_supports_flash_attn_supports_sdpar-   r.   r/   r  r    s+    
 #$O&+#Nr.   r  r   	thresholdc                 6   | j                   \  }}}| ddddddf   j                  d      }| ddddddf   j                  d      }|j                  }|j                  }t        j                  |j                   d   |j
                        d   }t        j                  |j                   d   |j
                        d   }	||j                  d|      k(  }
|	|j                  d|      k(  }|j                  j                         }|j                  d      }t        j                  |
||      }t        j                  ||j                  d|      |      }|
||kD  z  }||j                  d|      z  }t        j                  ||d      }t        j                  ||d      }t        j                  ||g      j                  dd      j                  |dz  d      }t        j                  ||g      j                  dd      j                  |dz  d      }||fS )z1obtain matches from a score matrix [Bx M+1 x N+1]NrB   r   r   r   r   )rk   maxindicesr(   aranger   gathervaluesexp
new_tensorwhererU   r   rm   )r   r  r   _max0max1matches0matches1indices0indices1mutual0mutual1zeromatching_scores0matching_scores1valid0valid1r   r   s                      r/   get_matches_from_scoresr#    s   ||J1!SbS#2#+""1%D!SbS#2#+""1%D||H||H ||HNN1-hooFtLH||HNN1-hooFtLH(//!X66G(//!X66G ;;??D??1D{{7D$7{{7,<,C,CAx,PRVW(945Fv}}Q11F {{68R0H{{68R0Hkk8X./99!Q?GG
UVXZ[Gkk#35E"FGQQRSUVW__`jmn`nprsOO##r.   r   heightwidthc                     t        j                  ||g| j                  | j                        d   }|dz  }|j	                  d      j
                  dz  }| |ddddf   z
  |d   z  } | S )a  
    Normalize keypoints locations based on image image_shape

    Args:
        keypoints (`torch.Tensor` of shape `(batch_size, num_keypoints, 2)`):
            Keypoints locations in (x, y) format.
        height (`int`):
            Image height.
        width (`int`):
            Image width.

    Returns:
        Normalized keypoints locations of shape (`torch.Tensor` of shape `(batch_size, num_keypoints, 2)`).
    r   r^   Nr   rB   .).NN)r(   r   r   r^   r  r  )r   r$  r%  sizeshiftscales         r/   normalize_keypointsr+    sp     <<	0@0@	XY]^D1HEHHRL!#EU3a<00E/4JJIr.   zV
    LightGlue model taking images as inputs and outputting the matching of them.
    c                       e Zd ZdZdef fdZdedefdZ	 d"de	j                  de	j                  d	ee   dee	j                  ee	j                  e	j                  f   f   fd
Zde	j                  dede	j                  de	j                  de	j                  f
dZd#dZde	j                  de	j                  dede	j                  fdZde	j                  de	j                  de	j                  de	j                  de	j                  de	j                  defdZd Zde	j                  de	j                  de	j                  de	j                  dee	j                  e	j                  f   f
dZ	 	 	 d$de	j                  de	j                  dedede	j                  dee   d	ee   dee	j                  e	j                  e	j                  eef   fdZee	 	 	 d$de	j0                  d ee	j2                     dee   d	ee   deeef   f
d!              Z xZS )%LightGlueForKeypointMatchingar  
    LightGlue is a model matching keypoints in images by leveraging detections from a keypoint detector such as
    SuperPoint. It is based on the SuperGlue architecture and is designed to be lightweight and efficient.
    It consists of :
        1. Keypoint Encoder
        2. A Graph Neural Network with self and cross attention layers
        3. Matching Assignment layers

    The correspondence ids use -1 to indicate non-matching points.

    Philipp Lindenberger, Paul-Edouard Sarlin and Marc Pollefeys. LightGlue: Local Feature Matching at Light Speed.
    In ICCV 2023. https://huggingface.co/papers/2306.13643
    r2   c           	      ,   t         |   |       t        j                  |j                  |j
                        | _        |j                  j                  | _        |j                  | _	        |j                  | _        |j                  | _        |j                  | _        |j                  | _        | j                  | j                  k7  r2t        j                   | j                  | j                  d      | _        nt        j$                         | _        t'        |      | _        t        j*                  t-        |j                        D cg c]  }t/        ||       c}      | _        t        j*                  t-        |j                        D cg c]  }t3        |       c}      | _        t        j*                  t-        |j                  dz
        D cg c]  }t7        |       c}      | _        | j;                          y c c}w c c}w c c}w )N)trust_remote_codeTr4   )r   r   )r6   r7   r   from_configkeypoint_detector_configr/  keypoint_detectordescriptor_decoder_dim keypoint_detector_descriptor_dimr9   num_hidden_layers
num_layersfilter_thresholddepth_confidencewidth_confidencer   r8   input_projectionIdentityr1   positional_encoder
ModuleListranger   transformer_layersr   match_assignment_layersr   token_confidence	post_init)r=   r2   ir  r>   s       r/   r7   z%LightGlueForKeypointMatching.__init__  s    !>!J!J++v?W?W"
 170O0O0f0f-$33 22 & 7 7 & 7 7 & 7 7$"G"GG$&IId.S.SUYUhUhos$tD!$&KKMD!"<V"D"$--EJ6KcKcEde&v;e#
 (*}}<A&BZBZ<[\q*62\(
$ !#<A&BZBZ]^B^<_`q*62`!
 	 f ] as   HHHlayer_indexr@   c                     ddt        j                  d|z  | j                  z        z  z   }t        j                  |dd      S )z-scaled confidence threshold for a given layerg?g?g      r   r   )npr  r6  clip)r=   rD  r  s      r/   _get_confidence_thresholdz6LightGlueForKeypointMatching._get_confidence_threshold  s;    #tk'9DOO'K LLL	wwy!Q''r.   r   r   r?   c                     |j                         j                         }| j                  |      }| j                  ||      }||fS )Nr?   )r  r   r:  r<  )r=   r   r   r?   projected_descriptorskeypoint_encoding_outputs         r/   _keypoint_processingz1LightGlueForKeypointMatching._keypoint_processing  sO     "((*557 $ 5 5k B#'#:#:9[o#:#p $&>>>r.   keypoint_confidencesr!   
num_pointsc                 |   |j                   \  }}|| j                  dz
  k  ru|j                  |dk(  d      }|j                  |dz  d      }| j	                  |      }d||k  j                         j                  d      |z  z
  }|| j                  kD  }	|	S t        j                  |t        j                        }	|	S )zRevaluate whether we should stop inference based on the confidence of the keypointsr   r   r   rB   g      ?rC   r]   )rk   r6  r   rm   rH  r_   sumr8  r(   onesrP   )
r=   rN  rD  r!   rO  r   r  r  ratio_confidentearly_stopped_pairss
             r/   _get_early_stopped_image_pairsz;LightGlueForKeypointMatching._get_early_stopped_image_pairs'  s     


A1,, $8#C#CDAIq#Q #7#?#?
aQS#T 66{CI!%9I%E$L$L$N$R$RWX$R$Y\f$ffO"1D4I4I"I
 #" #(**Zuzz"J""r.   c                     |
||   }||   } | j                   |   ||      }t        || j                        \  }}||fS r   )r@  r#  r7  )r=   r   r!   rD  early_stopsr   r   r   s           r/   _get_keypoint_matchingz3LightGlueForKeypointMatching._get_keypoint_matching:  sW    "%k2K$D:--k:;M#:64CXCX#Y ''r.   confidencesr   c                 \    |d| j                   z
  kD  }|||| j                  |      k  z  }|S )z#mask points which should be removedr   )r9  rH  )r=   rY  r   rD  keeps        r/   _get_pruning_maskz.LightGlueForKeypointMatching._get_pruning_maskB  s<    T2223"K4#A#A+#NNNDr.   r  prune_outputc                    |j                   \  }}	}	| j                  |   j                  |      }
| j                  ||
|      j	                  |dk(  t        j                  d            fd||d   |d   |fD        \  }}}}}t        |      D ]  }||||   fxx   dz  cc<    d ||||fD        \  }}}}||f}t        |dd      }|||||fS )	z
        For a given layer, prune keypoints based on the confidence of the keypoints and the matchability of the
        descriptors.
        r   Fc              3   n   K   | ]&  }t        |      D cg c]
  \  }}||    c}} ( y c c}}w wr   )zip).0r   tr!   pruned_keypoints_masks       r/   	<genexpr>zJLightGlueForKeypointMatching._do_layer_keypoint_pruning.<locals>.<genexpr>]  s9      c
 %(0E$FGDQtWGc
Gs   5/5r   c              3   6   K   | ]  }t        |d         yw)T)batch_firstNr   )ra  pruned_tensors     r/   rd  zJLightGlueForKeypointMatching._do_layer_keypoint_pruning.<locals>.<genexpr>e  s$      S
 D99S
s   TrB   rf  padding_value)	rk   r@  r   r\  r   r(   r   r>  r	   )r=   r   r   r!   r  r]  rN  rD  r   r  descriptors_matchabilitypruned_descriptorspruned_keypoints_0pruned_keypoints_1pruned_maskpruned_indicesrC  pruned_keypointsrc  s                     @r/   _do_layer_keypoint_pruningz7LightGlueForKeypointMatching._do_layer_keypoint_pruningI  s+    ',,
Aq#'#?#?#L#]#]^i#j  $ 6 67KMegr s 5 A A$!)U\\Z_M` ac
&	!ilDY[bcc
_.0BKQ_ z" 	4AN1--.!3.	4S
"46HJ\^i!jS
O.0BK /0BC%n$VXY!#3^[R^^^r.   c                     t        j                        d ||fD        \  }}d ||fD        \  }}fd||||fD        \  }}}}||||fS )Nc              3   8   K   | ]  }t        |d d        yw)TrB   rh  Nr   ra  r   s     r/   rd  zMLightGlueForKeypointMatching._concat_early_stopped_outputs.<locals>.<genexpr>w  s$      3
 TDD3
   c              3   8   K   | ]  }t        |d d        yw)Tr   rh  Nr   rt  s     r/   rd  zMLightGlueForKeypointMatching._concat_early_stopped_outputs.<locals>.<genexpr>{  s$      >
 TCC>
ru  c              3   (   K   | ]	  }|     y wr   r-   )ra  r   early_stops_indicess     r/   rd  zMLightGlueForKeypointMatching._concat_early_stopped_outputs.<locals>.<genexpr>  s!      g
 &'g
s   )r(   rU   )r=   rx  final_pruned_keypoints_indices!final_pruned_keypoints_iterationsr   r   s    `    r/   _concat_early_stopped_outputsz:LightGlueForKeypointMatching._concat_early_stopped_outputsn  s     $kk*=>3
"$BC3
//>
*,MN>
::g
 .1	g
c"@Bc ./PRY[jjjr.   r   r   r   c                    |j                   \  }fd|||fD        \  }}}|d d df   }|d d df   }|d d df   }|d d df   }	|d d df   }
|d d df   }t        j                  dz  d|fd|j                  |j                        }t        j
                  dz  d|f|j                  |j                        }t        dz        D ]  }t        j                  ||   dk(  d||   j                  d||   j                  d                  ||d||   f<   t        j                  |	|   dk(  d||   j                  d|	|   j                  d                  ||d||   f<   |
|   ||d||   f<   ||   ||d||   f<    ||fS )Nc              3   J   K   | ]  }|j                  d z  d d        yw)r   rB   N)rm   )ra  r   r   s     r/   rd  zJLightGlueForKeypointMatching._do_final_keypoint_pruning.<locals>.<genexpr>  s'      -
7=FNN:?Ar2-
s    #r   r   r   rB   r'  )r   )
rk   r(   fullr   r^   zerosr>  r  r  clamp)r=   r  r   r   r   r  r  r  r  r  r  r   _matches_matching_scoresrC  r   s                  @r/   _do_final_keypoint_pruningz7LightGlueForKeypointMatching._do_final_keypoint_pruning  s     
A-
BI7TcAd-
)/ 1a4=1a4=1a4=1a4=*1a40*1a40 ::zQ=A2gnndkdqdqr ;;1_a/oNcNc
 zQ' 	FA*/++r!2x{'9'9!Xa[=N=NST=N=U'V+HQ8A;&' +0++r!2x{'9'9!Xa[=N=NST=N=U'V+HQ8A;&' 3C12EQ8A;./2B12EQ8A;./	F )))r.   r$  r%  r   c           
      &	  ( |rdnd }|rdnd }	|j                   d   dk(  rT|j                   d d }
|j                  |
dt        j                        |j	                  |
      |j	                  |
      ||	fS |j
                  }|j                   \  }}}}t        j                  |j                  |d      d      }|j                  |dz  |d      }||j                  |dz  |      nd }|j                  |dz  || j                        }t        j                  |dz  |      }t        |||      }| j                  |||	      \  }}|d   }| j                  dkD  }| j                  dkD  }g }g }g }g }g }t        j                  d||      j                  |dz  d      }t        j                  |      }t!        | j"                        D ]3  }|j%                         }|| j'                  ||      }n&t        j(                  ||d
   f|j
                        } | j*                  |   |||||      }|\  }}} |r||z   }|r|	| z   }	|r|| j"                  dz
  k  r+ | j,                  |   |      }!| j/                  |!|||      }"n%t        j(                  |t        j0                        }"t        j2                  |"      r|"j5                  d      (|(   }#| j7                  |||(      \  }$}%|j9                  t;        |#             |j9                  t;        |$             |j9                  t;        |%             |r:|j9                  t;        |(                |j9                  t;        |(                ||"    }t=        (fd||d   |d   ||fD              \  }}&}'}}|&|'f}|rt=        (fd||!fD              \  }}}!t        j>                  |"      r n$|s| jA                  |||||!|      \  }}}}}6 |r4|r2| jC                  |||||      \  }}}}| jE                  ||||      \  }}nE| j7                  ||| j"                  dz
        \  }}t        j                  |      | j"                  z  }|j                  |d|      }|||||	fS )Nr-   r   r   rB   r]   r   rC   r   rJ  rT   )rw   r?   r   )rO  )rW  c              3   *   K   | ]
  }|      y wr   r-   ra  r   rW  s     r/   rd  zALightGlueForKeypointMatching._match_image_pair.<locals>.<genexpr>  s"      V" |,V   c              3   *   K   | ]
  }|      y wr   r-   r  s     r/   rd  zALightGlueForKeypointMatching._match_image_pair.<locals>.<genexpr>  s"      l & #K<0lr  )#rk   r   r(   r   	new_zerosr   rQ  rm   r4  r  r+  rM  r8  r9  rl   	ones_liker>  r6  r(  get_extended_attention_maskrR  r?  rA  rU  rP   anyrE   rX  extendlistr,   allrq  r{  r  ))r=   r   r   r$  r%  r!   r   r?   r   r   rk   r   r   r  initial_num_keypointsnum_points_per_pairimage_indicesrL  do_early_stopdo_keypoint_pruningrx  r   r   ry  rz  pruned_keypoints_indicespruned_keypoints_iterationsrD  r   extended_attention_masklayer_outputr"   	attentionrN  rT  early_stopped_image_indicesearly_stopped_matchesearly_stopped_matching_scoreskeypoints_0
keypoint_1rW  s)                                           @r/   _match_image_pairz.LightGlueForKeypointMatching._match_image_pair  sg    #7BD0d??1"OOCR(E""5"EII">##E*##E*!  !!2;///
A,a#iiZ(D!L%%j1n6KQO	FJFVt||JN,AB\`!))*q.:OQUQvQvwZ!^FC'	65A	040I0I9M 1J 1
-- -Q/	 --1 #33a7 )+&,.)#(<<3HQW#X#_#_`jmn`npr#s &+oo6N&O# 1 R	K%**,K*.*J*J4Q\*]'*/**j+b/5R[d[k[k*l'?422;?6%9"3L 5A1K	#$5$E! !/)!;1!44+M4+@+@+Mk+Z( +/*M*M,k4L_ +N +'
 +0**Zuzz*R'9901 #6"G"G"JK2?2L/KOKfKf#T;K Lg LH)+H (..t4O/PQNN4(=#>?#**40M+NO*6==dC[\gCh>ij9@@FabmFnAop +>?R>R*S'PU V'2IaL)A,PTVc&dV QMKj$ "-j 9I*fk l !9 ; 4+l gc02MOc 9901" 33#!03,# dY(@$HcQR	h 0 22'25# h*,MwXg (,'F'F.%	($G_ (,'B'B;PTVZVeVehiVi'j$G_050PSWSbSb0b-,M,U,U0-
)
 -
 	
r.   r  labelsc           
      0   d }|t        d      ||n| j                  j                  }||n| j                  j                  }|j                  dk7  s|j                  d      dk7  rt        d      |j                  \  }}}}	}
|j                  |dz  ||	|
      }| j                  |      }|d d \  }}}}|j                  |ddd      j                  |      }|j                  |dd| j                        j                  |      }|j                  |dd      }|j                         }|d d d d d d df   |
z  |d d d d d d df<   |d d d d d d df   |	z  |d d d d d d df<   | j                  |||	|
|||	      \  }}}}}t        ||||||||
      S )Nz9LightGlue is not trainable, no labels should be provided.   r   r   zOInput must be a 5D tensor of shape (batch_size, 2, num_channels, height, width)   rB   r   )r!   r   r?   )r   r   r   r   r    r!   r"   r#   )
ValueErrorr2   r   r?   ndimr(  rk   rm   r2  ra   r4  cloner  r   )r=   r  r  r   r?   r   r   r  channelsr$  r%  keypoint_detectionsr   r   r!   absolute_keypointsr   r   r    r"   r#   s                        r/   rM   z$LightGlueForKeypointMatching.forward\  s    XYY1B1N-TXT_T_TqTq$8$D $++JjJj 	 !\%6%6q%9Q%>noo1=1C1C.
Ax#++JNHfeT"44\B*=bq*A'	1k4%%j!R;>>|L	!))*aT=b=bcffgst||J2.&__.);Aq!QJ)G%)O1aA:&);Aq!QJ)G&)P1aA:&EIE[E[/!5 F\ F
B%
 /+'!	
 		
r.   rN   r   )NNN)r$   r%   r&   r'   r   r7   r   r_   rH  r(   rO   r   rP   r,   rM  rU  rX  r\  rq  r{  r  r  r   r   r)   
LongTensorr   r   rM   rQ   rR   s   @r/   r-  r-    s    @(S (U ( jo? <<?49LL?X`aeXf?	u||U5<<#=>>	??#$)LL#?B#JO,,#didpdp#	#&(U\\ 5<< ^a fkfrfr #_\\#_ <<#_ ll	#_
 #_ ll#_ $ll#_ #_Jk8#*#* #* 	#*
 ||#* 
u||U\\)	*#*V ",0/3k
<<k
 \\k
 	k

 k
 llk
 $D>k
 'tnk
 
u||U\\5<<E	Fk
Z  .2,0/33
''3
 ))*3
 $D>	3

 'tn3
 
u55	63
  3
r.   r-  r   )r   )7dataclassesr   typingr   r   r   numpyrF  r(   r   torch.nn.utils.rnnr	   activationsr   modeling_flash_attention_utilsr   modeling_utilsr   r   processing_utilsr   utilsr   r   r   utils.deprecationr   utils.genericr   auto.modeling_autor   configuration_lightgluer   r   Moduler1   r[   rh   rO   r   rr   r_   r   r   r   r   r   r   r   r  r,   r#  r+  r-  __all__r-   r.   r/   <module>r     sX  ( " , ,    + ! B F & D D 0 - > 4  :k  :  :F "<<	UU\\ 	U# 	U%,, 	U& %II%<<% 
% <<	%
 U\\*% % % '(%4E) E)P299 "P>		 P>f-2\\JO,,
\\&BII &R	BII 	   $ELL $U $uU\\[`[g[gMgGh $@5<<  S U\\ , 
f
#; f

f
R &'E
Fr.   