
    h                        d dl Zd dlZd dlmZ d dlmZmZ d dlZ	d dl
Z
d dlmc mZ d dl
mZmZ ddlmZ ddlmZmZmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZmZm Z  ddl!m"Z" ddl#m$Z$  e       rd dl%m&Z&  e        rd dl'm(Z( d dl)m*Z* e ed       G d de                    Z+	 dNde
j                  de
j                  de
j                  fdZ,dededefdZ-de
j                  de
j                  de
j                  fdZ. G d dej^                        Z0deded e1defd!Z2de
j                  de
j                  d e1de
j                  fd"Z3 G d# d$ej^                        Z4 G d% d&ej^                        Z5 G d' d(ej^                        Z6	 dOd)ej^                  d*e
j                  d+e
j                  d,e
j                  d-ee
j                     d.e7d/e7fd0Z8 G d1 d2ej^                        Z9 G d3 d4ej^                        Z:dPd5e
j                  d6e7d7e;de
j                  fd8Z< G d9 d:ej^                        Z= G d; d<ej^                        Z> G d= d>ej^                        Z? G d? d@e      Z@ G dA dBej                        ZB G dC dDej^                        ZC G dE dFej^                        ZD G dG dHej^                        ZEe G dI dJe             ZF edK       G dL dMeF             ZGdJdMgZHy)Q    N)	dataclass)CallableOptional)Tensornn   )ACT2FN)ModelOutputis_scipy_availablerequires_backends)GradientCheckpointingLayer)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)TransformersKwargsauto_docstringis_accelerate_available)check_model_inputs   )
EomtConfig)linear_sum_assignment)PartialState)reducea  
    Class for outputs of [`EomtForUniversalSegmentationOutput`].

    This output can be directly passed to [`~EomtImageProcessor.post_process_semantic_segmentation`] or
    [`~EomtImageProcessor.post_process_instance_segmentation`] or
    [`~EomtImageProcessor.post_process_panoptic_segmentation`] to compute final segmentation maps. Please, see
    [`~EomtImageProcessor] for details regarding usage.
    )custom_introc                   <   e Zd ZU dZdZeej                     ed<   dZ	eej                     ed<   dZ
eej                     ed<   dZeej                     ed<   dZeeej                        ed<   dZeeej                        ed<   dZeeej"                        ed	<   y)
"EomtForUniversalSegmentationOutputa+  
    loss (`torch.Tensor`, *optional*):
        The computed loss, returned when labels are present.
    class_queries_logits (`torch.FloatTensor`):
        A tensor of shape `(batch_size, num_queries, num_labels + 1)` representing the proposed classes for each
        query. Note the `+ 1` is needed because we incorporate the null class.
    masks_queries_logits (`torch.FloatTensor`):
        A tensor of shape `(batch_size, num_queries, height, width)` representing the proposed masks for each
        query.
    last_hidden_state (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
        Last hidden states (final feature map) of the last layer.
    hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
        shape `(batch_size, sequence_length, hidden_size)`. Hidden-states all layers of the model.
    attentions (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tuple(torch.FloatTensor)` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`. Self and Cross Attentions weights from transformer decoder.
    patch_offsets (`list[torch.Tensor]`, *optional*):
        list of tuples indicating the image index and start and end positions of patches for semantic segementation.
    Nlossclass_queries_logitsmasks_queries_logitslast_hidden_statehidden_states
attentionspatch_offsets)__name__
__module____qualname____doc__r   r   torchFloatTensor__annotations__r   r   r    r!   tupler"   r#   listr        d/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/transformers/models/eomt/modeling_eomt.pyr   r   2   s    * )-D(5$$
%,8<(5#4#45<8<(5#4#45<59x 1 1298<M8E%"3"345<59Ju0012926M8D./6r.   r   input_featurespoint_coordinatesreturnc                     |j                         dk(  rd}|j                  d      }t        j                  j                  j
                  | d|z  dz
  fi |}|r|j                  d      }|S )a(  
    A wrapper around `torch.nn.functional.grid_sample` to support 3D point_coordinates tensors.

    Args:
        input_features (`torch.Tensor` of shape (batch_size, channels, height, width)):
            A tensor that contains features map on a height * width grid
        point_coordinates (`torch.Tensor` of shape (batch_size, num_points, 2) or (batch_size, grid_height, grid_width,:
        2)):
            A tensor that contains [0, 1] * [0, 1] normalized point coordinates
        add_dim (`bool`):
            boolean value to keep track of added dimension

    Returns:
        point_features (`torch.Tensor` of shape (batch_size, channels, num_points) or (batch_size, channels,
        height_grid, width_grid):
            A tensor that contains features for points in `point_coordinates`.
    r   T   g       @      ?)dim	unsqueezer(   r   
functionalgrid_samplesqueeze)r0   r1   add_dimkwargspoint_featuress        r/   sample_pointr>   ]   st    ( !#-77: XX((44^SK\E\_bEbmflmN'//2r.   inputslabelsc                    | j                         j                  d      } dt        j                  | |j                        z  }| j                  d      dddf   |j                  d      dddf   z   }d|dz   |dz   z  z
  }|S )a  
    A pair wise version of the dice loss, see `dice_loss` for usage.

    Args:
        inputs (`torch.Tensor`):
            A tensor representing a mask
        labels (`torch.Tensor`):
            A tensor with the same shape as inputs. Stores the binary classification labels for each element in inputs
            (0 for the negative class and 1 for the positive class).

    Returns:
        `torch.Tensor`: The computed loss between each pairs.
    r   r4   N)sigmoidflattenr(   matmulTsum)r?   r@   	numeratordenominatorr   s        r/   pair_wise_dice_lossrJ   }   s|     ^^%%a(FELL22I**R.D)FJJrN47,CCK	A+/22DKr.   c                 \   | j                   d   }t        j                  d      } || t        j                  |             } || t        j
                  |             }t        j                  ||z  |j                        }t        j                  ||z  d|z
  j                        }||z   }|S )a  
    A pair wise version of the cross entropy loss, see `sigmoid_cross_entropy_loss` for usage.

    Args:
        inputs (`torch.Tensor`):
            A tensor representing a mask.
        labels (`torch.Tensor`):
            A tensor with the same shape as inputs. Stores the binary classification labels for each element in inputs
            (0 for the negative class and 1 for the positive class).

    Returns:
        loss (`torch.Tensor`): The computed loss between each pairs.
    r   none	reduction)shaper   BCEWithLogitsLossr(   	ones_like
zeros_likerE   rF   )	r?   r@   height_and_width	criterioncross_entropy_loss_poscross_entropy_loss_negloss_posloss_negr   s	            r/   $pair_wise_sigmoid_cross_entropy_lossrY      s     ||A$$v6I&vuv/FG&vu/?/?/GH||25EEvxxPH||25EEF
~~VHhDKr.   c                        e Zd ZdZ	 ddedededef fdZ ej                         dej                  dej                  d	ej                  d
ej                  de
ee	      f
d       Z xZS )EomtHungarianMatcheraq  This class computes an assignment between the labels and the predictions of the network.

    For efficiency reasons, the labels don't include the no_object. Because of this, in general, there are more
    predictions than labels. In this case, we do a 1-to-1 matching of the best predictions, while the others are
    un-matched (and thus treated as non-objects).
    
cost_class	cost_mask	cost_dice
num_pointsc                     t         |           |dk(  r|dk(  r|dk(  rt        d      || _        || _        || _        || _        y)aH  Creates the matcher

        Params:
            cost_class (`float`, *optional*, defaults to 1.0):
                Relative weight of the classification error in the matching cost.
            cost_mask (`float`, *optional*,  defaults to 1.0):
                This is the relative weight of the focal loss of the binary mask in the matching cost.
            cost_dice (`float`, *optional*, defaults to 1.0):
                This is the relative weight of the dice loss of the binary mask in the matching cost.
            num_points (`int`, *optional*, defaults to 12544):
                No. of points to sample on which the mask loss will be calculated. The same set of K points are
                uniformly sampled for all prediction and ground truth masks to construct the cost matrix for bipartite
                matching.
        r   zAll costs can't be 0N)super__init__
ValueErrorr_   r\   r]   r^   )selfr\   r]   r^   r_   	__class__s        r/   rb   zEomtHungarianMatcher.__init__   sK    " 	?yA~)q.344$$""r.   r   r   mask_labelsclass_labelsr2   c           	         g }|j                   d   }t        |      D ]  }||   j                  d      }||   }	|dd||   f    }
||   j                  |	      }|dddf   }|	dddf   }	t	        j
                  d| j                  d|	j                        }|j                  |j                   d   dd      }t        ||d      j                  d      }|j                  |	j                   d   dd      }t        |	|d      j                  d      }	t        |	|      }t        |	|      }| j                  |z  | j                  |
z  z   | j                  |z  z   }t	        j                   |t	        j"                  d	            }t	        j$                  |t	        j"                  d
            }t	        j&                  |d      }t)        |j+                               }|j-                  |        |D cg c]O  \  }}t	        j.                  |t        j0                        t	        j.                  |t        j0                        fQ }}}|S c c}}w )ao  
        Params:
            masks_queries_logits (`torch.Tensor`):
                A tensor of dim `batch_size, num_queries, num_labels` with the classification logits.
            class_queries_logits (`torch.Tensor`):
                A tensor of dim `batch_size, num_queries, height, width` with the predicted masks.
            class_labels (`torch.Tensor`):
                A tensor of dim `num_target_boxes` (where num_target_boxes is the number of ground-truth objects in the
                target) containing the class labels.
            mask_labels (`torch.Tensor`):
                A tensor of dim `num_target_boxes, height, width` containing the target masks.

        Returns:
            matched_indices (`list[tuple[Tensor]]`): A list of size batch_size, containing tuples of (index_i, index_j)
            where:
                - index_i is the indices of the selected predictions (in order)
                - index_j is the indices of the corresponding selected labels (in order)
            For each batch element, it holds:
                len(index_i) = len(index_j) = min(num_queries, num_target_boxes).
        r   rB   Nr   r4   deviceFalign_cornersg    _Bg    _©dtype)rO   rangesoftmaxtor(   randr_   rj   repeatr>   r:   rY   rJ   r]   r\   r^   minimumtensormaximum
nan_to_numr   cpuappend	as_tensorint64)rd   r   r   rf   rg   indices
batch_sizei
pred_probs	pred_maskr\   target_maskr1   target_coordinatespred_coordinatesr]   r^   cost_matrixassigned_indicesjmatched_indicess                        r/   forwardzEomtHungarianMatcher.forward   s/   8 *, *//2
z" 	-A-a088<J,Q/I %QQ%788J%a.++I6K%ag.K!!T'*I !&

1dooqIYIY Z!2!9!9+:K:KA:NPQST!U&{4FV[\ddefgK077	8JAqQ$Y0@PUV^^_`aI =YTI+I{CI..94t7SSVZVdVdgpVppK--U\\$5GHK--U\\%5HIK**;:K0EkooFW0XNN+,?	-F ho
_c_`bcU__Qekk2EOOAU[[4YZ
 
 
s   5AI)r5   r5   r5   i 1  )r$   r%   r&   r'   floatintrb   r(   no_gradr   r,   r+   r   __classcell__re   s   @r/   r[   r[      s     jo##27#JO#cf#4 U]]_D#llD $llD \\	D
 llD 
eFm	D Dr.   r[   	num_masksc                     | j                         j                  d      }d||z  j                  d      z  }|j                  d      |j                  d      z   }d|dz   |dz   z  z
  }|j                         |z  }|S )a4  
    Compute the DICE loss, similar to generalized IOU for masks as follows:

    $$ \mathcal{L}_{\text{dice}(x, y) = 1 - \frac{2 * x \cap y }{x \cup y + 1}} $$

    In practice, since `labels` is a binary mask, (only 0s and 1s), dice can be computed as follow

    $$ \mathcal{L}_{\text{dice}(x, y) = 1 - \frac{2 * x * y }{x + y + 1}} $$

    Args:
        inputs (`torch.Tensor`):
            A tensor representing a mask.
        labels (`torch.Tensor`):
            A tensor with the same shape as inputs. Stores the binary classification labels for each element in inputs
            (0 for the negative class and 1 for the positive class).
        num_masks (`int`):
            The number of masks present in the current batch, used for normalization.

    Returns:
        `torch.Tensor`: The computed loss.
    r   r4   rB   )rC   rD   rG   )r?   r@   r   probsrH   rI   r   s          r/   	dice_lossr     sz    , NN$$Q'EUV^((,,I))B-&**R.0K	A+/22D88:	!DKr.   c                     t        j                  d      } || |      }|j                  d      j                         |z  }|S )a|  
    Args:
        inputs (`torch.Tensor`):
            A float tensor of arbitrary shape.
        labels (`torch.Tensor`):
            A tensor with the same shape as inputs. Stores the binary classification labels for each element in inputs
            (0 for the negative class and 1 for the positive class).

    Returns:
        loss (`torch.Tensor`): The computed loss.
    rL   rM   r   )r   rP   meanrG   )r?   r@   r   rT   cross_entropy_lossr   s         r/   sigmoid_cross_entropy_lossr   7  sD     $$v6I"662""1%))+i7DKr.   c                       e Zd Zdedeeef   f fdZdeee	      dee	   fdZ
dee   deeef   fdZd	ed
ee   deej                     deeef   fdZdej                  deej                     deej                     de	deeej                  f   f
dZd Zd Zdej                  dej                  fdZdej                  de	de	dedej                  f
dZ	 ddej                  d	ej                  deej                     d
eej                     deeeej                  f      deeej                  f   fdZd
ej                  dej2                  dej                  fdZ xZS )EomtLossconfigweight_dictc                    t         |           t        | dg       |j                  | _        || _        |j
                  | _        t        j                  | j                  dz         }| j                  |d<   | j                  d|       |j                  | _        |j                  | _        |j                  | _        t        |j                  |j                   |j"                  | j                        | _        y)aH  
        The Eomt Loss. The loss is computed very similar to DETR. The process happens in two steps: 1) we
        compute hungarian assignment between ground truth masks and the outputs of the model 2) we supervise each pair
        of matched ground-truth / prediction (supervise class and mask)

        Args:
            config (`EomtConfig`):
                The configuration for Eomt model also containing loss calculation specific parameters.
            weight_dict (`dict[str, float]`):
                A dictionary of weights to be applied to the different losses.
        scipyr   rB   empty_weight)r\   r^   r]   r_   N)ra   rb   r   
num_labelsr   no_object_weighteos_coefr(   onesregister_buffertrain_num_pointsr_   oversample_ratioimportance_sample_ratior[   class_weightdice_weightmask_weightmatcher)rd   r   r   r   re   s       r/   rb   zEomtLoss.__init__L  s     	$	* ++& //zz$//A"56==R^\: !11 & 7 7'-'E'E$+**((((	
r.   sizesr2   c                 n    |d   }|dd  D ]'  }t        |      D ]  \  }}t        ||   |      ||<    ) |S )Nr   r   )	enumeratemax)rd   r   maxessublistindexitems         r/   _max_by_axiszEomtLoss._max_by_axiso  sS    aQRy 	7G(1 7t"5<6e7	7 r.   tensorsc                 `   | j                  |D cg c]  }t        |j                         c}      }t        |      g|z   }|\  }}}}|d   j                  }	|d   j
                  }
t        j                  ||	|
      }t        j                  |||ft        j                  |
      }t        |||      D ]o  \  }}}|d |j                  d   d |j                  d   d |j                  d   f   j                  |       d|d |j                  d   d |j                  d   f<   q ||fS c c}w )Nr   rn   rj   r   r4   F)r   r,   rO   lenrn   rj   r(   zerosr   boolzipcopy_)rd   r   ru   max_sizebatch_shaper}   _heightwidthrn   rj   padded_tensorspadding_maskspadded_tensorpadding_masks                  r/   _pad_images_to_max_in_batchz$EomtLoss._pad_images_to_max_in_batchw  s7   $$w%OVd6<<&8%OP7|nx/'2$
Avu
  ""[fM

J#>ejjY_`36wP]3^ 	G/FM<+FLLO+->v||A->@Q&,,q/@QQRXXY_`AFL*6<<?*,=fll1o,==>	G },, &Ps   D+r   rg   r|   c           	         |}|j                   \  }}}t        j                  | j                        }| j	                  |      }	t        j                  t        ||      D 
cg c]  \  }
\  }}|
|    c}}}
      }t        j                  ||f| j                  t
        j                  |j                        }|||	<   |j                  dd      } |||      }d|i}|S c c}}}
w )a  Compute the losses related to the labels using cross entropy.

        Args:
            class_queries_logits (`torch.Tensor`):
                A tensor of shape `batch_size, num_queries, num_labels`
            class_labels (`list[torch.Tensor]`):
                List of class labels of shape `(labels)`.
            indices (`tuple[np.array])`:
                The indices computed by the Hungarian matcher.

        Returns:
            `dict[str, Tensor]`: A dict of `torch.Tensor` containing the following key:
            - **loss_cross_entropy** -- The loss computed using cross entropy on the predicted and ground truth labels.
        )weight)
fill_valuern   rj   r   r4   loss_cross_entropy)rO   r   CrossEntropyLossr   $_get_predictions_permutation_indicesr(   catr   fullr   r{   rj   	transpose)rd   r   rg   r|   pred_logitsr}   num_queriesr   rT   idxtargetr   target_classes_otarget_classespred_logits_transposedloss_celossess                    r/   loss_labelszEomtLoss.loss_labels  s    " +%0%6%6"
K''t/@/@A	77@ 99-0w-GHH>66AqVAYH
 %$//]h]o]o
 /s!,!6!6q!!<2NC&0 Is   #C!r   rf   r   c                      j                  |      } j                  |      }||   } j                  |      \  }}	||   }|dddf   }|dddf   }t        j                         5   j                  | fd j                   j                   j                        }
t        ||
d      j                  d      }ddd       t        |
d      j                  d      }t        ||      t        |||      d}~~|S # 1 sw Y   ExY w)a  Compute the losses related to the masks using sigmoid_cross_entropy_loss and dice loss.

        Args:
            masks_queries_logits (`torch.Tensor`):
                A tensor of shape `(batch_size, num_queries, height, width)`.
            mask_labels (`torch.Tensor`):
                List of mask labels of shape `(labels, height, width)`.
            indices (`tuple[np.array])`:
                The indices computed by the Hungarian matcher.
            num_masks (`int)`:
                The number of masks, used for normalization.

        Returns:
            losses (`dict[str, Tensor]`): A dict of `torch.Tensor` containing two keys:
            - **loss_mask** -- The loss computed using sigmoid cross entropy loss on the predicted and ground truth.
              masks.
            - **loss_dice** -- The loss computed using dice loss on the predicted on the predicted and ground truth,
              masks.
        Nc                 &    j                  |       S N)calculate_uncertainty)logitsrd   s    r/   <lambda>z%EomtLoss.loss_masks.<locals>.<lambda>  s    t99&A r.   Frk   r   )	loss_mask	loss_dice)r    _get_targets_permutation_indicesr   r(   r   sample_points_using_uncertaintyr_   r   r   r>   r:   r   r   )rd   r   rf   r|   r   src_idxtgt_idx
pred_maskstarget_masksr   r1   point_labelspoint_logitsr   s   `             r/   
loss_maskszEomtLoss.loss_masks  s,   4 ;;GD77@)'2
 ::;Ga#G,  4(
#AtG, ]]_ 		i $ D DA%%,,! (6GW\]eefghL		i $J0AQVW__`ab 4L,PYZ"<yI

 )		i 		is   (AD  D	c                    t        j                  t        |      D cg c]  \  }\  }}t        j                  ||        c}}}      }t        j                  |D cg c]  \  }}|	 c}}      }||fS c c}}}w c c}}w r   r(   r   r   	full_like)rd   r|   r~   srcr   batch_indicespredictions_indicess          r/   r   z-EomtLoss._get_predictions_permutation_indices  sj    		iX_N`"a"a{q(35??3#:"ab#iiW(E#q(EF111 #b(E   #A7A>
c                    t        j                  t        |      D cg c]  \  }\  }}t        j                  ||        c}}}      }t        j                  |D cg c]  \  }}|	 c}}      }||fS c c}}}w c c}}w r   r   )rd   r|   r~   r   tgtr   target_indicess          r/   r   z)EomtLoss._get_targets_permutation_indices  sh    		iX_N`"a"a{q(1c5??3#:"ab#@HQC#@An,, #b#@r   r   c                 2    t        j                  |       }|S )a  
        In Eomt paper, uncertainty is estimated as L1 distance between 0.0 and the logit prediction in 'logits'
        for the foreground class in `classes`.

        Args:
            logits (`torch.Tensor`):
            A tensor of shape (R, 1, ...) for class-specific or class-agnostic, where R is the total number of predicted masks in all images and C is:
            the number of foreground classes. The values are logits.

        Returns:
            scores (`torch.Tensor`): A tensor of shape (R, 1, ...) that contains uncertainty scores with the most
            uncertain locations having the highest uncertainty score.
        )r(   abs)rd   r   uncertainty_scoress      r/   r   zEomtLoss.calculate_uncertainty  s      %yy01!!r.   r_   r   r   c           	         |j                   d   }t        ||z        }t        j                  ||d|j                        }t        ||d      }	 ||	      }
t        ||z        }||z
  }t        j                  |
dddddf   |d      d   }|t        j                  |t        j                  |j                  	      z  }||dddf   z  }|j                  d
d      |j                  d
      ddf   j                  ||d      }|dkD  r:t        j                  |t        j                  ||d|j                        gd      }|S )a  
        This function is meant for sampling points in [0, 1] * [0, 1] coordinate space based on their uncertainty. The
        uncertainty is calculated for each point using the passed `uncertainty function` that takes points logit
        prediction as input.

        Args:
            logits (`float`):
                Logit predictions for P points.
            uncertainty_function:
                A function that takes logit predictions for P points and returns their uncertainties.
            num_points (`int`):
                The number of points P to sample.
            oversample_ratio (`int`):
                Oversampling parameter.
            importance_sample_ratio (`float`):
                Ratio of points that are sampled via importance sampling.

        Returns:
            point_coordinates (`torch.Tensor`):
                Coordinates for P sampled points.
        r   r4   ri   Frk   Nr   )kr6   r   rB   r6   )rO   r   r(   rr   rj   r>   topkarangelongviewr   )rd   r   uncertainty_functionr_   r   r   	num_boxesnum_points_sampledr1   r   point_uncertaintiesnum_uncertain_pointsnum_random_pointsr   shifts                  r/   r   z(EomtLoss.sample_points_using_uncertainty  sI   < LLO	 .>!>? "JJy2DaPVP]P]^#F,=US2<@"#:Z#GH&)==jj,Q1W59MSTUVWX"U\\)5::V\VcVc%dduQW~-222q9#((2,/JOOPY[oqrsq  %		"EJJy:KQW]WdWd$ef! ! r.   auxiliary_predictionsc                    | j                  ||||      }| j                  ||d   j                        }i | j                  ||||      | j	                  |||      }|jt        |      D ]\  \  }	}
|
d   }|
d   }| j                  ||||      }|j                         D ci c]  \  }}| d|	 | }}}|j                  |       ^ |S c c}}w )a  
        This performs the loss computation.

        Args:
            masks_queries_logits (`torch.Tensor`):
                A tensor of shape `(batch_size, num_queries, height, width)`.
            class_queries_logits (`torch.Tensor`):
                A tensor of shape `(batch_size, num_queries, num_labels)`.
            mask_labels (`torch.Tensor`):
                List of mask labels of shape `(labels, height, width)`.
            class_labels (`list[torch.Tensor]`):
                List of class labels of shape `(labels)`.
            auxiliary_predictions (`dict[str, torch.Tensor]`, *optional*):
                if `use_auxiliary_loss` was set to `true` in [`EomtConfig`], then it contains the logits from
                the inner layers of the EomtMaskedAttentionDecoder.

        Returns:
            losses (`dict[str, Tensor]`): A dict of `torch.Tensor` containing three keys:
            - **loss_cross_entropy** -- The loss computed using cross entropy on the predicted and ground truth labels.
            - **loss_mask** -- The loss computed using sigmoid cross_entropy loss on the predicted and ground truth
              masks.
            - **loss_dice** -- The loss computed using dice loss on the predicted on the predicted and ground truth
              masks.
            if `use_auxiliary_loss` was set to `true` in [`EomtConfig`], the dictionary contains additional
            losses for each auxiliary predictions.
        r   ri   r   r   r   )	r   get_num_masksrj   r   r   r   r   itemsupdate)rd   r   r   rf   rg   r  r|   r   r   r   aux_outputs	loss_dictkeyvalues                 r/   r   zEomtLoss.forward<  s
   H ,,35I;Xde&&|LO<R<R&S	%
oo2K)T%
3\7K%

 !,$-.C$D ) ['23I'J$'23I'J$ LL)=?SU`bno	EN__EVWzsEuAcU^U2W	Wi()  Xs   "Crj   c                 P   t        |D cg c]  }t        |       c}      }t        j                  |t        j                  |      }d}t               r2t        j                  i k7  rt        |      }t               j                  }t        j                  ||z  d      }|S c c}w )zk
        Computes the average number of target masks across the batch, for normalization purposes.
        r   r   )min)rG   r   r(   rz   r   r   r   _shared_stater   num_processesclamp)rd   rg   rj   classesr   
world_sizes         r/   r  zEomtLoss.get_num_maskss  s     \B'WBC	OOIU[[P	
"$))R/"9-	)^99
KK	J 6A>	 Cs   B#r   )r$   r%   r&   r   dictstrr   rb   r,   r   r   r   r+   r   nparrayr   r(   r   r   r   r   r   r   r   rj   r  r   r   s   @r/   r   r   K  s   !
z !
S%Z8H !
F$tCy/ d3i -4< -E&RX.DY -" $* :>v, QVWYW_W_Q` 	c6k	 D<#ll< %,,'< rxx	<
 < 
c5<<	 <|2-"ELL "U\\ ""5!5! 	5!
 5! "'5! 
5!z DH5#ll5 $ll5 %,,'	5
 5<<(5  (S%,,->(?@5 
c5<<	 5n%,,  QVQ]Q] r.   r   c                   Z     e Zd ZdZ fdZdej                  dej                  fdZ xZS )EomtPatchEmbeddingsz
    This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial
    `hidden_states` (patch embeddings) of shape `(batch_size, seq_length, hidden_size)` to be consumed by a
    Transformer.
    c                    t         |           |j                  |j                  }}|j                  |j
                  }}t        |t        j                  j                        r|n||f}t        |t        j                  j                        r|n||f}|d   |d   z  |d   |d   z  z  }|| _        || _        || _        || _
        t        j                  ||||      | _        y )Nr   r   kernel_sizestride)ra   rb   
image_size
patch_sizenum_channelshidden_size
isinstancecollectionsabcIterablenum_patchesr   Conv2d
projection)rd   r   r  r  r  r   r%  re   s          r/   rb   zEomtPatchEmbeddings.__init__  s    !'!2!2F4E4EJ
$*$7$79K9Kk#-j+//:R:R#SZZdfpYq
#-j+//:R:R#SZZdfpYq
!!}
15*Q-:VW=:XY$$(&))L+:^hir.   pixel_valuesr2   c                     |j                   d   }|| j                  k7  rt        d| j                   d| d      | j                  |      j	                  d      j                  dd      }|S )Nr   zoMake sure that the channel dimension of the pixel values match with the one set in the configuration. Expected z	 but got .r4   )rO   r  rc   r'  rD   r   )rd   r(  r  
embeddingss       r/   r   zEomtPatchEmbeddings.forward  sz    #))!,4,,,!../yaI  __\2::1=GG1M
r.   )	r$   r%   r&   r'   rb   r(   r   r   r   r   s   @r/   r  r    s)    jELL U\\ r.   r  c                   d     e Zd ZdZdeddf fdZdej                  dej                  fdZ xZ	S )EomtEmbeddingszM
    Construct the CLS token, mask token, position and patch embeddings.
    r   r2   Nc                    t         |           || _        |j                  | _        t	        j
                  t        j                  dd|j                              | _	        t	        j
                  t        j                  d|j                  |j                              | _        t        |      | _        | j                  j                  }t	        j                   |j"                        | _        d|j                  z   | _        t	        j(                  ||j                        | _        | j-                  dt        j.                  |      j1                  d      d       y )Nr   position_ids)r   rB   F)
persistent)ra   rb   r   r  r   	Parameterr(   randnr   	cls_tokenr   num_register_tokensregister_tokensr  patch_embeddingsr%  Dropouthidden_dropout_probdropoutnum_prefix_tokens	Embeddingposition_embeddingsr   r   expand)rd   r   r%  re   s      r/   rb   zEomtEmbeddings.__init__  s     ++ekk!Q8J8J&KL!||EKK6;U;UW]WiWi,jk 3F ;++77zz&"<"<=!"V%?%?!?#%<<V=O=O#P ^U\\+-F-M-Mg-Vchir.   r(  c                    |j                   \  }}}}| j                  j                  j                  j                  }| j                  |j                  |            }| j                  j                  |dd      }| j                  j                  |dd      }|| j                  | j                        z   }t        j                  |||gd      }| j                  |      }|S )Nrm   rB   r   r   )rO   r6  r'  r   rn   rq   r3  r=  r5  r<  r/  r(   r   r9  )rd   r(  r}   r   target_dtyper+  
cls_tokensr5  s           r/   r   zEomtEmbeddings.forward  s    *00
Aq!,,77>>DD**<???+NO
^^**:r2>
..55j"bI$":":4;L;L"MM
YY
OZHaP
\\*-
r.   )
r$   r%   r&   r'   r   rb   r(   r   r   r   r   s   @r/   r-  r-    s9    jz jd j ELL U\\ r.   r-  modulequeryr
  r  attention_maskscalingr9  c                    t        j                  ||j                  dd            |z  }|||z   }t        j                  j                  |dt         j                        j                  |j                        }t        j                  j                  ||| j                        }t        j                  ||      }	|	j                  dd      j                         }	|	|fS )NrB   )r6   rn   )ptrainingr   r4   )r(   rE   r   r   r8   rp   float32rq   rn   r9  rH  
contiguous)
rA  rB  r
  r  rC  rD  r9  r<   attn_weightsattn_outputs
             r/   eager_attention_forwardrM    s     <<s}}R'<=GL!#n4==((2U]](SVVW\WbWbcL==((6??([L,,|U3K''1-88:K$$r.   c            
            e Zd ZdZ fdZ	 ddej                  deej                     deej                  eej                     f   fdZ	 xZ
S )EomtAttentionz=Multi-headed attention from 'Attention Is All You Need' paperc                    t         |           || _        |j                  | _        |j
                  | _        | j                  | j                  z  | _        | j                  | j                  z  | j                  k7  r&t        d| j                   d| j                   d      | j                  dz  | _	        |j                  | _        d| _        t        j                  | j                  | j                        | _        t        j                  | j                  | j                        | _        t        j                  | j                  | j                        | _        t        j                  | j                  | j                        | _        y )Nz;embed_dim must be divisible by num_heads (got `embed_dim`: z and `num_heads`: z).g      F)ra   rb   r   r   	embed_dimnum_attention_heads	num_headshead_dimrc   scaleattention_dropoutr9  	is_causalr   Lineark_projv_projq_projout_projrd   r   re   s     r/   rb   zEomtAttention.__init__  s   ++33$..8==4>>)T^^;MdnnM] ^NN#2'  ]]D(
//ii?ii?ii?		$..$..Ar.   r!   rC  r2   c           
      :   |j                   \  }}}| j                  |      }| j                  |      }| j                  |      }	|j	                  ||| j
                  | j                        j                  dd      }|j	                  ||| j
                  | j                        j                  dd      }|	j	                  ||| j
                  | j                        j                  dd      }	t        }
| j                  j                  dk7  rt        | j                  j                     }
 |
| |||	|| j                  | j                  | j                  sdn| j                        \  }}|j!                  |||      j#                         }| j%                  |      }||fS )z#Input shape: Batch x Time x Channelr   r4   eager        )rW  rD  r9  )rO   r[  rY  rZ  r   rS  rT  r   rM  r   _attn_implementationr   rW  rU  rH  r9  reshaperJ  r\  )rd   r!   rC  r<   r}   
seq_lengthrQ  querieskeysvaluesattention_interfacerL  rK  s                r/   r   zEomtAttention.forward  sa    -:,?,?)
J	++m,{{=)]+,,z:t~~t}}U__`acdeyyZOYYZ[]^_ZT^^T]]S]]^_abc(?;;++w6"9$++:Z:Z"[$7nnJJ#}}C$,,	%
!\ "))*j)LWWYmmK0L((r.   r   )r$   r%   r&   r'   rb   r(   r   r   r+   r   r   r   s   @r/   rO  rO    sV    GB. 26$)||$) !.$)
 
u||Xell33	4$)r.   rO  c                   X     e Zd Zd fdZdej
                  dej
                  fdZ xZS )EomtLayerScaler2   c                     t         |           t        j                  |j                  t        j                  |j                        z        | _        y r   )	ra   rb   r   r1  layerscale_valuer(   r   r   lambda1r]  s     r/   rb   zEomtLayerScale.__init__  s8    ||F$;$;ejjI[I[>\$\]r.   hidden_statec                      || j                   z  S r   )rl  rd   rm  s     r/   r   zEomtLayerScale.forward#  s    dll**r.   r2   Nr$   r%   r&   rb   r(   r   r   r   r   s   @r/   ri  ri    s$    ^+ELL +U\\ +r.   ri  input	drop_probrH  c                    |dk(  s|s| S d|z
  }| j                   d   fd| j                  dz
  z  z   }|t        j                  || j                  | j
                        z   }|j                          | j                  |      |z  }|S )aF  
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
    however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
    layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
    argument.
    r`  r   r   )r   r   )rO   ndimr(   rr   rn   rj   floor_div)rr  rs  rH  	keep_probrO   random_tensoroutputs          r/   	drop_pathr{  '  s     CxII[[^

Q 77E

5ELL YYMYYy!M1FMr.   c                   x     e Zd ZdZd	dee   ddf fdZdej                  dej                  fdZ	de
fdZ xZS )
EomtDropPathzXDrop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).Nrs  r2   c                 0    t         |           || _        y r   )ra   rb   rs  )rd   rs  re   s     r/   rb   zEomtDropPath.__init__>  s    "r.   r!   c                 D    t        || j                  | j                        S r   )r{  rs  rH  rd   r!   s     r/   r   zEomtDropPath.forwardB  s    FFr.   c                      d| j                    S )Nzp=)rs  rd   s    r/   
extra_reprzEomtDropPath.extra_reprE  s    DNN#$$r.   r   )r$   r%   r&   r'   r   r   rb   r(   r   r   r  r  r   r   s   @r/   r}  r}  ;  sG    b#(5/ #T #GU\\ Gell G%C %r.   r}  c                   X     e Zd Zd fdZdej
                  dej
                  fdZ xZS )EomtMLPr2   c                 ~   t         |           |j                  x}}t        |j                  |j                  z        }t        j                  ||d      | _        t        |j                  t              rt        |j                     | _        n|j                  | _        t        j                  ||d      | _        y )NTbias)ra   rb   r   r   	mlp_ratior   rX  fc1r!  
hidden_actr  r	   
activationfc2rd   r   in_featuresout_featureshidden_featuresre   s        r/   rb   zEomtMLP.__init__J  s    %+%7%77lf0063C3CCD99[/Ef''-$V%6%67DO$//DO99_lFr.   rm  c                 l    | j                  |      }| j                  |      }| j                  |      }|S r   )r  r  r  ro  s     r/   r   zEomtMLP.forwardU  s2    xx-|4xx-r.   rp  rq  r   s   @r/   r  r  I  s$    	GELL U\\ r.   r  c                   X     e Zd Zd fdZdej
                  dej
                  fdZ xZS )EomtSwiGLUFFNr2   c                 0   t         |           |j                  x}}t        |j                  |j                  z        }t        |dz  dz        dz   dz  dz  }t        j                  |d|z  d      | _        t        j                  ||d      | _        y )Nr4   r         Tr  )	ra   rb   r   r   r  r   rX  
weights_inweights_outr  s        r/   rb   zEomtSwiGLUFFN.__init__]  s    %+%7%77lf0063C3CCD2Q67!;AAE))K_1D4P99_lNr.   rm  c                     | j                  |      }|j                  dd      \  }}t        j                  j	                  |      |z  }| j                  |      S )Nr4   rB   r   )r  chunkr   r8   silur  )rd   rm  x1x2hiddens        r/   r   zEomtSwiGLUFFN.forwardf  sS    |4##A2#.B##B'",''r.   rp  rq  r   s   @r/   r  r  \  s$    O(ELL (U\\ (r.   r  c                        e Zd ZdZdeddf fdZ	 d	dej                  deej                     dej                  fdZ	 xZ
S )
	EomtLayerzCThis corresponds to the Block class in the original implementation.r   r2   Nc                    t         |           t        j                  |j                  |j
                        | _        t        |      | _        t        |      | _
        |j                  dkD  rt        |j                        nt        j                         | _        t        j                  |j                  |j
                        | _        |j                   rt#        |      | _        nt'        |      | _        t        |      | _        y )Nepsr`  )ra   rb   r   	LayerNormr   layer_norm_epsnorm1rO  	attentionri  layer_scale1drop_path_rater}  Identityr{  norm2use_swiglu_ffnr  mlpr  layer_scale2r]  s     r/   rb   zEomtLayer.__init__p  s    \\&"4"4&:O:OP
&v.*62@F@U@UX[@[f&;&;<acalalan\\&"4"4&:O:OP
  $V,DHvDH*62r.   r!   	head_maskc                 *   | j                  |      }| j                  ||      \  }}| j                  |      }| j                  |      |z   }| j	                  |      }| j                  |      }| j                  |      }| j                  |      |z   }|S r   )r  r  r  r{  r  r  r  )rd   r!   r  hidden_states_normself_attention_outputr   layer_outputs          r/   r   zEomtLayer.forward  s    
 "ZZ6#'>>2Di#P q $ 1 12G H '<=M zz-0xx-((6 ~~l3mCr.   r   )r$   r%   r&   r'   r   rb   r(   r   r   r   r   r   s   @r/   r  r  m  sP    M3z 3d 3& -1|| ELL) 
	r.   r  c                   X     e Zd Zd fd	Zdej
                  dej
                  fdZ xZS )EomtLayerNorm2dc                 *    t         |   |||       y )N)r  elementwise_affine)ra   rb   )rd   r  r  affinere   s       r/   rb   zEomtLayerNorm2d.__init__  s    36Jr.   rm  r2   c                     |j                  dddd      }t        j                  || j                  | j                  | j
                  | j                        }|j                  dddd      }|S )Nr   r4   r   r   )permuteF
layer_normnormalized_shaper   r  r  ro  s     r/   r   zEomtLayerNorm2d.forward  sb    #++Aq!Q7||L$2G2GVZV_V_aeaiaij#++Aq!Q7r.   )gư>Trq  r   s   @r/   r  r    s$    KELL U\\ r.   r  c                   \     e Zd Zdef fdZdej                  dej                  fdZ xZS )EomtScaleLayerr   c                    t         |           |j                  }t        j                  ||dd      | _        t        |j                     | _        t        j                  ||dd|d      | _
        t        |      | _        y )Nr4   r  r   r   F)r  paddinggroupsr  )ra   rb   r   r   ConvTranspose2dconv1r	   r  r  r&  conv2r  layernorm2drd   r   r   re   s      r/   rb   zEomtScaleLayer.__init__  su    ((''[aXYZ
 !2!23YY

 +;7r.   r!   r2   c                     | j                  |      }| j                  |      }| j                  |      }| j                  |      }|S r   )r  r  r  r  r  s     r/   r   zEomtScaleLayer.forward  sB    

=16

=1((7r.   	r$   r%   r&   r   rb   r(   r   r   r   r   s   @r/   r  r    s*    8z 8 U\\ ell r.   r  c                   \     e Zd Zdef fdZdej                  dej                  fdZ xZS )EomtScaleBlockr   c                     t         |           |j                  | _        t	        j
                  t        | j                        D cg c]  }t        |       c}      | _        y c c}w r   )	ra   rb   num_upscale_blocks
num_blocksr   
ModuleListro   r  blockrd   r   r   re   s      r/   rb   zEomtScaleBlock.__init__  sG     33]]E$//DZ#[qN6$:#[\
#[s   A&r!   r2   c                 8    | j                   D ]
  } ||      } |S r   )r  )rd   r!   r  s      r/   r   zEomtScaleBlock.forward  s%    ZZ 	1E!-0M	1r.   r  r   s   @r/   r  r    s,    ]z ]
U\\ ell r.   r  c                   \     e Zd Zdef fdZdej                  dej                  fdZ xZS )EomtMaskHeadr   c                    t         |           |j                  }t        j                  ||      | _        t        j                  ||      | _        t        j                  ||      | _        t        |j                     | _
        y r   )ra   rb   r   r   rX  r  r  fc3r	   r  r  r  s      r/   rb   zEomtMaskHead.__init__  sa    ((99[+699[+699[+6 !2!23r.   r!   r2   c                     | j                  | j                  |            }| j                  | j                  |            }| j                  |      }|S r   )r  r  r  r  r  s     r/   r   zEomtMaskHead.forward  sD    (?@(?@/r.   r  r   s   @r/   r  r    s*    4z 4U\\ ell r.   r  c                   d    e Zd ZU dZeed<   dZdZdZdgZ	dZ
dZeedZd	ej                   d
dfdZy)EomtPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    r   eomtr(  Fr  T)r!   r"   rA  r2   Nc                    | j                   j                  }t        |t        j                  t        j
                  t        j                  f      rt        j                  j                  |j                  t        j                  d             |j                  xt        j                  j                  |j                        \  }}|dkD  rdt        j                  |      z  nd}t        j                  j                  |j                  | |       y y t        |t        j                        rJ|j                  j                   j#                  d       |j                  j                   j%                          y t        |t        j&                        rf|j                  j                   j)                  dd       |j*                  2|j                  j                   |j*                     j%                          y y t        |t,              rGt/        |d      r:|j0                  j                   j#                  | j                   j2                         y y t        |t4              rt        j                  j7                  |j8                  j                   j;                  t<        j>                        d|      j;                  |j8                  j@                        |j8                  _        |jB                  j                   j%                          y y )	N   )ar   r   r5   r`  )r   stdrl  )"r   initializer_ranger!  r   rX  r&  r  initkaiming_uniform_r   mathsqrtr  _calculate_fan_in_and_fan_outuniform_r  datafill_zero_r;  normal_padding_idxri  hasattrrl  rk  r-  trunc_normal_r3  rq   r(   rI  rn   r5  )rd   rA  r  fan_inr   bounds         r/   _init_weightsz!EomtPreTrainedModel._init_weights  s   kk++fryy"))R5G5GHIGG$$V]]diil$C{{&GGAA&--P	17!DIIf--  ufe< ' -MM$$S)KK""$-MM&&CQ&7!!-""6#5#56<<> ./vy)##))$++*F*FG */$&GG$9$9  %%((7cs %: %b!!''( ! ""''--/	 0r.   )r$   r%   r&   r'   r   r*   base_model_prefixmain_input_namesupports_gradient_checkpointing_no_split_modules_supports_sdpa_supports_flash_attnr  rO  _can_record_outputsr   Moduler  r-   r.   r/   r  r    sX    
 $O&+#$N"#
0BII 0$ 0r.   r  zV
    The EoMT Model with head on top for instance/semantic/panoptic segmentation.
    c                       e Zd ZdZdef fdZdededededeeef   d	eeef   fd
Z	deeef   d	efdZ
ee	 	 	 ddedeee      deee      deee      dee   d	efd              Zd Zdej                  fdZed        Z xZS )EomtForUniversalSegmentationr(  r   c                    t         |   |       || _        |j                  | _        t	        |      | _        t        j                  |j                  |j                        | _
        t        j                  |j                  |j                        | _        t        j                  t        |j                        D cg c]  }t!        |       c}      | _        t%        |      | _        t)        |      | _        t        j,                  |j                  |j.                  dz         | _        |j2                  |j4                  z  |j2                  |j4                  z  f| _        |j8                  |j:                  |j<                  d| _        tA        || j>                        | _!        | jE                  dtG        jH                  |jJ                               | jM                          y c c}w )Nr  r   )r   r   r   )r   r   attn_mask_probs)'ra   rb   r   num_hidden_layersr-  r+  r   r  r   r  	layernormr;  r   rB  r  ro   r  layersr  upscale_blockr  	mask_headrX  r   class_predictorr  r  	grid_sizer   r   r   r   r   rT   r   r(   r   r  	post_initr  s      r/   rb   z%EomtForUniversalSegmentation.__init__  sp    !'!9!9(0f&8&8f>S>ST\\&"4"4f6H6HI
mmfF^F^@_$`1Yv%6$`a+F3%f-!yy););V=N=NQR=RS ++v/@/@@&BSBSW]WhWhBhi"("5"5++++.
 "T=M=MN.

6;L;L0MN% %as   >G*r   r   rf   rg   r  r2   c                     | j                  |||||      }| j                  j                         D ]'  \  }}|j                         D ]  \  }	}
||	v s|
|z  }
 ) |S )Nr   r   rf   rg   r  )rT   r   r  )rd   r   r   rf   rg   r  r	  r
  r   loss_keyr   s              r/   get_loss_dictz*EomtForUniversalSegmentation.get_loss_dict(  s|     (,~~!5!5#%"7 (6 (
	  ++113 	#KC"+//"3 #$(?FND#	#
 r.   r	  c                 4    t        |j                               S r   )rG   rf  )rd   r	  s     r/   get_lossz%EomtForUniversalSegmentation.get_loss@  s    9##%&&r.   r#   r<   c                 (   d\  }}d}|t        d      | j                  |      }	t        | j                        D ]  \  }
}|
| j                  | j
                  j                  z
  k(  rp| j                  j                  dddddf   j                  |	j                  d   dd      j                  |	j                        }t        j                  ||	fd      }	|
| j                  | j
                  j                  z
  k\  r| j                  s7| j                   |
| j                  z
  | j
                  j                  z      dkD  r| j#                  |	      }| j%                  |      \  }}||fz  }||fz  }t        j&                  |	j                  d   |	j                  d   |	j                  d   |	j                  t        j(                        }t+        j,                  || j.                  d	
      }|j1                  |j3                  d      |j3                  d      d      }| j
                  j4                  }|| j                  j6                  z   }|dkD  |ddd||df<   | j9                  || j                   |
| j                  z
  | j
                  j                  z      |||j                        }|ddddf   j                  d| j
                  j:                  dd      }|j=                         j?                  | d      } ||	|      }	 | j#                  |	      }| j%                  |      \  }}||fz  }||fz  }d}|B|@d}tA        ||      D ]/  \  }}| jC                  ||||d      }|| jE                  |      z  }1 tG        |||||      S )ah  
        mask_labels (`list[torch.Tensor]`, *optional*):
            list of mask labels of shape `(num_labels, height, width)` to be fed to a model
        class_labels (`list[torch.LongTensor]`, *optional*):
            list of target class labels of shape `(num_labels, height, width)` to be fed to a model. They identify the
            labels of `mask_labels`, e.g. the label of `mask_labels[i][j]` if `class_labels[i][j]`.
        patch_offsets (`list[torch.Tensor]`, *optional*):
            list of tuples indicating the image index and start and end positions of patches for semantic segementation.
        )r-   r-   Nz You have to specify pixel_valuesr   rB   r   r   )rj   rn   bilinear)sizemode)probnum_query_tokensencoder_start_tokensrj   .g    er`  r  )r   r   r   r    r#   )$rc   r+  r   r  r  r   r  rB  r   r=  rO   rq   rj   r(   r   rH  r  r  predictr   r   r  interpolater  r   r  r   r:  _disable_attention_maskrR  r   masked_fillr   r  r
  r   )rd   r(  rf   rg   r#   r<   masks_queries_logits_per_layerclass_queries_logits_per_layerrC  r!   r   layer_modulerB  norm_hidden_statesr   r   interpolated_logitsr  r  sequence_outputr   r	  s                         r/   r   z$EomtForUniversalSegmentation.forwardC  s   ( JPF&(F?@@5!*4;;!7 .	HCd,,t{{/E/EEE

))$1*5<<]=P=PQR=SUWY[\__`m`t`tu %		5-*@a Hd,,t{{/E/EEE!5!5cD<R<R6RUYU`U`UkUk6k!lop!p%)^^M%B"=A\\J\=]:$&:.3G2II..3G2II.!&!''*!''*!''*(//**" '(mm4Ht~~dn&o#&9&>&>',,Q/1D1I1I!1Lb'# $(;;#:#: '7$//:[:['[$ ObdeNeq"3#3"35I5JJK "&!=!="--cD4J4J.JT[[McMc.cd%5)=)00 "> " "04!=!D!DRIhIhjlnp!q!/!5!5!7!C!C^OUY!Z(GM].	H` ..759\\/5R22&+?*AA&&+?*AA&"|'?D>A.0N? 
1:$&: !..)=)= +!-*. / 	 i00
1 2!5!5-'
 	
r.   c                 .    | j                   j                  S r   )r+  r6  r  s    r/   get_input_embeddingsz1EomtForUniversalSegmentation.get_input_embeddings  s    ///r.   r   c                    |d d d | j                   j                  d d f   }| j                  |      }|d d | j                   j                  | j                  j                  z   d d d f   }|j                  dd      } |j                  |j                  d   dg| j                   }| j                  |      }| j                  |      }t        j                  d||      }||fS )Nr   r4   r   rB   zbqc, bchw -> bqhw)r   r   r  r+  r:  r   rb  rO   r  r  r   r(   einsum)rd   r   query_tokensclass_logitsprefix_tokensmask_logitss         r/   r  z$EomtForUniversalSegmentation.predict  s    a!:4;;#:#:!:A=>++L9q$++"9"9DOO<]<]"]"_abbc%//15---m.A.A!.DbZ4>>Z~~l3**=9ll#6mTL((r.   c                     |dk  r9t        j                  | j                  d   ||      |kD  }d| d d d ||d f   |<   | S )Nr   r   ri   )r(   rr   rO   )	attn_maskr  r  r  rj   random_queriess         r/   r  z4EomtForUniversalSegmentation._disable_attention_mask  sW    !8"ZZ	(:<LU[\_ccN VWIa***,@,AAB>Rr.   )NNN)r$   r%   r&   r  r   rb   r   r  r  r  r
  r   r   r   r,   r   r   r   r   r  r(   r  staticmethodr  r   r   s   @r/   r  r    s7    %Oz 8$ % 	
   $CK0 
c6k	0'$sF{"3 ' '  /3/304e
e
 d6l+e
 tF|,	e

  V-e
 +,e
 
,e
  e
N0)ell )   r.   r  )F)r`  )r`  F)Icollections.abcr"  r  dataclassesr   typingr   r   numpyr  r(   torch.nn.functionalr   r8   r  r   activationsr	   
file_utilsr
   r   r   modeling_layersr   modeling_utilsr   r   processing_utilsr   utilsr   r   r   utils.genericr   configuration_eomtr   scipy.optimizer   
accelerater   accelerate.utilsr   r   r>   rJ   rY   r  r[   r   r   r   r   r  r-  r   rM  rO  ri  r   r{  r}  r  r  r  r  r  r  r  r  r  r  __all__r-   r.   r/   <module>r9     s  ,   ! %      ! L L 9 F & P P / * 4'' 	7 7	 7B LQLL5:\\
\\@  6 , u|| X]XdXd 8g299 gTf f   <u|| U\\ VY ^c^j^j (uryy up	")) B"RYY "X %II%<<% 
% <<	%
 U\\*% % %.;)BII ;)|+RYY +U\\ e T V[VbVb (%299 %bii &(BII ("'* 'Tbll RYY 2	RYY 	299 " (0/ (0 (0V 
#6 
D !"@
Ar.   