
"""
Processor class for SAM2.
"""

from copy import deepcopy
from typing import Optional, Union

import numpy as np

from ...image_utils import ImageInput
from ...processing_utils import ProcessorMixin
from ...tokenization_utils_base import BatchEncoding
from ...utils import TensorType, is_torch_available, logging
from ...utils.import_utils import requires


logger = logging.get_logger(__name__)

if is_torch_available():
    import torch


@requires(backends=("torch",))
class Sam2Processor(ProcessorMixin):
    r"""
    Constructs a SAM2 processor which wraps a SAM2 image processor and a 2D points & bounding boxes processor into a
    single processor.

    [`Sam2Processor`] offers all the functionalities of [`Sam2ImageProcessorFast`] and [`Sam2VideoProcessor`]. See the docstring of
    [`~Sam2ImageProcessorFast.__call__`] and [`~Sam2VideoProcessor.__call__`] for more information.

    Args:
        image_processor (`Sam2ImageProcessorFast`):
            An instance of [`Sam2ImageProcessorFast`].
        target_size (`int`, *optional*):
            The target size (target_size, target_size) to which the image will be resized.
        point_pad_value (`int`, *optional*, defaults to -10):
            The value used for padding input points.
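
    Example (a minimal usage sketch; the checkpoint id, image file, and prompt coordinates are illustrative):

    ```python
    >>> from PIL import Image
    >>> from transformers import Sam2Processor

    >>> processor = Sam2Processor.from_pretrained("facebook/sam2.1-hiera-tiny")  # hypothetical checkpoint id
    >>> image = Image.open("truck.jpg")  # any RGB image

    >>> # One image, one object, one foreground point, following the nesting
    >>> # [image level, object level, point level, point coordinates]:
    >>> inputs = processor(images=image, input_points=[[[[500.0, 375.0]]]], input_labels=[[[1]]], return_tensors="pt")
    ```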
    """

    attributes = ["image_processor"]
    image_processor_class = "Sam2ImageProcessorFast"

    def __init__(self, image_processor, target_size: Optional[int] = None, point_pad_value: int = -10, **kwargs):
        super().__init__(image_processor, **kwargs)
        self.point_pad_value = point_pad_value
        self.target_size = target_size if target_size is not None else self.image_processor.size["height"]

    def __call__(
        self,
        images: Optional[ImageInput] = None,
        segmentation_maps: Optional[ImageInput] = None,
        input_points: Optional[Union[list[list[list[list[float]]]], "torch.Tensor"]] = None,
        input_labels: Optional[Union[list[list[list[int]]], "torch.Tensor"]] = None,
        input_boxes: Optional[Union[list[list[list[float]]], "torch.Tensor"]] = None,
        original_sizes: Optional[Union[list[list[float]], "torch.Tensor"]] = None,
        return_tensors: Optional[Union[str, TensorType]] = "pt",
        **kwargs,
    ) -> BatchEncoding:
        r"""
        This method uses [`Sam2ImageProcessorFast.__call__`] to prepare image(s) for the model. It also prepares 2D
        points and bounding boxes for the model if they are provided.

        Args:
            images (`ImageInput`, *optional*):
                The image(s) to process.
            segmentation_maps (`ImageInput`, *optional*):
                The segmentation maps to process.
            input_points (`list[list[list[list[float]]]]`, `torch.Tensor`, *optional*):
                The points to add to the frame, nested as shown in the example below.
            input_labels (`list[list[list[int]]]`, `torch.Tensor`, *optional*):
                The labels for the points.
            input_boxes (`list[list[list[float]]]`, `torch.Tensor`, *optional*):
                The bounding boxes to add to the frame.
            original_sizes (`list[list[float]]`, `torch.Tensor`, *optional*):
                The original sizes of the images.
            return_tensors (`str` or `TensorType`, *optional*):
                The type of tensors to return.
            **kwargs:
                Additional keyword arguments to pass to the image processor.
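
        Example (a sketch of the expected nesting; `processor` and `image` are assumed to exist, and the
        coordinates are illustrative):

        ```python
        >>> # Two foreground points prompting one object in one image, following
        >>> # [image level, object level, point level, point coordinates]:
        >>> input_points = [[[[500.0, 375.0], [520.0, 360.0]]]]
        >>> input_labels = [[[1, 1]]]  # 1 = foreground, 0 = background
        >>> inputs = processor(images=image, input_points=input_points, input_labels=input_labels)
        ```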

        Returns:
            A [`BatchEncoding`] with the following fields:
            - `pixel_values` (`torch.Tensor`): The processed image(s).
            - `original_sizes` (`list[list[float]]`): The original sizes of the images.
            - `reshaped_input_sizes` (`torch.Tensor`): The reshaped input sizes of the images.
            - `labels` (`torch.Tensor`): The processed segmentation maps (if provided).
            - `input_points` (`torch.Tensor`): The processed points.
            - `input_labels` (`torch.Tensor`): The processed labels.
            - `input_boxes` (`torch.Tensor`): The processed bounding boxes.
        """
        if images is not None:
            encoding_image_processor = self.image_processor(
                images,
                segmentation_maps=segmentation_maps,
                return_tensors=return_tensors,
                **kwargs,
            )
        elif original_sizes is not None:
            if isinstance(original_sizes, torch.Tensor):
                original_sizes = original_sizes.cpu().tolist()
            encoding_image_processor = BatchEncoding({"original_sizes": original_sizes}, tensor_type=return_tensors)
        else:
            raise ValueError("Either images or original_sizes must be provided")

        original_sizes = encoding_image_processor["original_sizes"]
        if images is not None and len(original_sizes) != 1 and len(original_sizes) != len(images):
            raise ValueError(
                "original_sizes must be of length 1 or len(images). If you are passing a single image, you must"
                " pass a single original_size."
            )

        if input_points is not None or input_labels is not None or input_boxes is not None:
            processed_points = self._validate_single_input(
                input_points,
                expected_depth=4,
                input_name="points",
                expected_format="[image level, object level, point level, point coordinates]",
                expected_coord_size=2,
            )
            processed_labels = self._validate_single_input(
                input_labels,
                expected_depth=3,
                input_name="labels",
                expected_format="[image level, object level, point level]",
            )
            processed_boxes = self._validate_single_input(
                input_boxes,
                expected_depth=3,
                input_name="boxes",
                expected_format="[image level, box level, box coordinates]",
                expected_coord_size=4,
            )

            # Maximum sizes at each nesting level, used to pad ragged inputs
            points_max_dims = self._get_nested_dimensions(processed_points)[:-1] if processed_points is not None else None
            labels_max_dims = self._get_nested_dimensions(processed_labels) if processed_labels is not None else None
            boxes_max_dims = self._get_nested_dimensions(processed_boxes) if processed_boxes is not None else None

            # Points and labels must describe the same [image, object, point] structure
            if points_max_dims is not None and labels_max_dims is not None and points_max_dims != labels_max_dims:
                raise ValueError(
                    "Input points and labels have inconsistent dimensions. Please ensure they have the same dimensions."
                )

            # Boxes cannot be padded, so every image must provide the same number of boxes
            if processed_boxes is not None and len(boxes_max_dims) >= 2:
                if any(len(img_boxes) != boxes_max_dims[1] for img_boxes in processed_boxes):
                    raise ValueError(
                        "Input boxes have inconsistent dimensions that would require padding, but boxes cannot be"
                        " padded due to model limitations. Please ensure all images have the same number of boxes."
                    )

            if processed_points is not None:
                padded_points = self._pad_nested_list(processed_points, points_max_dims + [2])
                final_points = torch.tensor(padded_points, dtype=torch.float32)
                self._normalize_tensor_coordinates(final_points, original_sizes, preserve_padding=True)
                encoding_image_processor.update({"input_points": final_points})

            if processed_labels is not None:
                padded_labels = self._pad_nested_list(processed_labels, labels_max_dims)
                final_labels = torch.tensor(padded_labels, dtype=torch.int64)
                encoding_image_processor.update({"input_labels": final_labels})

            if processed_boxes is not None:
                final_boxes = torch.tensor(processed_boxes, dtype=torch.float32)
                self._normalize_tensor_coordinates(final_boxes, original_sizes, is_bounding_box=True)
                encoding_image_processor.update({"input_boxes": final_boxes})

        return encoding_image_processor

    def _normalize_coordinates(
        self, target_size: int, coords: "torch.Tensor", original_size, is_bounding_box: bool = False
    ) -> "torch.Tensor":
        """
        Expects a coordinate tensor of length 2 in the final dimension. Requires the original image size in (H, W) format.

        Args:
            target_size (`int`):
                The target size of the image.
            coords (`torch.Tensor`):
                The coordinates to be normalized.
            original_size (`tuple`):
                The original size of the image.
            is_bounding_box (`bool`, *optional*, defaults to `False`):
                Whether the coordinates are bounding boxes.
        """
        old_h, old_w = original_size
        new_h, new_w = target_size, target_size
        coords = deepcopy(coords).float()

        if is_bounding_box:
            # Treat each box (x1, y1, x2, y2) as two corner points
            coords = coords.reshape(-1, 2, 2)

        coords[..., 0] = coords[..., 0] * (new_w / old_w)
        coords[..., 1] = coords[..., 1] * (new_h / old_h)

        if is_bounding_box:
            coords = coords.reshape(-1, 4)

        return coords

    def _convert_to_nested_list(self, data, expected_depth, current_depth=0):
        """
        Recursively convert various input formats (tensors, numpy arrays, lists) to nested lists.

        Args:
            data: Input data in any format
            expected_depth: Expected nesting depth
            current_depth: Current depth in recursion
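
        Example (a sketch, assuming `processor` is an instantiated `Sam2Processor`):

        ```python
        >>> processor._convert_to_nested_list(torch.ones(1, 1, 2, 2), expected_depth=4)
        [[[[1.0, 1.0], [1.0, 1.0]]]]
        ```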

        Returns:
            Nested list representation of the data
        """
        if data is None:
            return None

        if isinstance(data, torch.Tensor):
            # Convert to plain Python lists once we reach one level above the leaves
            if current_depth == expected_depth - 1 or len(data.shape) <= 1:
                return data.numpy().tolist()
            return [self._convert_to_nested_list(item, expected_depth, current_depth + 1) for item in data]
        elif isinstance(data, np.ndarray):
            if current_depth == expected_depth - 1 or len(data.shape) <= 1:
                return data.tolist()
            return [self._convert_to_nested_list(item, expected_depth, current_depth + 1) for item in data]
        elif isinstance(data, list):
            if current_depth == expected_depth:
                return data
            return [self._convert_to_nested_list(item, expected_depth, current_depth + 1) for item in data]
        elif isinstance(data, (int, float)):
            return data
        else:
            raise ValueError(f"Unsupported data type: {type(data)}")

    def _get_nested_dimensions(self, nested_list, max_dims=None):
        """
        Get the maximum dimensions at each level of nesting.

        Args:
            nested_list (`list`):
                Nested list structure.
            max_dims (`list`, *optional*):
                Current maximum dimensions (for recursion).
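
        Example (a sketch, assuming `processor` is an instantiated `Sam2Processor`):

        ```python
        >>> # A ragged nesting: 2 images, at most 2 objects, at most 3 points
        >>> processor._get_nested_dimensions([[[1, 2], [3]], [[4, 5, 6]]])
        [2, 2, 3]
        ```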

        Returns:
            `list`: A list of maximum dimensions for each nesting level.
        """
        if max_dims is None:
            max_dims = []

        if not isinstance(nested_list, list):
            return max_dims

        # Track the maximum length seen at the current level
        if len(max_dims) == 0:
            max_dims.append(len(nested_list))
        else:
            max_dims[0] = max(max_dims[0], len(nested_list))

        if len(nested_list) > 0:
            for item in nested_list:
                if isinstance(item, list):
                    sub_dims = self._get_nested_dimensions(item)
                    for i, dim in enumerate(sub_dims):
                        if i + 1 >= len(max_dims):
                            max_dims.append(dim)
                        else:
                            max_dims[i + 1] = max(max_dims[i + 1], dim)

        return max_dims

    def _pad_nested_list(self, nested_list, target_dims, current_level=0, pad_value=None):
        """
        Recursively pad a nested list to match target dimensions.

        Args:
            nested_list (`list`):
                Nested list to pad.
            target_dims (`list`):
                Target dimensions for each level.
            current_level (`int`, *optional*, defaults to 0):
                Current nesting level.
            pad_value (`int`, *optional*):
                Value to use for padding.
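
        Example (a sketch, assuming the default `point_pad_value` of -10):

        ```python
        >>> processor._pad_nested_list([[1], [2, 3]], [2, 3])
        [[1, -10, -10], [2, 3, -10]]
        ```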

        Returns:
            `list`: The padded nested list.
        """
        if pad_value is None:
            pad_value = self.point_pad_value

        if current_level >= len(target_dims):
            return nested_list

        if not isinstance(nested_list, list):
            nested_list = [nested_list]

        current_size = len(nested_list)
        target_size = target_dims[current_level]

        if current_level == len(target_dims) - 1:
            # Deepest level: pad directly with the scalar pad value
            nested_list.extend([pad_value] * (target_size - current_size))
        elif target_size > current_size:
            # Intermediate level: pad with empty nested structures of the remaining dims
            if current_level < len(target_dims) - 2:
                template = self._create_empty_nested_structure(target_dims[current_level + 1 :], pad_value)
            else:
                template = [pad_value] * target_dims[current_level + 1]
            nested_list.extend(deepcopy(template) for _ in range(target_size - current_size))

        if current_level < len(target_dims) - 1:
            # Recurse so every branch is padded out to the target dimensions
            for i in range(len(nested_list)):
                if isinstance(nested_list[i], list):
                    nested_list[i] = self._pad_nested_list(nested_list[i], target_dims, current_level + 1, pad_value)

        return nested_list

    def _create_empty_nested_structure(self, dims, pad_value):
        """
        Create an empty nested structure with given dimensions filled with pad_value.

        Args:
            dims (`list`):
                The dimensions of the nested structure.
            pad_value (`int`):
                The value to fill the structure with.
        """
        if len(dims) == 1:
            return [pad_value] * dims[0]
        return [self._create_empty_nested_structure(dims[1:], pad_value) for _ in range(dims[0])]

    def _get_nesting_level(self, input_list):
        """
        Get the nesting level of a list structure.

        Args:
            input_list (`list`):
                The list to get the nesting level of.
        """
        if isinstance(input_list, list):
            if len(input_list) == 0:
                return 1
            return 1 + self._get_nesting_level(input_list[0])
        elif isinstance(input_list, (np.ndarray, torch.Tensor)):
            return len(input_list.shape)
        else:
            return 0

    def _validate_single_input(
        self,
        data: Union["torch.Tensor", np.ndarray, list],
        expected_depth: int,
        input_name: str,
        expected_format: str,
        expected_coord_size: Optional[int] = None,
    ):
        r"""
                Validate a single input by ensuring proper nesting and raising an error if the input is not valid.

                Args:
                    data (`torch.Tensor`, `np.ndarray`, or `list`):
                        Input data to process.
                    expected_depth (`int`):
                        Expected nesting depth.
                    input_name (`str`):
                        Name of the input for error messages.
                    expected_format (`str`):
                        The expected format of the input.
                    expected_coord_size (`int`, *optional*):
                        Expected coordinate size (2 for points, 4 for boxes, None for labels).
        """
        if data is None:
            return None

        if isinstance(data, (torch.Tensor, np.ndarray)):
            if data.ndim != expected_depth:
                raise ValueError(
                    f"Input {input_name} must be a tensor/array with {expected_depth} dimensions. The expected"
                    f" nesting format is {expected_format}. Got {data.ndim} dimensions."
                )
            if expected_coord_size is not None and data.shape[-1] != expected_coord_size:
                raise ValueError(
                    f"Input {input_name} must have {expected_coord_size} as the last dimension, got {data.shape[-1]}."
                )
            return self._convert_to_nested_list(data, expected_depth)

        if isinstance(data, list):
            current_depth = self._get_nesting_level(data)
            if current_depth != expected_depth:
                raise ValueError(
                    f"Input {input_name} must be a nested list with {expected_depth} levels. The expected nesting"
                    f" format is {expected_format}. Got {current_depth} levels."
                )
            return self._convert_to_nested_list(data, expected_depth)

        return None

    def _normalize_tensor_coordinates(self, tensor, original_sizes, is_bounding_box=False, preserve_padding=False):
        r"""
        Helper method to normalize coordinates in a tensor across multiple images.

        Args:
            tensor (`torch.Tensor`):
                Input tensor with coordinates.
            original_sizes (`list`):
                Original image sizes.
            is_bounding_box (`bool`, *optional*, defaults to `False`):
                Whether coordinates are bounding boxes.
            preserve_padding (`bool`, *optional*, defaults to `False`):
                Whether to preserve padding values (for points).
        """
        if preserve_padding:
            # Padded entries hold `point_pad_value`; remember where the real coordinates are
            mask = tensor != self.point_pad_value
            coord_mask = mask.all(dim=-1, keepdim=True)

        for img_idx in range(len(tensor)):
            original_size = original_sizes[img_idx] if img_idx < len(original_sizes) else original_sizes[0]
            normalized_coords = self._normalize_coordinates(
                self.target_size, tensor[img_idx], original_size, is_bounding_box=is_bounding_box
            )
            if preserve_padding:
                img_mask = coord_mask[img_idx]
                tensor[img_idx] = torch.where(img_mask.expand_as(normalized_coords), normalized_coords, tensor[img_idx])
            else:
                tensor[img_idx] = normalized_coords

    def post_process_masks(
        self,
        masks,
        original_sizes,
        mask_threshold=0.0,
        binarize=True,
        max_hole_area=0.0,
        max_sprinkle_area=0.0,
        apply_non_overlapping_constraints=False,
        **kwargs,
    ):
        r"""
        Remove padding and upscale masks to the original image size.

        Args:
            masks (`Union[List[torch.Tensor], List[np.ndarray]]`):
                Batched masks from the mask_decoder in (batch_size, num_channels, height, width) format.
            original_sizes (`Union[torch.Tensor, List[Tuple[int,int]]]`):
                The original sizes of each image before it was resized to the model's expected input shape, in (height,
                width) format.
            mask_threshold (`float`, *optional*, defaults to 0.0):
                Threshold for binarization and post-processing operations.
            binarize (`bool`, *optional*, defaults to `True`):
                Whether to binarize the masks.
            max_hole_area (`float`, *optional*, defaults to 0.0):
                The maximum area of a hole to fill.
            max_sprinkle_area (`float`, *optional*, defaults to 0.0):
                The maximum area of a sprinkle to fill.
            apply_non_overlapping_constraints (`bool`, *optional*, defaults to `False`):
                Whether to apply non-overlapping constraints to the masks.
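
        Example (a minimal sketch; `outputs` is assumed to be a SAM2 model output exposing `pred_masks`, and
        `inputs` the processor's output):

        ```python
        >>> masks = processor.post_process_masks(outputs.pred_masks, inputs["original_sizes"])
        ```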

        Returns:
            (`torch.Tensor`): Batched masks in (batch_size, num_channels, height, width) format, where (height, width)
            is given by original_size.
        """
        return self.image_processor.post_process_masks(
            masks,
            original_sizes,
            mask_threshold,
            binarize,
            max_hole_area,
            max_sprinkle_area,
            apply_non_overlapping_constraints,
            **kwargs,
        )


__all__ = ["Sam2Processor"]