
    h                         d dl Z d dlmZmZ d dlZddlmZ ddlm	Z	m
Z
 ddlmZmZmZmZmZ ddlmZmZ ddlmZ  G d	 d
e      Z G d ded      Z G d de      ZdgZy)    N)OptionalUnion   )BatchFeature)
ImageInputmake_nested_list_of_images)ImagesKwargsMultiModalDataProcessingKwargsProcessorMixinUnpack)PreTokenizedInput	TextInput)	to_py_objc                   ^    e Zd ZU ee   ed<   ee   ed<   ee   ed<   ee   ed<   ee   ed<   y)Gemma3ImagesKwargsdo_pan_and_scanpan_and_scan_min_crop_sizepan_and_scan_max_num_crops"pan_and_scan_min_ratio_to_activatedo_convert_rgbN)__name__
__module____qualname__r   bool__annotations__intfloat     j/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/transformers/models/gemma3/processing_gemma3.pyr   r      s4    d^# (- (-(07TN"r    r   c                   4    e Zd ZU eed<   dddddddddd	Zy
)Gemma3ProcessorKwargsimages_kwargsFT)paddingreturn_mm_token_type_ids      g333333?)r   r   r   r   r   )text_kwargsr$   N)r   r   r   r   r   	_defaultsr   r    r!   r#   r#   $   s2    %% (,

 #$*-*+25
Ir    r#   F)totalc            
            e Zd ZddgZdZdZ	 	 ddef fdZ	 	 	 	 ddede	e
eee
   ee   f   d	ee   d
efdZddZed        Z xZS )Gemma3Processorimage_processor	tokenizerAutoImageProcessorAutoTokenizerimage_seq_lengthc                 $   || _         |j                  | _        |j                  | _        |j                  | _        dj	                  |j                  g|z        }d|j                   | |j
                   d| _        t        |    d|||d| y )N z

)r.   r/   chat_templater   )	r2   image_token_id	boi_tokenimage_tokenjoin	eoi_tokenfull_image_sequencesuper__init__)selfr.   r/   r5   r2   kwargsimage_tokens_expanded	__class__s          r!   r=   zGemma3Processor.__init__:   s     !1'66",,$00 ")>)>(?BR(R S%))*=*=)>?T>UV_ViViUjjn#o  	
+'	
 		
r    imagestextr?   returnc           
      `   ||t        d       | j                  t        fd| j                  j                  i|}t        |t              r|g}n.t        |t              st        |d   t              st        d      i }|W| j                  j                  |      }t        |      } | j                  |fi |d   }|s5|D cg c]*  }dj                  | j                  gt        |      z        , }}t        |      t        |      k7  r$t        dt        |       dt        |       d	      t        |j!                  d
            }	|D 
cg c]3  }t#        t        |            D 
cg c]  }
|	j!                  d       c}
5 }}}
t%        t'        |||            D ]  \  }\  }}}	t)        j*                  | j                  |      D cg c]  }|j-                          }}t        |      t        |      k7  r$t        dt        |       dt        |       d      t/        t        t'        |	|                  D ]a  \  }}|s	d| j                   ddj                  | j                  g|z        z   }|d | |z   ||t        | j                        z   d  z   }|||<   c  |D cg c](  }|j1                  | j                  | j2                        * }}|d   j!                  dd       }|d   j!                  dd      } | j                  dd|i|d   }| j5                  ||dg       |rRt7        j8                  |d         }t7        j:                  |      }d||| j<                  k(  <   |j?                         |d<   tA        i |||      S c c}w c c}
w c c}
}w c c}w c c}w )Nz+Provide at least one of `text` or `images`.tokenizer_init_kwargsr   zAInvalid input text. Please provide a string, or a list of stringsr$    z1Received inconsistently sized batches of images (z) and text (z).	num_cropszPrompt contained z image tokens but received z images.zHere is the original image z0 and here are some crops to help you see better r)   return_tensorsr&   FrC   image)
modalities	input_ids   token_type_ids)datatensor_typer   )!
ValueError_merge_kwargsr#   r/   init_kwargs
isinstancestrlist	TypeErrorr.   fetch_imagesr   r9   r7   lenr   poprange	enumerateziprefinditerstartreversedreplacer;   _check_special_mm_tokensnparray
zeros_liker6   tolistr   )r>   rB   rC   videosaudior?   output_kwargsimage_inputsbatched_imagesrH   _batch_num_crops	batch_idxpromptmimage_indexesnumidxformatted_image_textrI   r&   text_inputs	array_idsmm_token_type_idss                           r!   __call__zGemma3Processor.__call__P   s    <FNJKK***!
"&.."<"<
 
 dC 6DD$'
47C0H_``))66v>F7?N/4//Y-:XYL O]^V$..!1CK!?@^^>"c$i/ GNH[G\\hilmqirhssuv 
 ","2"2;"?@I\jkRX%F:LMQ	a 0MkOk:CCn^mDn:o 16	6FFI46KKPV4W Xq X Xv;#m"44$+C,>+??Z[^_e[fZggop 
 !)c)].K)L M 1HC9$..9IIyz!hh'7#'=>? - "(0D!DvcTWX\XfXfTgNgNiGj!j*0Y11& \``QWFNN4>>43K3KL`D`&}599:JDQ#0#?#C#CD^`e#f $dnnO$O-2NO%%dKWI%N $[!9:I "i 8BCi4+>+>>?,=,D,D,FK()!@K!@<!@n]]W _  Nk X$ as*   /NN ,NN N&,-N+N c                     i }|<| j                   gt        |      z  }dgt        |      z  }|j                  ||d       t        di |S )a  
        Computes the number of placeholder tokens needed for multimodal inputs with the given sizes.

        Args:
            image_sizes (`list[list[int]]`, *optional*):
                The input sizes formatted as (height, width) per each image.

        Returns:
            `MultiModalData`: A `MultiModalData` object holding number of tokens per each of the provided
            input modalities, along with other useful data.
        rM   )num_image_tokensnum_image_patchesr   )r2   rY   updater
   )r>   image_sizesr?   vision_datar{   r|   s         r!   _get_num_multimodal_tokensz*Gemma3Processor._get_num_multimodal_tokens   s]     " $ 5 56[9II!"c+&6 64D[lmn,,,r    c                     | j                   j                  dgz   }| j                  j                  }|D cg c]
  }|dk7  s	| }}t        ||z         S c c}w )NrN   rH   )r/   model_input_namesr.   rV   )r>   tokenizer_input_namesimage_processor_input_namesnames       r!   r   z!Gemma3Processor.model_input_names   sb     $ @ @DTCU U&*&:&:&L&L#8S&kW[_jWjt&k#&k),GGHH 'ls   
A A)Nr'   )NNNN)N)r   r   r   
attributesimage_processor_classtokenizer_classr   r=   r   r   r   r   rV   r   r#   r   ry   r   propertyr   __classcell__)rA   s   @r!   r-   r-   5   s    #[1J0%O  #

 
0 "^bI^I^ I0$y/4HYCZZ[I^ ./I^ 
I^V-. I Ir    r-   )r^   typingr   r   numpyrd   feature_extraction_utilsr   image_utilsr   r   processing_utilsr	   r
   r   r   r   tokenization_utils_baser   r   utilsr   r   r#   r-   __all__r   r    r!   <module>r      sX     
 "  4 A f f C # #,E "CIn CIL 
r    