
    Li7                         d Z ddlZddlmZmZ ddlmZmZ ddlmZ ddl	m
Z
 ddl	mZ dd	l	mZ dd
l	mZ  ej                   d      Zg dZ G d d      Zdedej*                  j,                  j.                  defdZdedefdZ G d d      Zy)z)[Experimental] Text Only Local Tokenizer.    N)AnyIterable)OptionalUnion)sentencepiece_model_pb2   )_common)_local_tokenizer_loader)_transformers)typeszgoogle_genai.local_tokenizer)_parse_hex_byte_token_str_to_bytesLocalTokenizer_TextsAccumulatorc                      e Zd ZdZddZdee   fdZdeej                     ddfdZ
dej                  ddfd	Zd
ej                  ddfdZdej                  dej                  fdZdeej                     ddfdZdeej"                     ddfdZdej"                  ddfdZdej(                  dej(                  fdZdej,                  dej,                  fdZdeeef   deeef   fdZdedefdZy)r   a  Accumulates countable texts from `Content` and `Tool` objects.

  This class is responsible for traversing complex `Content` and `Tool`
  objects and extracting all the text content that should be included when
  calculating token counts.

  A key feature of this class is its ability to detect unsupported fields in
  `Content` objects. If a user provides a `Content` object with fields that
  this local tokenizer doesn't recognize (e.g., new fields added in a future
  API update), this class will log a warning.

  The detection mechanism for `Content` objects works by recursively building
  a "counted" version of the input object. This "counted" object only
  contains the data that was successfully processed and added to the text
  list for tokenization. After traversing the input, the original `Content`
  object is compared to the "counted" object. If they don't match, it
  signifies the presence of unsupported fields, and a warning is logged.
  returnNc                     g | _         y N_textsselfs    Z/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/google/genai/local_tokenizer.py__init__z_TextsAccumulator.__init__;   s	    DK    c                     | j                   S r   r   r   s    r   	get_textsz_TextsAccumulator.get_texts>   s    ;;r   contentsc                 4    |D ]  }| j                  |        y r   )add_content)r   r   contents      r   add_contentsz_TextsAccumulator.add_contentsA   s      
w r   r!   c                 h   t        j                  g |j                        }|j                  rB|j                  D ]2  }|j                  J t        j                         }|j
                  |j                  t        d      |j                  |j                  |_        |j                  ,| j                  |j                         |j                  |_	        |j                  ,| j                  |j                         |j                  |_        |j                  6|j                  |_        | j                  j                  |j                         |j                  j                  |       5 |j!                  d      |j!                  d      k7  rt"        j%                  d| d| d       y y )N)partsrolez6LocalTokenizers do not support non-text content types.T)exclude_nonezHContent contains unsupported types for token counting. Supported fields z. Got .)r   Contentr%   r$   Part	file_datainline_data
ValueErrorvideo_metadatafunction_calladd_function_callfunction_responseadd_function_responsetextr   append
model_dumploggerwarning)r   r!   counted_contentpartcounted_parts        r   r    z_TextsAccumulator.add_contentE   s   mm"7<<@O}}-- 3$$$000zz|>>%)9)9)EF  *(,(;(;,
%)

 
 !3!3
4'+'9'9,
$!!-

$
$T%;%;
<+/+A+A,
(99 "ii,

++

TYY
'$$\2%3( t,0J0J 1K 1  nn$%VG9A7r   r.   c                    |j                   r%| j                  j                  |j                          t        j                  |j                         }|j
                  r#| j                  |j
                        }||_        yy)zProcesses a function call and adds relevant text to the accumulator.

    Args:
        function_call: The function call to process.
    )nameN)r;   r   r3   r   FunctionCallargs_dict_traverse)r   r.   counted_function_callcounted_argss       r   r/   z#_TextsAccumulator.add_function_calld   si     
kk++,!..M4F4FG((););<l#/  r   toolc                     t        j                  g       }|j                  rP|j                  D ]A  }| j                  |      }|j                  g |_        |j                  j	                  |       C |S )N)function_declarations)r   ToolrC   _function_declaration_traverser3   )r   rA   counted_toolfunction_declarationcounted_function_declarations        r   add_toolz_TextsAccumulator.add_toolq   sx    ::B7L!!"&"<"< P
'+'J'J (
$ --5/1,
,**112NOP r   toolsc                 4    |D ]  }| j                  |        y r   )rI   )r   rJ   rA   s      r   	add_toolsz_TextsAccumulator.add_tools~   s     
mmDr   function_responsesc                 4    |D ]  }| j                  |        y r   )r1   )r   rM   r0   s      r   add_function_responsesz(_TextsAccumulator.add_function_responses   s#     0 4
  !234r   r0   c                    t        j                         }|j                  r6| j                  j	                  |j                         |j                  |_        |j
                  r#| j                  |j
                        }||_        y y r   )r   FunctionResponser;   r   r3   responser>   )r   r0   counted_function_responsecounted_responses       r   r1   z'_TextsAccumulator.add_function_response   ss     !& 6 6 8
kk*//0'8'='=$!!,,->-G-GH+;( "r   rG   c                    t        j                         }|j                  r6| j                  j	                  |j                         |j                  |_        |j
                  r6| j                  j	                  |j
                         |j
                  |_        |j                  r"| j                  |j                        }||_        |j                  r"| j                  |j                        }||_        |S r   )	r   FunctionDeclarationr;   r   r3   description
parameters
add_schemarR   )r   rG   rH   counted_parametersrT   s        r   rE   z0_TextsAccumulator._function_declaration_traverse   s     $)#<#<#>   
kk-223*>*C*C"'''
kk-99:

*
* #. &&??+?+J+JK0B"-$$)=)F)FG.>"+''r   schemac                    t        j                         }|j                  r|j                  |_        |j                  r|j                  |_        |j                  |j                  |_        |j
                  r6| j                  j                  |j
                         |j
                  |_        |j                  r6| j                  j                  |j                         |j                  |_        |j                  r6| j                  j                  |j                         |j                  |_	        |j                  r6| j                  j                  |j                         |j                  |_        |j                  r|j                  |_        |j                  r"| j                  |j                        }||_        |j                  r\i }|j                  j                         D ]6  \  }}| j                  j                  |       | j                  |      }|||<   8 ||_        |j                   r"| j#                  |j                         }||_        |S )zProcesses a schema and adds relevant text to the accumulator.

    Args:
        schema: The schema to process.

    Returns:
        The new schema object with only countable fields.
    )r   Schematypetitledefaultformatr   r3   rW   enumextendrequiredproperty_orderingitemsrY   
propertiesexample_any_traverse)	r   r[   counted_schemacounted_schema_itemsdkeyvaluecounted_valuecounted_schema_examples	            r   rY   z_TextsAccumulator.add_schema   s    \\^N{{"KKn||#\\n~~!%~~n}}
kk'$mmn
kk++,#)#5#5n {{
kk%"KKn
kk) &n)/)A)An&||!__V\\:1n
a))//1 *#u3.# #$n~~#11&..A5nr   rl   c                     i }| j                   j                  t        |j                                      |j	                         D ]  \  }}| j                  |      ||<    |S )zProcesses a dict and adds relevant text to the accumulator.

    Args:
        d: The dict to process.

    Returns:
        The new dict object with only countable fields.
    )r   rc   listkeysrf   ri   )r   rl   counted_dictrm   vals        r   r>   z _TextsAccumulator._dict_traverse   sY     LKKtAFFH~&GGI 2S,,S1l32r   rn   c                 
   t        |t              r| j                  j                  |       |S t        |t              r| j                  |      S t        |t              r|D cg c]  }| j                  |       c}S |S c c}w )zProcesses a value and adds relevant text to the accumulator.

    Args:
        value: The value to process.

    Returns:
        The new value with only countable fields.
    )
isinstancestrr   r3   dictr>   rr   ri   )r   rn   items      r   ri   z_TextsAccumulator._any_traverse   sp     %
kkl	E4	   ''	E4	 3894d  &99l :s   #B )r   N)__name__
__module____qualname____doc__r   r   rx   r   r   r(   r"   r    r<   r/   rD   rI   rL   rQ   rO   r1   rV   rE   r]   rY   ry   r   r>   ri    r   r   r   r   '   sM   & #  8EMM#:  t   4 >0U-?-? 0D 05:: %** Xejj1 d 4 ()?)? @44	<$55	<	<("'";";(  ((+u|| + +Zd38n c3h   r   r   tokenr^   r   c                     |t         j                  j                  j                  j                  k(  rt        |       j                  dd      S | j                  dd      j                  d      S )Nr   big)length	byteorderu   ▁ zutf-8)	r   
ModelProtoSentencePieceTypeBYTEr   to_bytesreplaceencode)r   r^   s     r   r   r      s\     
$//==BBGGG5!**!u*EE==$++G44r   c                    t        |       dk7  rt        d|        | j                  d      r| j                  d      st        d|        	 t	        | dd d      }|d
k\  rt        d|        |S # t        $ r t        d	|        w xY w)zParses a hex byte string of the form '<0xXX>' and returns the integer value.

  Raises ValueError if the input is malformed or the byte value is invalid.
     zInvalid byte length: z<0x>zInvalid byte format:          zInvalid hex value:    zByte value out of range: )lenr,   
startswithendswithint)r   ru   s     r   r   r      s     	Z1_
,UG4
55			%	 s(;
,UG4
554
eAaj"
C 	CZ
08
99	* 
 4
*5'2
334s   A2 2B
c                   @   e Zd ZdZdefdZ ej                  d      dddee	j                  e	j                  f   dee	j                     d	e	j                  fd
       Z ej                  d      dee	j                  e	j                  f   d	e	j                   fd       Zy)r   a  [Experimental] Text Only Local Tokenizer.

  This class provides a local tokenizer for text only token counting.

  LIMITATIONS:
  - Only supports text based tokenization and no multimodal tokenization.
  - Forward compatibility depends on the open-source tokenizer models for future
  Gemini versions.
  - For token counting of tools and response schemas, the `LocalTokenizer` only
  supports `types.Tool` and `types.Schema` objects. Python functions or Pydantic
  models cannot be passed directly.
  
model_namec                     t        j                  |      | _        t        j                  | j                        | _        t        j
                  | j                        | _        y r   )loaderget_tokenizer_name_tokenizer_nameload_model_proto_model_protoget_sentencepiece
_tokenizer)r   r   s     r   r   zLocalTokenizer.__init__$  sF    !44Z@D//0D0DED..t/C/CDDOr   zThe SDK's local tokenizer implementation is experimental and may change in the future. It only supports text based tokenization.N)configr   r   r   c                   t        j                  |      }t               }t        j                  j                  |xs i       }|j                  |       |j                  r|j                  |j                         |j                  r;|j                  j                  r%|j                  |j                  j                         |j                  r/|j                  t        j                  |j                  g             | j                  j                  t        |j!                                     }t        j"                  t%        d |D                    S )a  Counts the number of tokens in a given text.

    Args:
      contents: The contents to tokenize.
      config: The configuration for counting tokens.

    Returns:
      A `CountTokensResult` containing the total number of tokens.

    Usage:

    .. code-block:: python

      from google import genai
      tokenizer = genai.LocalTokenizer(model_name='gemini-2.0-flash-001')
      result = tokenizer.count_tokens("What is your name?")
      print(result)
      # total_tokens=5
    c              3   2   K   | ]  }t        |        y wr   )r   ).0tokenss     r   	<genexpr>z.LocalTokenizer.count_tokens.<locals>.<genexpr>R  s     ?V?s   )total_tokens)t
t_contentsr   r   CountTokensConfigmodel_validater"   rJ   rL   generation_configresponse_schemarY   system_instructionr   r   rr   r   CountTokensResultsum)r   r   r   processed_contentstext_accumulatortokens_lists         r   count_tokenszLocalTokenizer.count_tokens)  s    : h/(*$$33FLbAF!!"45||  .F$<$<$L$L!!&":":"J"JK  ##ALL&2K2K1L$MN//((.>.H.H.J)KLK""?;?? r   c                    t        j                  |      }t               }|D ]  }|j                  |        | j                  j                  |j                               }g }|D ];  }|j                  s|j                  D ]  }|j                  |j                          = g }t        ||      D ]  \  }	}
|j                  t        j                  |	j                  D cg c]  }|j                   c}|	j                  D cg c]C  }t        |j                   | j"                  j                  |j                     j$                        E c}|
              t        j&                  |      S c c}w c c}w )a,  Computes the tokens ids and string pieces in the input.

    Args:
      contents: The contents to tokenize.

    Returns:
      A `ComputeTokensResult` containing the token information.

    Usage:

    .. code-block:: python

      from google import genai
      tokenizer = genai.LocalTokenizer(model_name='gemini-2.0-flash-001')
      result = tokenizer.compute_tokens("What is your name?")
      print(result)
      # tokens_info=[TokensInfo(token_ids=[279, 329, 1313, 2508, 13], tokens=[b' What', b' is', b' your', b' name', b'?'], role='user')]
    )	token_idsr   r%   )tokens_info)r   r   r   r    r   EncodeAsImmutableProtor   r$   r3   r%   zipr   
TokensInfopiecesidr   piecer   r^   ComputeTokensResult)r   r   r   r   r!   tokens_protosroles_token_infostokens_protor%   r   s               r   compute_tokenszLocalTokenizer.compute_tokensU  s[   4 h/(*% ,""7+,OO::""$M E% %	 	%A
,,w||
$	%%
 K!-7 d


/;/B/BCeC
  ,22	  &kk4#4#4#;#;EHH#E#J#J 	 $$== Ds   E/AE4)r{   r|   r}   r~   rx   r   r	   experimental_warningr   r   ContentListUnionContentListUnionDictr   CountTokensConfigOrDictr   r   r   r   r   r   r   r   r     s    E E
  7B 9=	&e,,e.H.HHI& u445	&
 &	&P  7B2>e,,e.H.HHI2>   2>	2>r   r   )r~   loggingtypingr   r   r   r   sentencepiecer    r	   r
   r   r   r   r   	getLoggerr5   __all__r   rx   r   r   r   bytesr   r   r   r   r   r   r   <module>r      s     0    " 1  /   			9	:M M`55-88FFKK5
53 3 ,u> u>r   