
    h"                     8   d dl Z d dlmZmZ d dlmZ d dlmZ d dlZddl	m
Z
 ddlmZ  e
j                  d      Zej                  e
j                          ed	eej&                  eeef   fd
       Z G d de      Ze G d d             Ze G d d             Zy)    N)	dataclassfield)Enum)Optional   )logging)tracedContinuousBatchingLoggerreturnc                  @   t         j                  j                         rt        j                  d      } t         j                  j	                          t         j                  j                          t         j                  j                  |       j                  }t         j                  j                  |       }t         j                  j                  |       }nt         j                  j                  j                         rt         j                  j                  j                         rWt        j                  d      } t         j                  j                         }|t         j                  j                         z
  }d}nt        j                  d      } d }d}d}| |||fS )Ncudampsr   cpu)torchr   is_availabledeviceempty_cachesynchronizeget_device_propertiestotal_memorymemory_reservedmemory_allocatedbackendsr   is_builtdriver_allocated_memoryrecommended_max_memory)r   r   reserved_memoryallocated_memorys       q/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/transformers/generation/continuous_batching/classes.pyget_device_and_memory_breakdownr       s   zz f%

 

 zz77?LL**44V< ::66v>				(	(	*u~~/A/A/J/J/Le$yy88:'%))*J*J*LLe$<2BBB    c                   ,    e Zd ZdZdZdZdZdZdZdZ	dZ
y	)
RequestStatusz5Status of a generation request through its lifecycle.pending
prefillingprefilling_splitsplit_pending_remainderdecodingfinishedfailedN)__name__
__module____qualname____doc__PENDING
PREFILLINGPREFILLING_SPLITSPLIT_PENDING_REMAINDERDECODINGFINISHEDFAILED r!   r   r#   r#   6   s*    ?GJ)7HHFr!   r#   c                   
   e Zd ZU dZeed<    ee      Zee	   ed<    ee      Z
ee	   ed<    ee      Zee   ed<   dZee   ed<   ej                   Zeed	<    eej$                        Zeed
<    ee	      Zee	   ed<   y)GenerationOutputay  Tracks the output of a generation request.

    Attributes:
        request_id (str): The ID of the generation request.
        prompt_ids (list[int]): The IDs of the prompt tokens.
        generated_tokens (list[int]): The generated tokens.
        logprobs (list[float]): The log probabilities of the generated tokens.
        error (Optional[str]): Any error message associated with the request. When None, the request was successful.
        status (RequestStatus): The status of the request.
        created_time (float): The time the request was created.
        next_token (Optional[int]): The next token to be generated.
    
request_iddefault_factory
prompt_idsgenerated_tokenslogprobsNerrorstatuscreated_time
next_token)r+   r,   r-   r.   str__annotations__r   listr<   intr=   r>   floatr?   r   r#   r/   r@   timerA   rB   r6   r!   r   r8   r8   B   s     O!$7JS	7"'"=d3i=!$7Hd5k7E8C=)11FM1		:L%: %c :J:r!   r8   c                      e Zd ZU dZeed<   dZeee	      ed<   dZ
eee	      ed<    ee      Zee	   ed<    ee      Zee	   ed<    ee      Zee	   ed	<   d
Ze	ed<   ej"                  Zeed<   dZe	ed<   dZe	ed<    eej*                        Zeed<   dZee   ed<   dZee   ed<   dZeeef   ed<   edefd       Zej<                  defd       Zd Zde	fdZ de	fdZ!e"de	de#fd       Z$d Z%d  Z&y)!RequestStatea  Tracks the state of a generation request through its lifecycle.

    Attributes:
        request_id (str): The ID of the generation request.
        full_prompt_ids (list[int] | None): The tokens IDs of the full prompt.
        prompt_ids (list[int] | None): The tokens IDs currently being processed.
        remaining_prompt_ids (list[int]): The tokens IDs remaining to be processed (for split requests).
        static_outputs (list[int]): The generated tokens.
        allocated_blocks (list[int]): The identifiers of the allocated blocks to the request.
        position_offset (int): The current position in the sequence for position_ids.
        status (RequestStatus): The status of the request: can be one of PENDING, PREFILLING, PREFILLING_SPLIT,
                                SPLIT_PENDING_REMAINDER, DECODING, FINISHED, FAILED
        max_new_tokens (int): The maximum number of new tokens to generate.
        eos_token_id (int): The ID of the end-of-sequence token.
        created_time (float): The time the request was created.
        error (Optional[str]): Any error message associated with the request. When None, has had no error yet.
        next_token (Optional[str]): The next token to be generated.
    r9   Nfull_prompt_idsr<   r:   remaining_prompt_idsstatic_outputsallocated_blocksr   position_offset_status   max_new_tokenseos_token_idrA   r?   rB   )rS   rS   lifespanr   c                     | j                   S )N)rP   selfs    r   r@   zRequestState.status   s    ||r!   valuec                 (   | j                   t        j                  k(  r#t        j                         df| _        || _         y |t        j
                  k(  r8| j                  d   t        j                         f| _        | j                          || _         y )NrS   r   )rP   r#   r/   rH   rU   r4   log_end_of_request)rX   rY   s     r   r@   zRequestState.status   sm    <<=000!YY["-DM  m,,,!]]1-tyy{;DM##%r!   c                    t        | j                        }| j                         }| j                  d   | j                  z
  }| j                  d   | j                  z
  }t
        j                  d| j                   d|d|d|d|
       y )Nr      zRequest z finished: prefill_len = z decode_len = z start_time = z end_time = )lenrK   generated_lenrU   rA   loggerinfor9   )rX   prefill_len
decode_len
start_timeend_times        r   r[   zRequestState.log_end_of_request   s    $../'')
]]1%(9(99
==#d&7&77t''A;2B/J?RaT^Sbbodlcpq	
r!   c                     | j                   S )zCGet the current length of the sequence (prompt + generated tokens).)rO   rW   s    r   current_lenzRequestState.current_len   s    ###r!   c                 ,    t        | j                        S )z*Get the number of tokens generated so far.)r^   rM   rW   s    r   r_   zRequestState.generated_len   s    4&&''r!   token_idc                 .   | j                   t        j                  k7  ry|| j                  k(  xr | j                  dk7  }| j	                         | j
                  k\  }|r|r| j                  j                  |g       |s|rt        j                  | _         yy)zUpdate the request with a newly generated token and check for completion.

        Args:
            token_id: The token ID to add to the output sequence

        Returns:
            bool: True if the request is now complete, False otherwise
        FrS   T)	r@   r#   r3   rT   r_   rR   rM   extendr4   )rX   ri   is_eos
is_max_lens       r   update_with_tokenzRequestState.update_with_token   s     ;;-000T...J43D3D3J'')T-@-@@
 6&&z2Z'00DKr!   c           
      n   d| j                    d| j                   d| j                          dt        | j                         dt        | j
                         d| j                   dt        | j                         d| j                   d	| j                   g	}d
dj                  |      z   dz   S )Nzrequest_id=zstatus=zout_tokens=zquery_length=zremaining_tokens=z
kv_length=zfull_prompt_length=zallocated_blocks=zgenerated_tokens=zRequestState(
	z,
	z
))r9   rP   r_   r^   r<   rL   rO   rK   rN   rM   join)rX   msgs     r   __repr__zRequestState.__repr__   s    $//*+dll^$$,,./0C012D$=$= >?@--./!#d&:&:";!<= 5 567 3 345

 #W\\#%66>>r!   c           	          t        | j                  | j                  | j                  | j                  g | j
                  | j                        S )z7Convert the request state to a GenerationOutput object.)r9   r<   r@   r=   r>   r?   rB   )r8   r9   rK   r@   rM   r?   rB   rW   s    r   to_generation_outputz!RequestState.to_generation_output   s@    ++;;!00**
 	
r!   )'r+   r,   r-   r.   rC   rD   rK   r   rE   rF   r<   r   rL   rM   rN   rO   r#   r/   rP   rR   rT   rH   rA   rG   r?   rB   rU   tuplepropertyr@   setterr[   rg   r_   r	   boolrn   rr   rt   r6   r!   r   rJ   rJ   [   s`   ( O+/OXd3i(/&*Jc#*&+D&A$s)A %d ;NDI;"'"=d3i=OS*22G]2NCL#		:L%:E8C= $J$$,HeE5L!,   ]]M  
$S $(s (
 # $  4?

r!   rJ   )rH   dataclassesr   r   enumr   typingr   r   utils.loggingr   utils.metricsr	   	getLoggerr`   setLevelINFOstaticmethodru   r   rF   r    r#   r8   rJ   r6   r!   r   <module>r      s     (    $ # 
		5	6   Cu||S#s/J)K C C,	D 	 ; ; ;0 v
 v
 v
r!   