
    im@                        d dl Z d dlmZmZ d dlmZ d dlZddlmZm	Z	 ddl
mZ ddlmZ  e       rd dlZdZ ej                   d	      Zej$                  rJ ej&                         Zej+                   ej,                  d
             ej/                  e       de_        deej2                  eeef   fdZ G d de      Ze G d d             Ze G d d             Z G d d      Zy)    N)	dataclassfield)IntEnum   )is_psutil_availableis_torch_xpu_available)logging)tracedContinuousBatchingLoggerz4%(asctime)s - %(name)s - %(levelname)s - %(message)sFreturnc                     t         j                  j                         rt        j                  d      } t         j                  j	                          t         j                  j                          t         j                  j                  |       j                  }t         j                  j                  |       }t         j                  j                  |       }nt               rt        j                  d      } t         j                  j	                          t         j                  j                          t         j                  j                  |       j                  }t         j                  j                  |       }t         j                  j                  |       }n/t         j                  j                  j                         rt         j                  j                  j                         rXt        j                  d      } t         j                  j                         }| t!        t         j                  d             z
  }d}nt        j                  d      } t#               rMt%        j&                         j(                  }t%        j*                         j-                         j.                  }|}nt0        j3                  d       d}d}d}| |||fS )Ncudaxpumpsrecommended_max_memoryr   cpuzCannot get memory breakdown on CPU without psutil: returning 0 for all memory values. Please install psutil to get an actual memory breakdown.)torchr   is_availabledeviceempty_cachesynchronizeget_device_propertiestotal_memorymemory_reservedmemory_allocatedr   r   backendsr   is_builtdriver_allocated_memorygetattrr   psutilvirtual_memorytotalProcessmemory_inforssloggererror)r   r   reserved_memoryallocated_memorys       /var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/generation/continuous_batching/requests.pyget_device_and_memory_breakdownr,   *   s   zz f%

 

 zz77?LL**44V< ::66v>		!e$				yy66v>KK))33F; 9955f=				(	(	*u~~/A/A/J/J/Le$yy88:'*V'%))=U*V*XXe$ !00288L%~~/;;=AA.OLL< LO <2BBB    c                   $    e Zd ZdZdZdZdZdZdZy)RequestStatusz5Status of a generation request through its lifecycle.r         r      N)	__name__
__module____qualname____doc__PENDING
PREFILLINGDECODINGFINISHEDFAILED r-   r+   r/   r/   Q   s    ?GJHHFr-   r/   c                   &   e Zd ZU dZeed<    ee      Zee	   ed<    ee      Z
ee	   ed<    ee      Zee   ed<   dZedz  ed<   ej                  Zeed	<    eej$                        Zeed
<   dZeeef   ed<   dZee   dz  ed<   defdZy)GenerationOutputa  Tracks the output of a generation request.

    Attributes:
        request_id (str): The ID of the generation request.
        prompt_ids (list[int]): The IDs of the prompt tokens.
        generated_tokens (list[int]): The generated tokens.
        logprobs (list[float]): The log probabilities of the generated tokens.
        error (Optional[str]): Any error message associated with the request. When None, the request was successful.
        status (RequestStatus): The status of the request.
        created_time (float): The time the request was created.
        lifespan (tuple[float, float]): The time the request was no longer pending and the time the request finished.
    
request_iddefault_factory
prompt_idsgenerated_tokenslogprobsNr(   statuscreated_timer   r   lifespan
timestampsr   c                 <    | j                   t        j                  k(  S N)rE   r/   r:   selfs    r+   is_finishedzGenerationOutput.is_finishedt   s    {{m4444r-   )r3   r4   r5   r6   str__annotations__r   listrB   intrC   rD   floatr(   r/   r7   rE   timeperf_counterrF   rH   tuplerI   boolrN   r<   r-   r+   r>   r>   [   s     O!$7JS	7"'"=d3i=!$7Hd5k7E3:)11FM10A0ABL%B$,HeE5L!,%)JUd")5T 5r-   r>   c                      e Zd ZU dZeed<   ee   ed<   dZe	ed<   dZ
eed<    ee      Zee   ed	<    ee      Zee   ed
<    ee      Zee   ed<    ee      Zee   ed<   dZeed<   dZeed<   ej(                  Zeed<   dZedz  ed<   dZeee   z  dz  ed<    ee      Zee   ed<   dZe	ed<    eej8                        Zeed<   dZedz  ed<   dZe eef   ed<    ee      Z!ee   ed<   dZ"eed<   dZ#eed<   d Z$e%defd        Z&e&jN                  d!efd"       Z&e%dee   dz  fd#       Z(d$ Z)defd%Z*defd&Z+e,d'ed(edz  de	fd)       Z-d* Z.d+ Z/d,edd fd-Z0d/d.Z1y)0RequestStatea  Tracks the state of a generation request through its lifecycle.

    Attributes:
        request_id (str): The ID of the generation request.
        initial_tokens (list[int]): The initial prompt tokens.
        num_children (int): The number of children requests
        full_prompt_ids (list[int] | None): The tokens IDs of the full prompt.
        prompt_ids (list[int] | None): The tokens IDs currently being processed.
        remaining_prompt_ids (list[int]): The initial tokens IDs remaining to be processed.
        static_outputs (list[int]): The generated tokens.
        allocated_blocks (int): The number of blocks allocated to the request.
        position_offset (int): The current position in the sequence for position_ids.
        status (RequestStatus): The status of the request: can be one of PENDING, PREFILLING, PREFILLING_SPLIT,
                                SPLIT_PENDING_REMAINDER, DECODING, FINISHED, FAILED
        max_new_tokens (int | None): The maximum number of new tokens to generate.
        eos_token_id (None | int | list[int]): The ID(s) of the end-of-sequence tokens. Only used in post-init.
        _eos_token_ids (set[int]): The IDs of the end-of-sequence tokens, formatted as a set.
        streaming (bool): Whether to stream tokens as they're generated
        created_time (float): The time the request was created.
        error (Optional[str]): Any error message associated with the request. When None, has had no error yet.
    r?   initial_tokensFrecord_timestampsr   num_childrenr@   tokens_to_processremaining_prefill_tokensrC   rD   allocated_blocksposition_offset_status   Nmax_new_tokenseos_token_id_eos_token_ids	streamingrF   r(   rG   rH   _timestamps_true_initial_tokens_new_tokens_limitc                    | j                   dn| j                   | _        | j                  d d  | _        | j                  y t        | j                  t              r6| j                  dk\  r&| j                  j                  | j                         y y | j                  D ]#  }|dk\  s	| j                  j                  |       % y )Nri   r   )	rc   rj   rZ   r^   rd   
isinstancerR   re   add)rM   token_ids     r+   __post_init__zRequestState.__post_init__   s    /3/B/B/JPTPcPc(,(;(;A(>%$))3/  A%##''(9(9: & !-- 6q=''++H56r-   r   c                     | j                   S rK   )ra   rL   s    r+   rE   zRequestState.status   s    ||r-   valuec                 (   | j                   t        j                  k(  r#t        j                         df| _        || _         y |t        j                  k(  r8| j
                  d   t        j                         f| _        | j                          || _         y )Nr   r   )ra   r/   r7   rT   rU   rH   r:   log_end_of_request)rM   rq   s     r+   rE   zRequestState.status   ss    <<=000!..0"5DM  m,,,!]]1-t/@/@/BCDM##%r-   c                 6    | j                   r| j                  S d S rK   )r[   rg   rL   s    r+   rI   zRequestState.timestamps   s    #'#9#9tCtCr-   c                    t        | j                        }| j                         }| j                  d   | j                  z
  }| j                  d   | j                  z
  }t
        j                  d| j                   d|d|d|d|
       y )Nr   r0   Request z finished: prefill_len = z decode_len = z start_time = z end_time = )lenrZ   generated_lenrH   rF   r'   infor?   )rM   prefill_len
decode_len
start_timeend_times        r+   rs   zRequestState.log_end_of_request   s    $--.'')
]]1%(9(99
==#d&7&77t''A;2B/J?RaT^Sbbodlcpq	
r-   c                     | j                   S )zCGet the current length of the sequence (prompt + generated tokens).)r`   rL   s    r+   current_lenzRequestState.current_len   s    ###r-   c                 ,    t        | j                        S )z*Get the number of tokens generated so far.)rw   rC   rL   s    r+   rx   zRequestState.generated_len   s    4(())r-   rn   logprobc                 4   | j                   t        j                  k7  ry| j                  r-| j                  j                  t        j                                || j                  v }| j                         }|s|| j                  k  rF| j                  j                  |       |g| _        |dz  }|A| j                  j                  |       n%t        j                  d| j                    d|        |s|| j                  k\  rt        j"                  | _         yy)zUpdate the request with a newly generated token (and optional log probability of the token) and check for
        completion. Returns True if the request is now complete, False otherwise.Fr0   rv   z generated a useless token: T)rE   r/   r9   r[   rg   appendrT   rU   re   rx   rj   rC   r]   rD   r'   warningr?   r:   )rM   rn   r   is_eosr   s        r+   update_and_check_completionz(RequestState.update_and_check_completion   s    
 ;;-000 !!##D$5$5$78 T000((* kD$:$::!!((2&.ZD"1K"$$W-NNXdoo%66RS[R\]^[D$:$::'00DKr-   c           
      n   d| j                    d| j                   d| j                          dt        | j                         dt        | j
                         d| j                   dt        | j                         d| j                   d	| j                   g	}d
dj                  |      z   dz   S )Nzrequest_id=zstatus=zout_tokens=zquery_length=zremaining_tokens=z
kv_length=zfull_prompt_length=zallocated_blocks=zgenerated_tokens=zRequestState(
	z,
	z
))r?   ra   rx   rw   r]   r^   r`   rZ   r_   rC   join)rM   msgs     r+   __repr__zRequestState.__repr__  s    $//*+dll^$$,,./0C 6 6789D$A$A BCD--./!#d&9&9":!;< 5 567 5 567

 #W\\#%66>>r-   c                    | j                   rI| j                  | j                   d | j                  z   | _        | j                  d| j                    | _        t        | j                  | j                  | j                  | j
                  | j                  | j                  | j                  | j                  | j                  	      S )z7Convert the request state to a GenerationOutput object.N)	r?   rB   rC   rD   r(   rE   rF   rH   rI   )rh   rZ   rC   r>   r?   rD   r(   rE   rF   rH   rI   rL   s    r+   to_generation_outputz!RequestState.to_generation_output  s    $$$($7$78Q8Q8S$TW[WlWl$lD!"&"5"56Q8Q8Q"RD**!22]]**;;**]]

 
	
r-   new_request_idc                    t        j                         }t        di d|d| j                  d| j                  d| j
                  dd d| j                  dd d| j                  dd d| j                  d	| j                  d
| j                  d| j                  d| j                  d| j                  d|d|dfdg d| j                  d| j                  }| j                   dd |_        |S )ziFork the request into a new request with the same state except for request_id, created_time and lifespan.r?   rZ   r\   r]   NrC   rD   r_   r`   ra   rc   rd   rf   rF   rH   r   rg   r(   r[   r<   )rT   rU   rY   rZ   r\   r]   rC   rD   r_   r`   rE   rc   rd   rf   r(   r[   r^   )rM   r   tnew_requests       r+   forkzRequestState.fork   s4   " 
%
..
 **
 #44Q7	

 "2215
 ]]1%
 "22
 !00
 KK
  ..
 **
 nn
 
 W
 
  **!
" #44#
( 04/L/LQ/O,r-   c           
         | j                   dn!| j                   t        | j                        z
  }t        | j                  | j
                  | j                  z   | j                  dd | j                  | j                  || j                  | j                        }| j                  r| j                  |_        |S t        | j
                        |_        |S )aT  Creates an equivalent new request by removing the generated tokens and adding them to the initial prompt. The
        created request has THE SAME request_id. Notably, we can retrieve the original request from the created one with
        the _true_initial_tokens attribute. The logprobs of the generated tokens are kept in the new request.N)r?   rZ   rD   r\   r[   rc   rd   rf   )rc   rw   rC   rY   r?   rZ   rD   r\   r[   rd   rf   rh   )rM   rc   	new_states      r+   !create_equivalent_initial_requestz.RequestState.create_equivalent_initial_request:  s     "&!4!4!<4CVCVY\]a]r]rYsCs ..1F1FF]]1%**"44)**nn	
	 $$-1-F-FI*  .11D1D-EI*r-   )r   rY   )2r3   r4   r5   r6   rO   rP   rQ   rR   r[   rW   r\   r   r]   r^   rC   rD   rS   r_   r`   r/   r7   ra   rc   rd   setre   rf   rT   rU   rF   r(   rH   rV   rg   rh   rj   ro   propertyrE   setterrI   rs   r   rx   r
   r   r   r   r   r   r<   r-   r+   rY   rY   x   s   . OI#t#L##(#>tCy>*/+d3i  #("=d3i=!$7Hd5k7cOS*22G]2!#NC$J#+/L#S	/D(/$S9NCH9It0A0ABL%BE3:$,HeE5L!,$T:Ke: !#!'s'6$    ]]M   DDK$. D D
$S $*s *
 C %$, SW  >?
"3 > 4r-   rY   c                   ,    e Zd ZdZdZdedededdfdZy)	FutureRequestStatezPTracks the current state of a request and the relevant information to update it.statehas_new_tokencomplete_blocksr   r   r   r   Nc                 .    || _         || _        || _        y rK   r   )rM   r   r   r   s       r+   __init__zFutureRequestState.__init__X  s    
*.r-   )	r3   r4   r5   r6   	__slots__rY   rW   rR   r   r<   r-   r+   r   r   R  s/    Z >I/l /4 /RU /Z^ /r-   r   ) rT   dataclassesr   r   enumr   r   utilsr   r   utils.loggingr	   utils.metricsr
   r!   TMP_TOKEN_ID	getLoggerr'   	propagateStreamHandlerhandlersetFormatter	Formatter
addHandlerrV   r   rR   r,   r/   r>   rY   r   r<   r-   r+   <module>r      s     (   @ $ #   
		5	6	#g##%G***+abc
gF$Cu||S#s/J)K $CNG  5 5 58 V V Vr	/ 	/r-   