
    i<                     R   d Z ddlZddlZddlmZ ddlmZ ddlmZ ddl	m
Z
  e
       r4ddlmZmZ dd	lmZmZmZ dd
lmZ ddlmZmZmZ dd
lmZ ddlmZ ddlmZ ddlmZmZmZm Z m!Z! erddl"m#Z#m$Z$m%Z%m&Z&  G d ded      Z'h dZ( ejR                  e*      Z+ G d de      Z,y)z
Handler for the /v1/chat/completions endpoint.

Supports streaming (SSE via DirectStreamer) and non-streaming (JSON) responses.
    N)AsyncGenerator)TYPE_CHECKING   )logging)is_serve_available)JSONResponseStreamingResponse)ChatCompletionChatCompletionMessageChatCompletionMessageToolCall)Choice)ChatCompletionChunkChoiceDeltaChoiceDeltaToolCall)CompletionCreateParamsStreaming)CompletionUsage   )BaseGenerateManagerBaseHandlerToolCallParser_StreamErrordetect_tool_format)GenerationConfigPreTrainedModelPreTrainedTokenizerFastProcessorMixinc                   "    e Zd ZU eed<   eed<   y)+TransformersCompletionCreateParamsStreaminggeneration_configseedN)__name__
__module____qualname__str__annotations__int     y/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/cli/serving/chat_completion.pyr   r   3   s    
Ir(   r   F)total>   nuseraudiostorelogprobsmetadata	functions
modalities
predictiontool_choiceservice_tiertop_logprobsfunction_callstream_optionsresponse_formatpresence_penaltyreasoning_effortweb_search_optionsparallel_tool_callsmax_completion_tokensc                   X    e Zd ZdZeZeZdede	de
ez  fdZ	 dde	ddd	d
de	dedddededz  de
fdZ	 dde	ddd	d
de	dedddededz  defdZddedddef fdZ	 	 dde	de	de	de	dedz  dee   dz  defdZ	 	 	 	 	 	 	 d de	de	dz  de	dz  de	dz  de	dz  dedz  dedz  de	fdZ xZS )!ChatCompletionHandlerz|Handler for the `/v1/chat/completions` endpoint.

    Supports both streaming (SSE) and non-streaming (JSON) responses.
    body
request_idreturnc           
      .  K   | j                  |       | j                  |      \  }}}| j                  j                  ||      }| j                  j                  ||      }t        j                  d| d|        | j                  j                  ||      }| j                  |d   |      }	|j                  |	d|j                  d      |rdnd	dd
      }
|s|
j                  |j                        }
| j                  ||j                  |      }|r|j!                  ||       |j                  d      rt#        |      nd}|j                  d      }|r| j%                  |||||
|||      S | j'                  |||||
|||       d{   S 7 w)a  Validate the request, load the model, and dispatch to streaming or non-streaming.

        Args:
            body (`dict`): The raw JSON request body (OpenAI chat completion format).
            request_id (`str`): Unique request identifier (from header or auto-generated).

        Returns:
            `StreamingResponse | JSONResponse`: SSE stream or JSON depending on ``body["stream"]``.
        )	processorz[Request received] Model: z, CB: use_cbmessagesTtoolsNpt)add_generation_promptrI   return_tensorsreturn_dicttokenizestream)gen_managertool_format)_validate_request_resolve_modelmodel_managerget_model_modalitygeneration_stateuse_continuous_batchingloggerwarningget_manager"get_processor_inputs_from_messagesapply_chat_templategettodevice_build_generation_configr   init_cbr   
_streaming_non_streaming)selfrA   rB   model_idmodelrE   modalityrG   rP   processor_inputsinputs
gen_configrQ   	streamings                 r)   handle_requestz$ChatCompletionHandler.handle_request_   s     	t$%)%8%8%>"%%%88)8T&&>>uhO3H:VF8LM++777PBB4
CSU]^.."&((7##)4t / 
 YYu||,F2249P9PY_2`
z2 4888G3D(/$HHX&	??'' # 	 	 ,,'' - 	 	 	 	s   FFFFNrf   r   rE   z(ProcessorMixin | PreTrainedTokenizerFastre   ri   rj   r   rP   rQ   c	           	          |j                  |||      \  |d   }	t        |	t              rt        |	      n|	j                  d   |rt        |      nddt        t        df   f fd}
t         |
       d      S )	z(Stream tokens as SSE via DirectStreamer.rB   	input_idsNrC   c            
       K   d} 	 j                  d       d}|s=j                          d {   }|g}	 	 |j                  j                                 
j                  d uxr j                  
j                  k\  }| rd	}n|rd}nd}t!        j                  j                  z         }	j                  ||	       y 7 # t        j
                  $ r Y nw xY wg }|D ]  }|d} nt        |t              r5|j                  d|j                   d       dj                  |        y d|i}Fj                  |      x}3|t        j                  u rzd} d	t        d
d d|d   |d   d      gi}|j                   j                   fdi|        |rdj                  |       |sn# t"        t        j$                  f$ r j'                           w xY ww)NF	assistant)rolerf   Tzdata: {"error": "z"}

 content
tool_callsr   function
_tool_callname	argumentsry   rz   )indextypeidrw   rf   lengthstopprompt_tokenscompletion_tokenstotal_tokens)finish_reasonrf   usage)_build_chunk_sser]   append
get_nowaitasyncio
QueueEmpty
isinstancer   msgjoinfeedr   CONSUMEDr   max_new_tokensr   r   GeneratorExitCancelledErrorcancel)has_tool_callsdonetextbatch	sse_partschunk_kwargsresulthit_maxr   r   rj   	input_lenre   parserqueuerB   rd   streamers             r)   sse_genz1ChatCompletionHandler._streaming.<locals>.sse_gen   sT    "NC++J[PX+YY!&,D!FE"!LL)9)9);< #J %334?vHDYDY]g]v]vDv!$0M$,M$*M'"+&.&;&;!*X-B-B!B
 ++"/"	 ,  i -
 #--  ,.I % l<#'D!%dL9%,,/A$((7-ST"$'')"44" )24'8!-V[[=N3N62[%)@)@@ (-1N ,$7./-7.8\+D:@.W]^iWj1k	%&/"	,L "(()>)>)>z)jQY)j]i)jk5l8 ! ggi00O v "7#9#9:  !	s^   G;.G CG !C A2G G;G C*'G )C**AG >G;?B
G +G88G;ztext/event-stream
media_type)	generate_streamingr   listlenshaper   r   r$   r	   )rd   rB   rf   rE   re   ri   rj   rP   rQ   ro   r   r   r   r   r   s   ``  ` `    @@@@r)   rb   z ChatCompletionHandler._streaming   s     &88	6S]jt8ux;'	&0D&AC	NyWYGZ	0;,E	~c4i8 E	 E	N !7JKKr(   c	           
        K   |j                  |||||       d{   \  }	}
}|j                  duxr t        |      |j                  k\  }t        |      }t        |
||
|z         }d}|?t	        j
                  |	|      }|'|D cg c]  }t        | dd|d   |d   d	       }}|d
}n|rd}nd}t        | j                  ||	||||      d      S 7 c c}w w)z)Run generation and return a JSONResponse.rn   Nr   rx   rw   ry   rz   r{   )r~   r}   rw   rv   r   r   )r   r   rv   zapplication/jsonr   )	generate_non_streamingr   r   r   r   parser   r   _build_completion)rd   rB   rf   rE   re   ri   rj   rP   rQ   ru   r   generated_idsr   r   r   rv   parsedtcr   s                      r)   rc   z$ChatCompletionHandler._non_streaming   sE     3>2T2T9fjZ 3U 3
 -
)M ++47kC<NR\RkRk<k.#/"%66
 
"#))';?F! %  2(\4'*,V*2k?!S
  !(M$M"M""+% #  *

 
	
C-
"s"   C!CA,C!	!C*1C!C!model_generation_configrG   c                    t         |   |||      }|j                  d      t        |d         |_        |j                  d      dt        |d         z   |_        |j                  d      6|d   j                         D ci c]  \  }}t        |      f| c}}|_        |j                  d      
|d   |_	        |S c c}}w )zApply Chat Completions params (``max_tokens``, ``frequency_penalty``, ``logit_bias``,
        ``stop``) on top of the base generation config.rF   
max_tokensfrequency_penaltyg      ?
logit_biasr   )
superr`   r]   r&   r   floatrepetition_penaltyitemssequence_biasstop_strings)rd   rA   r   rG   r   kv	__class__s          r)   r`   z.ChatCompletionHandler._build_generation_config:  s     "G<TCZci<j88L!-/243E/F,88'(436tDW?X9Y3Y088L!-HL\HZH`H`Hb.c1Ay!|.c+88F'-1&\*  	 /ds   Cru   r   r   rv   c                     t        |d|      }t        |t        t        j                               d|t	        d||      g|      }|j                  d      S )	aX  Build a non-streaming ChatCompletion response dict.

        Args:
            request_id (`str`): Unique request identifier.
            content (`str`): The generated text.
            model_id (`str`): Model ID to include in the response.
            finish_reason (`str`): Why generation stopped (``"stop"``, ``"length"``, ``"tool_calls"``).
            usage (`CompletionUsage`, *optional*): Token usage statistics.
            tool_calls (`list[dict]`, *optional*): Parsed tool calls, if any.

        Returns:
            `dict`: Serialized ``ChatCompletion`` ready for JSON response.
        rr   ru   rs   rv   zchat.completionr   )r|   messager   )r~   createdobjectrf   choicesr   T)exclude_none)r   r
   r&   timer   
model_dump)	rd   rB   ru   re   r   r   rv   r   r   s	            r)   r   z'ChatCompletionHandler._build_completionL  se    , (kV`a		$$#"/ 
   d 33r(   rs   c                     t        |t        t        j                               |t        t	        |||      d|      g|dd      }| j                  |      S )a  Build a streaming ``ChatCompletionChunk`` and format it as an SSE ``data:`` line.

        Args:
            request_id (`str`): Unique request identifier.
            content (`str`, *optional*): Text content delta.
            model (`str`, *optional*): Model ID.
            role (`str`, *optional*): Role (only sent in the first chunk).
            finish_reason (`str`, *optional*): Set on the final chunk.
            tool_calls (`list`, *optional*): Tool call deltas.
            usage (`CompletionUsage`, *optional*): Token usage (sent with the final chunk).

        Returns:
            `str`: A formatted SSE event string.
        r   r   )deltar|   r   rt   zchat.completion.chunk)r~   r   rf   r   r   system_fingerprintr   )r   r&   r   ChoiceChunkr   chunk_to_sse)	rd   rB   ru   rf   rs   r   rv   r   chunks	            r)   r   z&ChatCompletionHandler._build_chunk_sses  s_    0 $		$%gDZX"/ !*
   ''r(   )N)F)NN)rt   NNNNNN)r!   r"   r#   __doc__r   _valid_params_classUNUSED_CHAT_COMPLETION_FIELDS_unused_fieldsdictr$   r	   r   rl   r   rb   rc   boolr`   r   r   r   r   __classcell__)r   s   @r)   r@   r@   V   s   
 F2N? ?3 ?CTWcCc ?X $(YLYL !YL >	YL
 YL YL 'YL )YL D[YL 
YLL $(7
7
 !7
 >	7

 7
 7
 '7
 )7
 D[7
 
7
v!T !L^ !hl !0 )-(,%4%4 %4 	%4
 %4 %%4 J%%4 
%4R " $("&(,'('( t'( Tz	'(
 Dj'( Tz'( 4K'( %'( 
'(r(   r@   )-r   r   r   collections.abcr   typingr   utilsr   utils.import_utilsr   fastapi.responsesr   r	   openai.types.chatr
   r   r   !openai.types.chat.chat_completionr   'openai.types.chat.chat_completion_chunkr   r   r   r   *openai.types.chat.completion_create_paramsr   openai.types.completion_usager   r   r   r   r   r   transformersr   r   r   r   r   r   
get_loggerr!   rX   r@   r'   r(   r)   <module>r      s      *    4 Aff8mmMZ=  gg2QY^ ! 0 
		H	%D(K D(r(   