
    iV                        d Z ddlZddlZddlmZ ddlmZ ddlmZ ddl	m
Z
  e
       rDddlmZ dd	lmZmZ dd
lmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZm Z m!Z! ddl"m#Z# ddl$m%Z%m&Z&m'Z' ddlm(Z(m)Z)m*Z*m+Z+m,Z, erddl-m.Z.m/Z/m0Z0m1Z1  ejd                  e3      Z4 G d de#d      Z5h dZ6 G d de)      Z7de8de8de'fdZ9y)z~
Handler for the /v1/responses endpoint (OpenAI Responses API).

Supports streaming (SSE) and non-streaming (JSON) responses.
    N)AsyncGenerator)TYPE_CHECKING   )logging)is_serve_available)HTTPException)JSONResponseStreamingResponse)ResponseResponseCompletedEventResponseContentPartAddedEventResponseContentPartDoneEventResponseCreatedEventResponseErrorResponseErrorEventResponseFailedEvent&ResponseFunctionCallArgumentsDoneEventResponseFunctionToolCallResponseInProgressEventResponseOutputItemAddedEventResponseOutputItemDoneEventResponseOutputMessageResponseOutputTextResponseTextDeltaEventResponseTextDoneEvent)ResponseCreateParamsStreaming)InputTokensDetailsOutputTokensDetailsResponseUsage   )BaseGenerateManagerBaseHandlerToolCallParser_StreamErrordetect_tool_format)GenerationConfigPreTrainedModelPreTrainedTokenizerFastProcessorMixinc                   "    e Zd ZU eed<   eed<   y))TransformersResponseCreateParamsStreaminggeneration_configseedN)__name__
__module____qualname__str__annotations__int     r/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/cli/serving/response.pyr+   r+   F   s    
Ir5   r+   F)total>   textuserstorepromptinclude	reasoning
background
truncationtool_choiceservice_tiertop_logprobsmax_tool_callsprevious_response_idc                        e Zd ZdZeZeZdede	de
ez  fdZededee   fd       Z	 dde	dd	d
dde	dededddededz  de
fdZ	 dde	dd	d
dde	dededddededz  defdZddedddef fdZ xZS )ResponseHandlerz+Handler for the ``/v1/responses`` endpoint.body
request_idreturnc                 P  K   | j                  |       | j                  |      \  }}}| j                  j                  ||      }| j                  j                  ||      }t        j                  d| d|        | j                  j                  ||      }| j                  |      }	| j                  |	|      }
|j                  |
d|j                  d      |rdnddd	      }|s|j                  |j                        }| j                  ||j                   |      }|r|j#                  ||       |j                  d      rt%        |      nd}|j                  d
d      }|r| j'                  |||||||||	      S | j)                  |||||||||	       d{   S 7 w)az  Validate, load model, dispatch to streaming or non-streaming.

        Args:
            body (`dict`): The raw JSON request body (OpenAI Responses API format).
            request_id (`str`): Unique request identifier (from header or auto-generated).

        Returns:
            `StreamingResponse | JSONResponse`: SSE stream or JSON depending on ``body["stream"]``.
        )	processorz[Request received] Model: z, CB: use_cbTtoolsNpt)add_generation_promptrN   return_tensorsreturn_dicttokenizestream)gen_managertool_format)_validate_request_resolve_modelmodel_managerget_model_modalitygeneration_stateuse_continuous_batchingloggerwarningget_manager_input_to_messages"get_processor_inputs_from_messagesapply_chat_templategettodevice_build_generation_configr,   init_cbr%   
_streaming_non_streaming)selfrG   rH   model_idmodelrK   modalityrM   rU   messagesprocessor_inputsinputs
gen_configrV   	streamings                  r6   handle_requestzResponseHandler.handle_requestb   s     	t$%)%8%8%>"%%%88)8T&&>>uhO3H:VF8LM++777P
 **40BB8XV.."&((7##)4t / 
 YYu||,F2249P9PY_2`
z23788G3D(/$HHXt,	??'' # 
 
 ,,'' - 
 
 
 
s   FF&F$ F&c                    | d   }| j                  d      }t        |t              r |rd|dgng }|j                  d|d       |S t        |t              r0|r*|d   d   dk7  r
d|dg|}|S t	        |      }||d   d<   |S |}|S t        |t
              r|rd|dgng }|j                  |       |S t        d	d
      )a  Convert the Responses API ``input`` field to a list of chat messages.

        Handles string, list, and dict inputs. If ``instructions`` is provided, it is
        prepended as a system message (or replaces an existing one).

        Args:
            body (`dict`): The raw request body containing ``input`` and optionally ``instructions``.

        Returns:
            `list[dict]`: Standard OpenAI-format chat messages.
        inputinstructionssystem)rolecontentr9   r   rx   ry   i  z''input' must be a string, list, or dict)status_codedetail)rc   
isinstancer1   appendlistdictr   )rG   inprv   rn   s       r6   r`   z"ResponseHandler._input_to_messages   s     7mxx/c3HTlCDZ\HOOV<=   T"q6&>X-)1l KRcRH   $CyH-9HQK	*    T"HTlCDZ\HOOC    C8abbr5   Nrl   r'   rK   z(ProcessorMixin | PreTrainedTokenizerFastrk   rp   rq   r&   rU   rV   c
                     |j                  ||||      \  |d   }
t        |
t              rt        |
      n|
j                  d   |	rt        |	      ndddt        j                         }d }d |||dg |j                  d	d
      dddt        t        df   f f
d}t         |       d      S )zDGenerate a streaming Responses API reply (SSE) using DirectStreamer.rH   	input_idsNr   resp_msg_responseparallel_tool_callsFauto)id
created_atrl   objectrN   r   r@   rI   c                 	  
K   	 j                  t        dt        d.i dg d             dz  j                  t        dt        d.i dg d             dz  j                  t	        dt        d	dd
g                    dz  j                  t        ddt        ddg                    dz  d} g }d}|s=j                          d {   }|g}	 	 |j                  j                                 t        d| g       }j                  t;        d(dd| g )             dz  j                  t=        d*dd|             dz  t        d	d!d
|gg +      }j                  t7        d%d|             dz  |gt?        |      z   }tA        jB                        }j                  tE        d,t        d.i d!||d-             dz  y 7 # t        j                  $ r Y nw xY wg }|D ]  }|d} nt        |t              rt        j!                  d|j"                          |j                  j                  t%        d|j"                                     dz  |j                  j                  t'        dt        d.i dg t)        d|j"                        d                   dj+                  |        y j-                  |      x}|t.        j0                  urΉ d}|d   }|d   }	t3        ||d ||	d!"      }
|j                  |
       dz  |j                  j                  t	        d|
                   dz  |j                  j                  t5        d#||	|$                   dz  |j                  j                  t7        d%|
                   dz  | |z  } |j                  j                  t9        d&dd|g '                   dz   |rdj+                  |       |sz?# tF        t        jH                  f$ r jK                           w xY ww)/Nzresponse.createdqueued)statusoutput)typesequence_numberr   r    zresponse.in_progressin_progresszresponse.output_item.addedmessage	assistant)r   r   r   rx   ry   )r   r   output_indexitemzresponse.content_part.addedr   output_text r   r8   annotations)r   item_idr   r   content_indexpartFTz"Exception in response generation: error)r   r   r   zresponse.failedfailedserver_error)coder   )r   r   r   
_tool_callname	argumentsfunction_call	completedr   call_idr   r   r   r   z%response.function_call_arguments.done)r   r   r   r   r   r   zresponse.output_item.donezresponse.output_text.delta)r   r   r   r   r   deltalogprobszresponse.output_text.done)r   r   r   r   r   r8   r   zresponse.content_part.doner   r   r   rx   ry   r   zresponse.completed)r   r   usager4   )&chunk_to_sser   r   r   r   r   r   r   rc   r}   
get_nowaitasyncio
QueueEmptyr|   r$   r]   r   msgr   r   r   joinfeedr#   CONSUMEDr   r   r   r   r   r   r~   compute_usagetotal_tokensr   GeneratorExitCancelledErrorcancel)	full_text
tool_callsdoner8   batch	sse_partsresulttc_idr   r   tc_itemoutput_text_partmsg_item
all_outputr   	input_lenmsg_idr   parserqueuerH   response_baserj   seqstreamers                  r6   event_streamz0ResponseHandler._streaming.<locals>.event_stream   s{    a''(/(+!)!VM!V(SU!V  q''+3(+!)![M![-XZ![  q ''09(+%12%!*#0!,$&	  q ''1: &(+%1&'/]Y[\	 	 q 	
!&,D!FE"!LL)9)9);< #X $6=yfh#i '')8 &(+%&&'&!#
 
 q''09 &(+%&&'-	 	 q0"&$-. " ''/8(+%&%	  q 'Z$z*::
%i1F1FG''*1(+!)!nM!n+V`hm!n  qK - #--  ,.I % `!<#'D!%dL9"LL+MdhhZ)XY%,, $ 1 1$6GUXbfbjbj$k!"
  1HC%,, $ 1 1$7->8;19 2*.;2*3;352?^]a]e]e2f	2*	%&!" #%'')"44" "-V[[=N3N62[%^-D-DD+5,j(A'-f~,2;,?	*B',,1)8)-.7+6+" !+ 1 1' : , 1 ) 0 0$($5$5(D1M<?9E18	)*%&	!" !$q ) 0 0$($5$5(N1X<?499E6?15)*	%&!" !$q ) 0 0$($5$5(C1L<?9E18	)*%&	!" !$q$!T)	!(( -- 6)E,2471223*.-/!"
 qA`!D ! ggi00] N "7#9#9:  !	sb   RC)Q .H	/Q 7!H C0Q R	Q H"Q !H""C$Q REQ +RRztext/event-stream)
media_type)generate_streamingr|   r~   lenshaper#   timerc   r   r1   r
   )rj   rH   rl   rK   rk   rG   rp   rq   rU   rV   r   r   resp_idr   r   r   r   r   r   r   r   r   s   ``            @@@@@@@@r6   rh   zResponseHandler._streaming   s     &88	6S]jt8ux;'	&0D&AC	NyWYGZ	0;,YY[
*&
|$ $ #'88,A5#I!	
d	N39$= d	 d	L !<OPPr5   c
                   K   |j                  |||||       d{   \  }
}}t        d| dddt        d|
g       gg 	      g}|	Vt        j                  |
|	      }|>t        |      D ]0  \  }}| d
}|j                  t        ||d|d   |d   d             2 t        |t        |            }t        d| t        j                         d||d|g |j                  dd      d
      }t        |j                  d            S 7 w)z;Generate a non-streaming Responses API reply (single JSON).r   Nr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   Fr   )
r   r   r   rl   r   r   r   rN   r   r@   T)exclude_none)generate_non_streamingr   r   r#   parse	enumerater}   r   r   r   r   r   rc   r	   
model_dump)rj   rH   rl   rK   rk   rG   rp   rq   rU   rV   r   r   generated_idsoutput_itemsparsed_callsitcr   r   r   s                       r6   ri   zResponseHandler._non_streaming  sV     5@4V4V9fjZ 5W 5
 /
+	9m
 "*&" +Y\^_`	
 ")//	;GL'&|4 EAr)l*5E ''0$$)!0!#F&(o#.	 i]);<zl#yy{ $)> F
 H//T/BCC]/
s   DD	C-Dmodel_generation_configrM   c                 t    t         |   |||      }|j                  d      t        |d         |_        |S )zXApply Responses API params (``max_output_tokens``) on top of the base generation config.rL   max_output_tokens)superrf   rc   r3   max_new_tokens)rj   rG   r   rM   r,   	__class__s        r6   rf   z(ResponseHandler._build_generation_config  sF    !G<TCZci<j88'(4/248K3L/M,  r5   )N)F)r.   r/   r0   __doc__r+   _valid_params_classUNUSED_RESPONSE_FIELDS_unused_fieldsr   r1   r
   r	   rs   staticmethodr~   r`   r!   rh   ri   boolrf   __classcell__)r   s   @r6   rF   rF   \   s   5C+NA A3 ACTWcCc AJ ! !$t* ! !^ $(JQJQ !JQ >	JQ
 JQ JQ JQ 'JQ )JQ D[JQ 
JQp $(;D;D !;D >	;D
 ;D ;D ;D ';D );D D[;D 
;D~!T !L^ !hl ! !r5   rF   input_tokensoutput_tokensrI   c           	      P    t        | || |z   t        d      t        d            S )a  Build a ``ResponseUsage`` object for a Responses API reply.

    Args:
        input_tokens (`int`): Number of prompt tokens.
        output_tokens (`int`): Number of generated tokens.

    Returns:
        `ResponseUsage`: Usage statistics with zero-filled detail fields.
    r   )cached_tokens)reasoning_tokens)r   r   r   input_tokens_detailsoutput_tokens_details)r   r   r   )r   r   s     r6   r   r   $  s/     !#!M1/a@11E r5   ):r   r   r   collections.abcr   typingr   utilsr   utils.import_utilsr   fastapir   fastapi.responsesr	   r
   openai.types.responsesr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   -openai.types.responses.response_create_paramsr   %openai.types.responses.response_usager   r   r   r!   r"   r#   r$   r%   transformersr&   r'   r(   r)   
get_loggerr.   r]   r+   r   rF   r3   r   r4   r5   r6   <module>r      s      *    4 %A    & \ll  gg 
		H	%0MUZ 
 "E!k E!P C M r5   