Upload folder using huggingface_hub

Files changed:
- src/openai_patch.ts  +5 -0
- src/routes/health.ts  +5 -0
- src/routes/index.ts  +1 -0
- src/routes/responses.ts  +19 -76
- src/schemas.ts  +2 -2
- src/server.ts  +3 -1
src/openai_patch.ts
CHANGED

@@ -9,6 +9,7 @@ import type {
   ResponseOutputText,
 } from "openai/resources/responses/responses";
 
+import type { ChatCompletionChunk } from "openai/resources/chat/completions";
 export interface ReasoningTextContent {
   type: "reasoning_text";
   text: string;
@@ -42,3 +43,7 @@ export type PatchedResponseStreamEvent =
   | PatchedResponseReasoningTextDoneEvent;
 
 export type PatchedResponseContentPart = ResponseOutputText | ResponseOutputRefusal;
+
+export type PatchedDeltaWithReasoning = ChatCompletionChunk.Choice.Delta & {
+  reasoning?: string;
+};
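The new PatchedDeltaWithReasoning type exists because some providers stream reasoning on a non-standard reasoning field that the stock ChatCompletionChunk delta type does not declare. A minimal sketch of how a consumer narrows a chunk with it, mirroring the cast used in src/routes/responses.ts below (the function and logging are illustrative only, not part of the commit):

import type { ChatCompletionChunk } from "openai/resources/chat/completions";
import type { PatchedDeltaWithReasoning } from "./openai_patch";

function inspectDelta(chunk: ChatCompletionChunk): void {
  // The stock Delta type has no `reasoning`, so widen it via the patched type.
  const delta = chunk.choices[0].delta as PatchedDeltaWithReasoning;
  if (delta.reasoning) {
    console.log("reasoning delta:", delta.reasoning); // provider-specific field
  } else if (delta.content) {
    console.log("content delta:", delta.content);
  }
}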
src/routes/health.ts
ADDED

@@ -0,0 +1,5 @@
+import type { Request, Response } from "express";
+
+export function getHealth(req: Request, res: Response): void {
+  res.send("OK");
+}
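The handler can be exercised without binding a port by stubbing the Express response; the stub below is an illustrative sketch, not part of the commit:

import type { Request, Response } from "express";
import { getHealth } from "./health.js";

// Minimal stand-in capturing what the handler sends.
const res = {
  send(body: unknown) {
    console.log("sent:", body); // expected: sent: OK
    return this;
  },
} as unknown as Response;

getHealth({} as Request, res);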
src/routes/index.ts
CHANGED

@@ -1,2 +1,3 @@
 export { postCreateResponse } from "./responses.js";
 export { getLandingPageHtml } from "./landingPageHtml.js";
+export { getHealth } from "./health.js";
src/routes/responses.ts
CHANGED

@@ -15,16 +15,15 @@ import type {
   PatchedResponseReasoningItem,
   PatchedResponseStreamEvent,
   ReasoningTextContent,
+  PatchedDeltaWithReasoning,
 } from "../openai_patch";
 import type {
   ChatCompletionCreateParamsStreaming,
   ChatCompletionMessageParam,
   ChatCompletionTool,
-  ChatCompletionChunk,
 } from "openai/resources/chat/completions.js";
 import type { FunctionParameters } from "openai/resources/shared.js";
 import { callMcpTool, connectMcpServer } from "../mcp.js";
-import type { Stream } from "openai/core/streaming.js";
 
 class StreamingError extends Error {
   constructor(message: string) {
@@ -36,10 +35,6 @@ class StreamingError extends Error {
 type IncompleteResponse = Omit<Response, "incomplete_details" | "output_text" | "parallel_tool_calls">;
 const SEQUENCE_NUMBER_PLACEHOLDER = -1;
 
-// TODO: this depends on the model. To be adapted.
-const REASONING_START_TOKEN = "<think>";
-const REASONING_END_TOKEN = "</think>";
-
 export const postCreateResponse = async (
   req: ValidatedRequest<CreateResponseParams>,
   res: ExpressResponse
@@ -498,7 +493,7 @@ async function* handleOneTurnStream(
     baseURL: process.env.OPENAI_BASE_URL ?? "https://router.huggingface.co/v1",
     apiKey: apiKey,
   });
-  const stream = wrapChatCompletionStream(await client.chat.completions.create(payload));
+  const stream = await client.chat.completions.create(payload);
   let previousInputTokens = responseObject.usage?.input_tokens ?? 0;
   let previousOutputTokens = responseObject.usage?.output_tokens ?? 0;
   let previousTotalTokens = responseObject.usage?.total_tokens ?? 0;
@@ -516,22 +511,26 @@ async function* handleOneTurnStream(
     };
   }
 
-  const delta = chunk.choices[0].delta;
+  const delta = chunk.choices[0].delta as PatchedDeltaWithReasoning;
 
-  if (delta.content) {
+  if (delta.content || delta.reasoning) {
     let currentOutputItem = responseObject.output.at(-1);
-    let deltaText = delta.content;
 
     // If start or end of reasoning, skip token and update the current text mode
-    if (deltaText === REASONING_START_TOKEN) {
+    if (delta.reasoning) {
+      if (currentTextMode === "text") {
+        for await (const event of closeLastOutputItem(responseObject, payload, mcpToolsMapping)) {
+          yield event;
+        }
+      }
       currentTextMode = "reasoning";
-
-
-
-
-
+    } else if (delta.content) {
+      if (currentTextMode === "reasoning") {
+        for await (const event of closeLastOutputItem(responseObject, payload, mcpToolsMapping)) {
+          yield event;
+        }
       }
-
+      currentTextMode = "text";
     }
 
     // If start of a new message, create it
@@ -611,7 +610,7 @@ async function* handleOneTurnStream(
         item_id: currentOutputMessage.id,
         output_index: responseObject.output.length - 1,
         content_index: currentOutputMessage.content.length - 1,
-        delta: delta.content,
+        delta: delta.content as string,
         sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
       };
     } else if (currentTextMode === "reasoning") {
@@ -636,13 +635,13 @@ async function* handleOneTurnStream(
 
       // Add text delta
       const contentPart = currentReasoningItem.content.at(-1) as ReasoningTextContent;
-      contentPart.text += delta.content;
+      contentPart.text += delta.reasoning;
       yield {
        type: "response.reasoning_text.delta",
        item_id: currentReasoningItem.id,
        output_index: responseObject.output.length - 1,
        content_index: currentReasoningItem.content.length - 1,
-       delta: delta.content,
+       delta: delta.reasoning as string,
        sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
      };
    }
@@ -992,59 +991,3 @@ async function* closeLastOutputItem(
     }
   }
 }
-
-/*
- * Wrap a chat completion stream to handle reasoning.
- *
- * The reasoning start and end tokens might be sent in a longer text chunk.
- * We want to split that text chunk so that the reasoning token is isolated in a separate chunk.
- *
- * TODO: also adapt for when reasoning token is sent in separate chunks.
- */
-async function* wrapChatCompletionStream(
-  stream: Stream<ChatCompletionChunk & { _request_id?: string | null | undefined }>
-): AsyncGenerator<ChatCompletionChunk & { _request_id?: string | null | undefined }> {
-  function cloneChunkWithContent(baseChunk: ChatCompletionChunk, content: string): ChatCompletionChunk {
-    return {
-      ...baseChunk,
-      choices: [
-        {
-          ...baseChunk.choices[0],
-          delta: {
-            ...baseChunk.choices[0].delta,
-            content,
-          },
-        },
-      ],
-    };
-  }
-
-  function* splitAndYieldChunk(chunk: ChatCompletionChunk, content: string, token: string) {
-    const [beforeContent, afterContent] = content.split(token, 2);
-
-    if (beforeContent) {
-      yield cloneChunkWithContent(chunk, beforeContent);
-    }
-    yield cloneChunkWithContent(chunk, token);
-    if (afterContent) {
-      yield cloneChunkWithContent(chunk, afterContent);
-    }
-  }
-
-  for await (const chunk of stream) {
-    const content = chunk.choices[0].delta.content;
-
-    if (!content) {
-      yield chunk;
-      continue;
-    }
-
-    if (content.includes(REASONING_START_TOKEN)) {
-      yield* splitAndYieldChunk(chunk, content, REASONING_START_TOKEN);
-    } else if (content.includes(REASONING_END_TOKEN)) {
-      yield* splitAndYieldChunk(chunk, content, REASONING_END_TOKEN);
-    } else {
-      yield chunk;
-    }
-  }
-}
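Taken together, these hunks replace the old approach, where <think> and </think> markers had to be spliced out of the content stream by wrapChatCompletionStream, with direct use of the provider's structured reasoning field. The mode switching then reduces to a small decision that can be sketched in isolation (the function name and shape here are illustrative, not the module's actual internals):

type TextMode = "text" | "reasoning";

// Mirrors the diff's branch logic: a reasoning delta forces reasoning mode,
// a content delta forces text mode, and an empty delta keeps the current mode.
// In the real handler, each mode change also closes the last open output item
// via closeLastOutputItem before switching.
function nextTextMode(current: TextMode, delta: { content?: string | null; reasoning?: string }): TextMode {
  if (delta.reasoning) return "reasoning";
  if (delta.content) return "text";
  return current;
}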
src/schemas.ts
CHANGED

@@ -101,8 +101,8 @@ export const createResponseParamsSchema = z.object({
   z.object({
     type: z.literal("output_text"),
     text: z.string(),
-    annotations: z.array(z.object({})).optional(), // TODO: incomplete
-    logprobs: z.array(z.object({})).optional(), // TODO: incomplete
+    annotations: z.array(z.object({})).nullable().optional(), // TODO: incomplete
+    logprobs: z.array(z.object({})).nullable().optional(), // TODO: incomplete
   }),
   z.object({
     type: z.literal("refusal"),
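The schema change matters because zod's .optional() accepts a missing key but still rejects an explicit null; chaining .nullable().optional() accepts absent, null, or an array. A reduced, self-contained check (this trimmed schema is a stand-in for the full one in the diff):

import { z } from "zod";

const outputTextPart = z.object({
  type: z.literal("output_text"),
  text: z.string(),
  annotations: z.array(z.object({})).nullable().optional(),
});

outputTextPart.parse({ type: "output_text", text: "hi" });                    // ok: key absent
outputTextPart.parse({ type: "output_text", text: "hi", annotations: null }); // ok now; threw with .optional() alone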
src/server.ts
CHANGED

@@ -2,7 +2,7 @@ import express, { type Express } from "express";
 import { createResponseParamsSchema } from "./schemas.js";
 import { validateBody } from "./middleware/validation.js";
 import { requestLogger } from "./middleware/logging.js";
-import { getLandingPageHtml, postCreateResponse } from "./routes/index.js";
+import { getLandingPageHtml, postCreateResponse, getHealth } from "./routes/index.js";
 
 export const createApp = (): Express => {
   const app: Express = express();
@@ -14,6 +14,8 @@ export const createApp = (): Express => {
   // Routes
   app.get("/", getLandingPageHtml);
 
+  app.get("/health", getHealth);
+
   app.post("/v1/responses", validateBody(createResponseParamsSchema), postCreateResponse);
 
   return app;
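Since createApp only builds the app, a caller still has to bind a port before the new route is reachable. A hedged boot sketch (the entry point and port 3000 are assumptions, not part of this commit; fetch assumes Node 18+):

import { createApp } from "./server.js";

const app = createApp();
app.listen(3000, async () => {
  // Probe the health route registered above.
  const res = await fetch("http://localhost:3000/health");
  console.log(res.status, await res.text()); // expected: 200 OK
});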