From 368d00260052455d05e4b13c726ca0e84de2b6d1 Mon Sep 17 00:00:00 2001 From: Morgan Date: Sat, 1 Feb 2025 22:44:32 +0900 Subject: [PATCH] Added o1, o3-mini and fixed setting for reasoning models --- src/lib/ChatRequest.svelte | 4 ++-- src/lib/providers/openai/models.svelte | 23 +++++++++++++++++++++++ src/lib/providers/openai/request.svelte | 10 ++++++++-- 3 files changed, 33 insertions(+), 4 deletions(-) diff --git a/src/lib/ChatRequest.svelte b/src/lib/ChatRequest.svelte index ff98595..6ccc5e9 100644 --- a/src/lib/ChatRequest.svelte +++ b/src/lib/ChatRequest.svelte @@ -195,7 +195,7 @@ export class ChatRequest { if (opts.maxTokens) value = opts.maxTokens // only as large as requested if (value > maxAllowed || value < 1) value = null // if over max model, do not define max if (value) value = Math.floor(value) - if (chatSettings.model === 'o1-preview' || chatSettings.model === 'o1-mini') { + if (modelDetail.reasoning == true) { key = 'max_completion_tokens'; } } @@ -213,7 +213,7 @@ export class ChatRequest { if (value !== null) acc[key] = value return acc }, {}), - stream: opts.streaming + stream: modelDetail.reasoning ? false : opts.streaming } // Make the chat completion request diff --git a/src/lib/providers/openai/models.svelte b/src/lib/providers/openai/models.svelte index 6135f3e..48a6051 100644 --- a/src/lib/providers/openai/models.svelte +++ b/src/lib/providers/openai/models.svelte @@ -107,16 +107,36 @@ const gpt4128kpreview = { } const o1preview = { ...chatModelBase, + stream: false, + reasoning: true, prompt: 0.00001, // $0.01 per 1000 tokens prompt completion: 0.00003, // $0.03 per 1000 tokens completion max: 131072 // 128k max token buffer } const o1mini = { ...chatModelBase, + stream: false, + reasoning: true, prompt: 0.00001, // $0.01 per 1000 tokens prompt completion: 0.00003, // $0.03 per 1000 tokens completion max: 131072 // 128k max token buffer } +const o1 = { + ...chatModelBase, + stream: false, + reasoning: true, + prompt: 15 / 1_000_000, + completion: 60 / 1_000_000, + max: 200000 +} +const o3mini = { + ...chatModelBase, + stream: false, + reasoning: true, + prompt: 1.1 / 1_000_000, + completion: 4.4 / 1_000_000, + max: 200000 +} const llama3 = { ...chatModelBase, prompt: 0.00003, @@ -135,6 +155,7 @@ const claude35haiku = { completion: 4/1_000_000, // $0.004 per 1000 tokens completion max: 4096 // 4k max token buffer } + export const chatModels : Record = { 'gpt-3.5-turbo': { ...gpt3516k }, 'gpt-3.5-turbo-0301': { ...gpt35 }, @@ -159,6 +180,8 @@ export const chatModels : Record = { 'gpt-4-32k-0314': { ...gpt432k }, 'o1-preview': { ...o1preview }, 'o1-mini': { ...o1mini }, + 'o1': { ...o1 }, + 'o3-mini': { ...o3mini }, 'mixtral-8x7b-32768': { ...llama3 }, 'llama3-70b-8192': { ...llama3 }, 'llama3-8b-8192': { ...llama3 }, diff --git a/src/lib/providers/openai/request.svelte b/src/lib/providers/openai/request.svelte index c35daa1..c7ff214 100644 --- a/src/lib/providers/openai/request.svelte +++ b/src/lib/providers/openai/request.svelte @@ -13,6 +13,7 @@ export const chatRequest = async ( opts: ChatCompletionOpts): Promise => { // OpenAI Request const model = chatRequest.getModel() + const modelDetail = getModelDetail(model) const signal = chatRequest.controller.signal const abortListener = (e:Event) => { chatRequest.updating = false @@ -31,7 +32,12 @@ export const chatRequest = async ( signal } - if (opts.streaming) { + if (modelDetail.stream === false) { + opts.streaming = false; + console.log("Disabled streaming on reasoning models."); + } + + if (opts.streaming && !modelDetail.stream) { /** * Streaming request/response * We'll get the response a token at a time, as soon as they are ready @@ -169,4 +175,4 @@ export const imageRequest = async ( return chatResponse } - \ No newline at end of file +