From 368d00260052455d05e4b13c726ca0e84de2b6d1 Mon Sep 17 00:00:00 2001
From: Morgan <me@morgan.kr>
Date: Sat, 1 Feb 2025 22:44:32 +0900
Subject: [PATCH] Added o1, o3-mini and fixed setting for reasoning models

---
 src/lib/ChatRequest.svelte              |  4 ++--
 src/lib/providers/openai/models.svelte  | 23 +++++++++++++++++++++++
 src/lib/providers/openai/request.svelte | 10 ++++++++--
 3 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/src/lib/ChatRequest.svelte b/src/lib/ChatRequest.svelte
index ff98595..6ccc5e9 100644
--- a/src/lib/ChatRequest.svelte
+++ b/src/lib/ChatRequest.svelte
@@ -195,7 +195,7 @@ export class ChatRequest {
               if (opts.maxTokens) value = opts.maxTokens // only as large as requested
               if (value > maxAllowed || value < 1) value = null // if over max model, do not define max
               if (value) value = Math.floor(value)
-              if (chatSettings.model === 'o1-preview' || chatSettings.model === 'o1-mini') {
+              if (modelDetail.reasoning) { // reasoning models send this limit as max_completion_tokens
                 key = 'max_completion_tokens';
               }
             }
@@ -213,7 +213,7 @@ export class ChatRequest {
             if (value !== null) acc[key] = value
             return acc
           }, {}),
-          stream: opts.streaming
+          stream: modelDetail.reasoning ? false : opts.streaming
         }
 
         // Make the chat completion request
diff --git a/src/lib/providers/openai/models.svelte b/src/lib/providers/openai/models.svelte
index 6135f3e..48a6051 100644
--- a/src/lib/providers/openai/models.svelte
+++ b/src/lib/providers/openai/models.svelte
@@ -107,16 +107,36 @@ const gpt4128kpreview = {
 }
 const o1preview = {
       ...chatModelBase,
+      stream: false, // request.svelte skips the streaming branch when this is false
+      reasoning: true, // ChatRequest sends max_completion_tokens and forces stream: false
       prompt: 0.00001, // $0.01 per 1000 tokens prompt
       completion: 0.00003, // $0.03 per 1000 tokens completion
       max: 131072 // 128k max token buffer
 }
 const o1mini = {
       ...chatModelBase,
+      stream: false, // request.svelte skips the streaming branch when this is false
+      reasoning: true, // ChatRequest sends max_completion_tokens and forces stream: false
       prompt: 0.00001, // $0.01 per 1000 tokens prompt
       completion: 0.00003, // $0.03 per 1000 tokens completion
       max: 131072 // 128k max token buffer
 }
+const o1 = {
+      ...chatModelBase,
+      stream: false, // request.svelte skips the streaming branch when this is false
+      reasoning: true, // ChatRequest sends max_completion_tokens and forces stream: false
+      prompt: 15 / 1_000_000, // $0.015 per 1000 tokens prompt
+      completion: 60 / 1_000_000, // $0.06 per 1000 tokens completion
+      max: 200000 // 200k max token buffer
+}
+const o3mini = {
+      ...chatModelBase,
+      stream: false, // request.svelte skips the streaming branch when this is false
+      reasoning: true, // ChatRequest sends max_completion_tokens and forces stream: false
+      prompt: 1.1 / 1_000_000, // $0.0011 per 1000 tokens prompt
+      completion: 4.4 / 1_000_000, // $0.0044 per 1000 tokens completion
+      max: 200000 // 200k max token buffer
+}
 const llama3 = {
   ...chatModelBase,
   prompt: 0.00003,
@@ -135,6 +155,7 @@ const claude35haiku = {
       completion: 4/1_000_000, // $0.004 per 1000 tokens completion
       max: 4096 // 4k max token buffer
 }
+
 export const chatModels : Record<string, ModelDetail> = {
   'gpt-3.5-turbo': { ...gpt3516k },
   'gpt-3.5-turbo-0301': { ...gpt35 },
@@ -159,6 +180,8 @@ export const chatModels : Record<string, ModelDetail> = {
   'gpt-4-32k-0314': { ...gpt432k },
   'o1-preview': { ...o1preview },
   'o1-mini': { ...o1mini },
+  'o1': { ...o1 },
+  'o3-mini': { ...o3mini },
   'mixtral-8x7b-32768': { ...llama3 },
   'llama3-70b-8192': { ...llama3 },
   'llama3-8b-8192': { ...llama3 },
diff --git a/src/lib/providers/openai/request.svelte b/src/lib/providers/openai/request.svelte
index c35daa1..c7ff214 100644
--- a/src/lib/providers/openai/request.svelte
+++ b/src/lib/providers/openai/request.svelte
@@ -13,6 +13,7 @@ export const chatRequest = async (
   opts: ChatCompletionOpts): Promise<ChatCompletionResponse> => {
     // OpenAI Request
       const model = chatRequest.getModel()
+      const modelDetail = getModelDetail(model)
       const signal = chatRequest.controller.signal
       const abortListener = (e:Event) => {
         chatRequest.updating = false
@@ -31,7 +32,12 @@ export const chatRequest = async (
         signal
       }
 
-      if (opts.streaming) {
+      // A model that declares stream: false must not take the streaming branch.
+      if (modelDetail.stream === false) {
+        opts.streaming = false
+        console.log('Disabled streaming on reasoning models.')
+      }
+      if (opts.streaming) { // already false for stream: false models; stream: true must still stream
       /**
              * Streaming request/response
              * We'll get the response a token at a time, as soon as they are ready
@@ -169,4 +175,4 @@ export const imageRequest = async (
   return chatResponse
 }
 
-</script>
\ No newline at end of file
+</script>