Added the o1 and o3-mini models and fixed request settings for reasoning models

This commit is contained in:
2025-02-01 22:44:32 +09:00
parent 3c115692ef
commit 368d002600
3 changed files with 33 additions and 4 deletions

View File

@@ -195,7 +195,7 @@ export class ChatRequest {
if (opts.maxTokens) value = opts.maxTokens // only as large as requested if (opts.maxTokens) value = opts.maxTokens // only as large as requested
if (value > maxAllowed || value < 1) value = null // if over max model, do not define max if (value > maxAllowed || value < 1) value = null // if over max model, do not define max
if (value) value = Math.floor(value) if (value) value = Math.floor(value)
if (chatSettings.model === 'o1-preview' || chatSettings.model === 'o1-mini') { if (modelDetail.reasoning == true) {
key = 'max_completion_tokens'; key = 'max_completion_tokens';
} }
} }
@@ -213,7 +213,7 @@ export class ChatRequest {
if (value !== null) acc[key] = value if (value !== null) acc[key] = value
return acc return acc
}, {}), }, {}),
stream: opts.streaming stream: modelDetail.reasoning ? false : opts.streaming
} }
// Make the chat completion request // Make the chat completion request

View File

@@ -107,16 +107,36 @@ const gpt4128kpreview = {
} }
const o1preview = { const o1preview = {
...chatModelBase, ...chatModelBase,
stream: false,
reasoning: true,
prompt: 0.00001, // $0.01 per 1000 tokens prompt prompt: 0.00001, // $0.01 per 1000 tokens prompt
completion: 0.00003, // $0.03 per 1000 tokens completion completion: 0.00003, // $0.03 per 1000 tokens completion
max: 131072 // 128k max token buffer max: 131072 // 128k max token buffer
} }
const o1mini = { const o1mini = {
...chatModelBase, ...chatModelBase,
stream: false,
reasoning: true,
prompt: 0.00001, // $0.01 per 1000 tokens prompt prompt: 0.00001, // $0.01 per 1000 tokens prompt
completion: 0.00003, // $0.03 per 1000 tokens completion completion: 0.00003, // $0.03 per 1000 tokens completion
max: 131072 // 128k max token buffer max: 131072 // 128k max token buffer
} }
// OpenAI o1 reasoning model: pricing and context-window limits.
const o1 = {
  ...chatModelBase,
  stream: false, // reasoning model — streamed responses are not supported
  reasoning: true,
  prompt: 15 / 1e6, // $15.00 per 1M prompt tokens
  completion: 60 / 1e6, // $60.00 per 1M completion tokens
  max: 200000 // 200k max token buffer
}
// OpenAI o3-mini reasoning model: pricing and context-window limits.
const o3mini = {
  ...chatModelBase,
  stream: false, // reasoning model — streamed responses are not supported
  reasoning: true,
  prompt: 1.1 / 1e6, // $1.10 per 1M prompt tokens
  completion: 4.4 / 1e6, // $4.40 per 1M completion tokens
  max: 200000 // 200k max token buffer
}
const llama3 = { const llama3 = {
...chatModelBase, ...chatModelBase,
prompt: 0.00003, prompt: 0.00003,
@@ -135,6 +155,7 @@ const claude35haiku = {
completion: 4/1_000_000, // $0.004 per 1000 tokens completion completion: 4/1_000_000, // $0.004 per 1000 tokens completion
max: 4096 // 4k max token buffer max: 4096 // 4k max token buffer
} }
export const chatModels : Record<string, ModelDetail> = { export const chatModels : Record<string, ModelDetail> = {
'gpt-3.5-turbo': { ...gpt3516k }, 'gpt-3.5-turbo': { ...gpt3516k },
'gpt-3.5-turbo-0301': { ...gpt35 }, 'gpt-3.5-turbo-0301': { ...gpt35 },
@@ -159,6 +180,8 @@ export const chatModels : Record<string, ModelDetail> = {
'gpt-4-32k-0314': { ...gpt432k }, 'gpt-4-32k-0314': { ...gpt432k },
'o1-preview': { ...o1preview }, 'o1-preview': { ...o1preview },
'o1-mini': { ...o1mini }, 'o1-mini': { ...o1mini },
'o1': { ...o1 },
'o3-mini': { ...o3mini },
'mixtral-8x7b-32768': { ...llama3 }, 'mixtral-8x7b-32768': { ...llama3 },
'llama3-70b-8192': { ...llama3 }, 'llama3-70b-8192': { ...llama3 },
'llama3-8b-8192': { ...llama3 }, 'llama3-8b-8192': { ...llama3 },

View File

@@ -13,6 +13,7 @@ export const chatRequest = async (
opts: ChatCompletionOpts): Promise<ChatCompletionResponse> => { opts: ChatCompletionOpts): Promise<ChatCompletionResponse> => {
// OpenAI Request // OpenAI Request
const model = chatRequest.getModel() const model = chatRequest.getModel()
const modelDetail = getModelDetail(model)
const signal = chatRequest.controller.signal const signal = chatRequest.controller.signal
const abortListener = (e:Event) => { const abortListener = (e:Event) => {
chatRequest.updating = false chatRequest.updating = false
@@ -31,7 +32,12 @@ export const chatRequest = async (
signal signal
} }
if (opts.streaming) { if (modelDetail.stream === false) {
opts.streaming = false;
console.log("Disabled streaming on reasoning models.");
}
if (opts.streaming && !modelDetail.stream) {
/** /**
* Streaming request/response * Streaming request/response
* We'll get the response a token at a time, as soon as they are ready * We'll get the response a token at a time, as soon as they are ready