Models cleanup

This commit is contained in:
2025-02-28 07:01:53 +09:00
parent 795941d874
commit bace639f1d

View File

@@ -1,15 +1,16 @@
<script context="module" lang="ts"> <script context="module" lang="ts">
import { getApiBase, getEndpointCompletions, getEndpointGenerations } from '../../ApiUtil.svelte'
import { countTokens } from '../../Models.svelte'
import { countMessageTokens } from '../../Stats.svelte'
import { globalStorage } from '../../Storage.svelte'
import type { Chat, Message, Model, ModelDetail } from '../../Types.svelte'
import { chatRequest, imageRequest } from './request.svelte'
import { checkModel } from './util.svelte'
import { encode } from 'gpt-tokenizer'
import { get } from 'svelte/store'
const hiddenSettings = { import { getApiBase, getEndpointCompletions, getEndpointGenerations } from "../../ApiUtil.svelte";
import { countTokens } from "../../Models.svelte";
import { countMessageTokens } from "../../Stats.svelte";
import { globalStorage } from "../../Storage.svelte";
import type { Chat, Message, Model, ModelDetail } from "../../Types.svelte";
import { chatRequest, imageRequest } from "./request.svelte";
import { checkModel } from "./util.svelte";
import { encode } from "gpt-tokenizer";
import { get } from "svelte/store";
const hiddenSettings = {
startSequence: true, startSequence: true,
stopSequence: true, stopSequence: true,
aggressiveStop: true, aggressiveStop: true,
@@ -21,301 +22,209 @@ const hiddenSettings = {
systemMessageStart: true, systemMessageStart: true,
systemMessageEnd: true, systemMessageEnd: true,
repetitionPenalty: true, repetitionPenalty: true,
holdSocket: true holdSocket: true,
// leadPrompt: true // leadPrompt: true
} as any } as any;
const chatModelBase = { const chatModelBase = {
type: 'chat', type: "chat",
help: 'Below are the settings that OpenAI allows to be changed for the API calls. See the <a target="_blank" href="https://platform.openai.com/docs/api-reference/chat/create">OpenAI API docs</a> for more details.', help: 'Below are the settings that OpenAI allows to be changed for the API calls. See the <a target="_blank" href="https://platform.openai.com/docs/api-reference/chat/create">OpenAI API docs</a> for more details.',
preFillMerge: (existingContent, newContent) => { preFillMerge: (existingContent, newContent) => {
// continuing assistant prompt. see if we need to add a space before we merge the new completion if (existingContent && !newContent.match(/^('(t|ll|ve|m|d|re)[^a-z]|\s|[.,;:(_-{}*^%$#@!?+=~`[\]])/i)) {
// there has to be a better way to do this existingContent += " ";
if (existingContent && !newContent.match(/^('(t|ll|ve|m|d|re)[^a-z]|\s|[.,;:(_-{}*^%$#@!?+=~`[\]])/i)) { }
// add a trailing space if our new content isn't a contraction return existingContent;
existingContent += ' ' },
} request: chatRequest,
return existingContent check: checkModel,
}, getTokens: (value) => encode(value),
request: chatRequest, getEndpoint: (model) => get(globalStorage).openAICompletionEndpoint || getApiBase() + getEndpointCompletions(),
check: checkModel, hideSetting: (chatId, setting) => !!hiddenSettings[setting.key],
getTokens: (value) => encode(value), countMessageTokens: (message: Message, model: Model, chat: Chat) => {
getEndpoint: (model) => get(globalStorage).openAICompletionEndpoint || (getApiBase() + getEndpointCompletions()), return countTokens(model, "## " + message.role + " ##:\r\n\r\n" + message.content + "\r\n\r\n\r\n");
hideSetting: (chatId, setting) => !!hiddenSettings[setting.key], },
countMessageTokens: (message:Message, model:Model, chat: Chat) => { countPromptTokens: (prompts: Message[], model: Model, chat: Chat): number => {
return countTokens(model, '## ' + message.role + ' ##:\r\n\r\n' + message.content + '\r\n\r\n\r\n') return (
}, prompts.reduce((a, m) => {
countPromptTokens: (prompts:Message[], model:Model, chat: Chat):number => { a += countMessageTokens(m, model, chat);
// Not sure how OpenAI formats it, but this seems to get close to the right counts. return a;
// Would be nice to know. This works for gpt-3.5. gpt-4 could be different. }, 0) + 3
// Complete stab in the dark here -- update if you know where all the extra tokens really come from. );
return prompts.reduce((a, m) => { },
a += countMessageTokens(m, model, chat) } as ModelDetail;
return a
}, 0) + 3 // Always seems to be message counts + 3
}
} as ModelDetail
// Reference: https://openai.com/pricing#language-models export const chatModels: Record<string, ModelDetail> = {
const placeholder = {
...chatModelBase,
prompt: 0.0, // $0.0015 per 1000 tokens prompt
completion: 0.0, // $0.002 per 1000 tokens completion
max: 100000 // 4k max token buffer
}
const gpt35 = {
...chatModelBase,
prompt: 0.0000015, // $0.0015 per 1000 tokens prompt
completion: 0.000002, // $0.002 per 1000 tokens completion
max: 4096 // 4k max token buffer
}
const gpt3516k = {
...chatModelBase,
prompt: 0.000001, // $0.001 per 1000 tokens prompt
completion: 0.0000015, // $0.0015 per 1000 tokens completion
max: 16384 // 16k max token buffer
}
const gpt4 = {
...chatModelBase,
prompt: 0.00003, // $0.03 per 1000 tokens prompt
completion: 0.00006, // $0.06 per 1000 tokens completion
max: 8192 // 8k max token buffer
}
const gpt4o = {
...chatModelBase,
prompt: 0.000005, // $0.005 per 1000 tokens prompt
completion: 0.000015, // $0.015 per 1000 tokens completion
max: 131072 // 128k max token buffer
}
const gpt4omini = {
...chatModelBase,
prompt: 0.00000015, // $0.00015 per 1000 tokens prompt
completion: 0.00000060, // $0.00060 per 1000 tokens completion
max: 131072 // 128k max token buffer
}
const gpt432k = {
...chatModelBase,
prompt: 0.00006, // $0.06 per 1000 tokens prompt
completion: 0.00012, // $0.12 per 1000 tokens completion
max: 32768 // 32k max token buffer
}
const gpt4128kpreview = {
...chatModelBase,
prompt: 0.00001, // $0.01 per 1000 tokens prompt
completion: 0.00003, // $0.03 per 1000 tokens completion
max: 131072 // 128k max token buffer
}
const o1preview = {
...chatModelBase,
reasoning: true,
prompt: 0.00001, // $0.01 per 1000 tokens prompt
completion: 0.00003, // $0.03 per 1000 tokens completion
max: 131072 // 128k max token buffer
}
const o1mini = {
...chatModelBase,
reasoning: true,
prompt: 0.00001, // $0.01 per 1000 tokens prompt
completion: 0.00003, // $0.03 per 1000 tokens completion
max: 131072 // 128k max token buffer
}
const o1 = {
...chatModelBase,
reasoning: true,
prompt: 15 / 1_000_000,
completion: 60 / 1_000_000,
max: 200000
}
const o3mini = {
...chatModelBase,
reasoning: true,
prompt: 1.1 / 1_000_000,
completion: 4.4 / 1_000_000,
max: 200000
}
const llama3 = {
...chatModelBase,
prompt: 0.00003,
completion: 0.00006,
max: 8192
}
const claude35sonnet = {
...chatModelBase,
prompt: 0.00000375, // $0.00375 per 1000 tokens prompt
completion: 0.000015, // $0.015 per 1000 tokens completion
max: 8192 // 4k max token buffer
}
const claude35haiku = {
...chatModelBase,
prompt: 1/1_000_000, // $0.001 per 1000 tokens prompt
completion: 4/1_000_000, // $0.004 per 1000 tokens completion
max: 4096 // 4k max token buffer
}
export const chatModels : Record<string, ModelDetail> = { // OpenAI Models
'gpt-3.5-turbo': { ...gpt3516k },
'gpt-3.5-turbo-0301': { ...gpt35 },
'gpt-3.5-turbo-0613': { ...gpt35 },
'gpt-3.5-turbo-1106': { ...gpt3516k },
'gpt-3.5-turbo-16k': { ...gpt3516k },
'gpt-3.5-turbo-16k-0613': { ...gpt3516k },
'gpt-4': { ...gpt4 },
'gpt-4o': { ...gpt4o },
'gpt-4o-2024-05-13': { ...gpt4o },
'gpt-4o-2024-08-06': { ...gpt4o },
'chatgpt-4o-latest': { ...gpt4o },
'gpt-4o-mini': { ...gpt4omini },
'gpt-4o-mini-2024-07-18': { ...gpt4omini },
'gpt-4-turbo-preview': { ...gpt4128kpreview },
'gpt-4-turbo-2024-04-09': { ...gpt4128kpreview },
'gpt-4-0314': { ...gpt4 },
'gpt-4-0613': { ...gpt4 },
'gpt-4-1106-preview': { ...gpt4128kpreview },
'gpt-4-0125-preview': { ...gpt4128kpreview },
'gpt-4-32k': { ...gpt432k },
'gpt-4-32k-0314': { ...gpt432k },
'o1-preview': { ...o1preview },
'o1-mini': { ...o1mini },
'o1': {
...chatModelBase,
reasoning: true,
prompt: 15 / 1_000_000,
completion: 60 / 1_000_000,
max: 200000
},
'o3-mini': {
...chatModelBase,
reasoning: true,
prompt: 1.1 / 1_000_000,
completion: 4.4 / 1_000_000,
max: 200000
},
'claude-3-7-sonnet-20250219': {
...chatModelBase,
prompt: 3 / 1_000_000,
completion: 15 / 1_000_000,
max: 8192
},
'claude-3-5-sonnet-20241022': {
...chatModelBase,
prompt: 3.75 / 1_000_000,
completion: 15.0 / 1_000_000,
max: 8192
},
'claude-3-5-haiku-20241022': {
...chatModelBase,
prompt: 1 / 1_000_000,
completion: 4 / 1_000_000,
max: 4096
},
'deepseek-r1-distill-qwen-32b': {
...chatModelBase,
prompt: 0.69 / 1_000_000,
completion: 0.69 / 1_000_000,
max: 16384
},
'deepseek-r1-distill-llama-70b': {
...chatModelBase,
prompt: 3 / 1_000_000,
completion: 3 / 1_000_000,
max: 4096
},
// 'mixtral-8x7b-32768': { ...llama3 },
// 'llama3-70b-8192': { ...llama3 },
// 'llama3-8b-8192': { ...llama3 },
}
const imageModelBase = { "gpt-4o-mini": {
type: 'image', ...chatModelBase,
prompt: 0.00, prompt: 0.15 / 1_000_000,
max: 1000, // 1000 char prompt, max completion: 0.6 / 1_000_000,
request: imageRequest, max: 131072,
check: checkModel, },
getTokens: (value) => [0], "gpt-4o": {
getEndpoint: (model) => getApiBase() + getEndpointGenerations(), ...chatModelBase,
hideSetting: (chatId, setting) => false prompt: 2.5 / 1_000_000,
} as ModelDetail completion: 10 / 1_000_000,
max: 131072,
},
"gpt-4.5-preview": {
...chatModelBase,
prompt: 75 / 1_000_000,
completion: 150 / 1_000_000,
max: 131072,
},
"o1-mini": {
...chatModelBase,
reasoning: true,
prompt: 1.1 / 1_000_000,
completion: 4.4 / 1_000_000,
max: 131072,
},
"o1": {
...chatModelBase,
reasoning: true,
prompt: 15 / 1_000_000,
completion: 60 / 1_000_000,
max: 200000,
},
"o3-mini": {
...chatModelBase,
reasoning: true,
prompt: 1.1 / 1_000_000,
completion: 4.4 / 1_000_000,
max: 200000,
},
export const imageModels : Record<string, ModelDetail> = { // Anthropic Models
'dall-e-1024x1024': {
...imageModelBase, "claude-3-7-sonnet-20250219": {
completion: 0.020, // $0.020 per image ...chatModelBase,
opt: { prompt: 3 / 1_000_000,
size: '1024x1024' completion: 15 / 1_000_000,
} max: 200000,
}, },
'dall-e-512x512': { "claude-3-5-sonnet-20241022": {
...imageModelBase, ...chatModelBase,
completion: 0.018, // $0.018 per image prompt: 3.75 / 1_000_000,
opt: { completion: 15.0 / 1_000_000,
size: '512x512' max: 200000,
}
}, },
'dall-e-256x256': { "claude-3-5-haiku-20241022": {
...imageModelBase, ...chatModelBase,
type: 'image', prompt: 1 / 1_000_000,
completion: 0.016, // $0.016 per image completion: 4 / 1_000_000,
opt: { max: 200000,
size: '256x256'
}
}, },
'dall-e-3-1024x1024': {
...imageModelBase, // Groq Models
type: 'image',
completion: 0.04, // $0.040 per image "deepseek-r1-distill-qwen-32b": {
opt: { ...chatModelBase,
model: 'dall-e-3', prompt: 0.69 / 1_000_000,
size: '1024x1024' completion: 0.69 / 1_000_000,
} max: 16384,
}, },
'dall-e-3-1024x1792-Portrait': { "deepseek-r1-distill-llama-70b": {
...imageModelBase, ...chatModelBase,
type: 'image', prompt: 3 / 1_000_000,
completion: 0.08, // $0.080 per image completion: 3 / 1_000_000,
opt: { max: 4096,
model: 'dall-e-3',
size: '1024x1792'
}
}, },
'dall-e-3-1792x1024-Landscape': { };
...imageModelBase,
type: 'image', const imageModelBase = {
completion: 0.08, // $0.080 per image type: "image",
opt: { prompt: 0.0,
model: 'dall-e-3', max: 1000, // 1000 char prompt, max
size: '1792x1024' request: imageRequest,
} check: checkModel,
getTokens: (value) => [0],
getEndpoint: (model) => getApiBase() + getEndpointGenerations(),
hideSetting: (chatId, setting) => false,
} as ModelDetail;
export const imageModels: Record<string, ModelDetail> = {
"dall-e-1024x1024": {
...imageModelBase,
completion: 0.02, // $0.020 per image
opt: {
size: "1024x1024",
},
}, },
'dall-e-3-1024x1024-HD': { "dall-e-512x512": {
...imageModelBase, ...imageModelBase,
type: 'image', completion: 0.018, // $0.018 per image
completion: 0.08, // $0.080 per image opt: {
opt: { size: "512x512",
model: 'dall-e-3', },
size: '1024x1024',
quality: 'hd'
}
}, },
'dall-e-3-1024x1792-Portrait-HD': { "dall-e-256x256": {
...imageModelBase, ...imageModelBase,
type: 'image', type: "image",
completion: 0.12, // $0.080 per image completion: 0.016, // $0.016 per image
opt: { opt: {
model: 'dall-e-3', size: "256x256",
size: '1024x1792', },
quality: 'hd'
}
}, },
'dall-e-3-1792x1024-Landscape-HD': { "dall-e-3-1024x1024": {
...imageModelBase, ...imageModelBase,
type: 'image', type: "image",
completion: 0.12, // $0.080 per image completion: 0.04, // $0.040 per image
opt: { opt: {
model: 'dall-e-3', model: "dall-e-3",
size: '1792x1024', size: "1024x1024",
quality: 'hd' },
} },
} "dall-e-3-1024x1792-Portrait": {
} ...imageModelBase,
type: "image",
completion: 0.08, // $0.080 per image
opt: {
model: "dall-e-3",
size: "1024x1792",
},
},
"dall-e-3-1792x1024-Landscape": {
...imageModelBase,
type: "image",
completion: 0.08, // $0.080 per image
opt: {
model: "dall-e-3",
size: "1792x1024",
},
},
"dall-e-3-1024x1024-HD": {
...imageModelBase,
type: "image",
completion: 0.08, // $0.080 per image
opt: {
model: "dall-e-3",
size: "1024x1024",
quality: "hd",
},
},
"dall-e-3-1024x1792-Portrait-HD": {
...imageModelBase,
type: "image",
completion: 0.12, // $0.080 per image
opt: {
model: "dall-e-3",
size: "1024x1792",
quality: "hd",
},
},
"dall-e-3-1792x1024-Landscape-HD": {
...imageModelBase,
type: "image",
completion: 0.12, // $0.080 per image
opt: {
model: "dall-e-3",
size: "1792x1024",
quality: "hd",
},
},
};
</script>