Models cleanup

This commit is contained in:
2025-02-28 07:01:53 +09:00
parent 795941d874
commit bace639f1d

View File

@@ -1,15 +1,16 @@
<script context="module" lang="ts"> <script context="module" lang="ts">
import { getApiBase, getEndpointCompletions, getEndpointGenerations } from '../../ApiUtil.svelte'
import { countTokens } from '../../Models.svelte'
import { countMessageTokens } from '../../Stats.svelte'
import { globalStorage } from '../../Storage.svelte'
import type { Chat, Message, Model, ModelDetail } from '../../Types.svelte'
import { chatRequest, imageRequest } from './request.svelte'
import { checkModel } from './util.svelte'
import { encode } from 'gpt-tokenizer'
import { get } from 'svelte/store'
const hiddenSettings = { import { getApiBase, getEndpointCompletions, getEndpointGenerations } from "../../ApiUtil.svelte";
import { countTokens } from "../../Models.svelte";
import { countMessageTokens } from "../../Stats.svelte";
import { globalStorage } from "../../Storage.svelte";
import type { Chat, Message, Model, ModelDetail } from "../../Types.svelte";
import { chatRequest, imageRequest } from "./request.svelte";
import { checkModel } from "./util.svelte";
import { encode } from "gpt-tokenizer";
import { get } from "svelte/store";
const hiddenSettings = {
startSequence: true, startSequence: true,
stopSequence: true, stopSequence: true,
aggressiveStop: true, aggressiveStop: true,
@@ -21,301 +22,209 @@ const hiddenSettings = {
systemMessageStart: true, systemMessageStart: true,
systemMessageEnd: true, systemMessageEnd: true,
repetitionPenalty: true, repetitionPenalty: true,
holdSocket: true holdSocket: true,
// leadPrompt: true // leadPrompt: true
} as any } as any;
const chatModelBase = { const chatModelBase = {
type: 'chat', type: "chat",
help: 'Below are the settings that OpenAI allows to be changed for the API calls. See the <a target="_blank" href="https://platform.openai.com/docs/api-reference/chat/create">OpenAI API docs</a> for more details.', help: 'Below are the settings that OpenAI allows to be changed for the API calls. See the <a target="_blank" href="https://platform.openai.com/docs/api-reference/chat/create">OpenAI API docs</a> for more details.',
preFillMerge: (existingContent, newContent) => { preFillMerge: (existingContent, newContent) => {
// continuing assistant prompt. see if we need to add a space before we merge the new completion if (existingContent && !newContent.match(/^('(t|ll|ve|m|d|re)[^a-z]|\s|[.,;:(_-{}*^%$#@!?+=~`[\]])/i)) {
// there has to be a better way to do this existingContent += " ";
if (existingContent && !newContent.match(/^('(t|ll|ve|m|d|re)[^a-z]|\s|[.,;:(_-{}*^%$#@!?+=~`[\]])/i)) { }
// add a trailing space if our new content isn't a contraction return existingContent;
existingContent += ' ' },
} request: chatRequest,
return existingContent check: checkModel,
}, getTokens: (value) => encode(value),
request: chatRequest, getEndpoint: (model) => get(globalStorage).openAICompletionEndpoint || getApiBase() + getEndpointCompletions(),
check: checkModel, hideSetting: (chatId, setting) => !!hiddenSettings[setting.key],
getTokens: (value) => encode(value), countMessageTokens: (message: Message, model: Model, chat: Chat) => {
getEndpoint: (model) => get(globalStorage).openAICompletionEndpoint || (getApiBase() + getEndpointCompletions()), return countTokens(model, "## " + message.role + " ##:\r\n\r\n" + message.content + "\r\n\r\n\r\n");
hideSetting: (chatId, setting) => !!hiddenSettings[setting.key], },
countMessageTokens: (message:Message, model:Model, chat: Chat) => { countPromptTokens: (prompts: Message[], model: Model, chat: Chat): number => {
return countTokens(model, '## ' + message.role + ' ##:\r\n\r\n' + message.content + '\r\n\r\n\r\n') return (
}, prompts.reduce((a, m) => {
countPromptTokens: (prompts:Message[], model:Model, chat: Chat):number => { a += countMessageTokens(m, model, chat);
// Not sure how OpenAI formats it, but this seems to get close to the right counts. return a;
// Would be nice to know. This works for gpt-3.5. gpt-4 could be different. }, 0) + 3
// Complete stab in the dark here -- update if you know where all the extra tokens really come from. );
return prompts.reduce((a, m) => { },
a += countMessageTokens(m, model, chat) } as ModelDetail;
return a
}, 0) + 3 // Always seems to be message counts + 3
}
} as ModelDetail
// Reference: https://openai.com/pricing#language-models export const chatModels: Record<string, ModelDetail> = {
const placeholder = {
...chatModelBase,
prompt: 0.0, // $0.0015 per 1000 tokens prompt
completion: 0.0, // $0.002 per 1000 tokens completion
max: 100000 // 4k max token buffer
}
const gpt35 = {
...chatModelBase,
prompt: 0.0000015, // $0.0015 per 1000 tokens prompt
completion: 0.000002, // $0.002 per 1000 tokens completion
max: 4096 // 4k max token buffer
}
const gpt3516k = {
...chatModelBase,
prompt: 0.000001, // $0.001 per 1000 tokens prompt
completion: 0.0000015, // $0.0015 per 1000 tokens completion
max: 16384 // 16k max token buffer
}
const gpt4 = {
...chatModelBase,
prompt: 0.00003, // $0.03 per 1000 tokens prompt
completion: 0.00006, // $0.06 per 1000 tokens completion
max: 8192 // 8k max token buffer
}
const gpt4o = {
...chatModelBase,
prompt: 0.000005, // $0.005 per 1000 tokens prompt
completion: 0.000015, // $0.015 per 1000 tokens completion
max: 131072 // 128k max token buffer
}
const gpt4omini = {
...chatModelBase,
prompt: 0.00000015, // $0.00015 per 1000 tokens prompt
completion: 0.00000060, // $0.00060 per 1000 tokens completion
max: 131072 // 128k max token buffer
}
const gpt432k = {
...chatModelBase,
prompt: 0.00006, // $0.06 per 1000 tokens prompt
completion: 0.00012, // $0.12 per 1000 tokens completion
max: 32768 // 32k max token buffer
}
const gpt4128kpreview = {
...chatModelBase,
prompt: 0.00001, // $0.01 per 1000 tokens prompt
completion: 0.00003, // $0.03 per 1000 tokens completion
max: 131072 // 128k max token buffer
}
const o1preview = {
...chatModelBase,
reasoning: true,
prompt: 0.00001, // $0.01 per 1000 tokens prompt
completion: 0.00003, // $0.03 per 1000 tokens completion
max: 131072 // 128k max token buffer
}
const o1mini = {
...chatModelBase,
reasoning: true,
prompt: 0.00001, // $0.01 per 1000 tokens prompt
completion: 0.00003, // $0.03 per 1000 tokens completion
max: 131072 // 128k max token buffer
}
const o1 = {
...chatModelBase,
reasoning: true,
prompt: 15 / 1_000_000,
completion: 60 / 1_000_000,
max: 200000
}
const o3mini = {
...chatModelBase,
reasoning: true,
prompt: 1.1 / 1_000_000,
completion: 4.4 / 1_000_000,
max: 200000
}
const llama3 = {
...chatModelBase,
prompt: 0.00003,
completion: 0.00006,
max: 8192
}
const claude35sonnet = {
...chatModelBase,
prompt: 0.00000375, // $0.00375 per 1000 tokens prompt
completion: 0.000015, // $0.015 per 1000 tokens completion
max: 8192 // 4k max token buffer
}
const claude35haiku = {
...chatModelBase,
prompt: 1/1_000_000, // $0.001 per 1000 tokens prompt
completion: 4/1_000_000, // $0.004 per 1000 tokens completion
max: 4096 // 4k max token buffer
}
export const chatModels : Record<string, ModelDetail> = { // OpenAI Models
'gpt-3.5-turbo': { ...gpt3516k },
'gpt-3.5-turbo-0301': { ...gpt35 },
'gpt-3.5-turbo-0613': { ...gpt35 },
'gpt-3.5-turbo-1106': { ...gpt3516k },
'gpt-3.5-turbo-16k': { ...gpt3516k },
'gpt-3.5-turbo-16k-0613': { ...gpt3516k },
'gpt-4': { ...gpt4 },
'gpt-4o': { ...gpt4o },
'gpt-4o-2024-05-13': { ...gpt4o },
'gpt-4o-2024-08-06': { ...gpt4o },
'chatgpt-4o-latest': { ...gpt4o },
'gpt-4o-mini': { ...gpt4omini },
'gpt-4o-mini-2024-07-18': { ...gpt4omini },
'gpt-4-turbo-preview': { ...gpt4128kpreview },
'gpt-4-turbo-2024-04-09': { ...gpt4128kpreview },
'gpt-4-0314': { ...gpt4 },
'gpt-4-0613': { ...gpt4 },
'gpt-4-1106-preview': { ...gpt4128kpreview },
'gpt-4-0125-preview': { ...gpt4128kpreview },
'gpt-4-32k': { ...gpt432k },
'gpt-4-32k-0314': { ...gpt432k },
'o1-preview': { ...o1preview },
'o1-mini': { ...o1mini },
'o1': {
...chatModelBase,
reasoning: true,
prompt: 15 / 1_000_000,
completion: 60 / 1_000_000,
max: 200000
},
'o3-mini': {
...chatModelBase,
reasoning: true,
prompt: 1.1 / 1_000_000,
completion: 4.4 / 1_000_000,
max: 200000
},
'claude-3-7-sonnet-20250219': {
...chatModelBase,
prompt: 3 / 1_000_000,
completion: 15 / 1_000_000,
max: 8192
},
'claude-3-5-sonnet-20241022': {
...chatModelBase,
prompt: 3.75 / 1_000_000,
completion: 15.0 / 1_000_000,
max: 8192
},
'claude-3-5-haiku-20241022': {
...chatModelBase,
prompt: 1 / 1_000_000,
completion: 4 / 1_000_000,
max: 4096
},
'deepseek-r1-distill-qwen-32b': {
...chatModelBase,
prompt: 0.69 / 1_000_000,
completion: 0.69 / 1_000_000,
max: 16384
},
'deepseek-r1-distill-llama-70b': {
...chatModelBase,
prompt: 3 / 1_000_000,
completion: 3 / 1_000_000,
max: 4096
},
// 'mixtral-8x7b-32768': { ...llama3 },
// 'llama3-70b-8192': { ...llama3 },
// 'llama3-8b-8192': { ...llama3 },
}
const imageModelBase = { "gpt-4o-mini": {
type: 'image', ...chatModelBase,
prompt: 0.00, prompt: 0.15 / 1_000_000,
max: 1000, // 1000 char prompt, max completion: 0.6 / 1_000_000,
request: imageRequest, max: 131072,
check: checkModel, },
getTokens: (value) => [0], "gpt-4o": {
getEndpoint: (model) => getApiBase() + getEndpointGenerations(), ...chatModelBase,
hideSetting: (chatId, setting) => false prompt: 2.5 / 1_000_000,
} as ModelDetail completion: 10 / 1_000_000,
max: 131072,
},
"gpt-4.5-preview": {
...chatModelBase,
prompt: 75 / 1_000_000,
completion: 150 / 1_000_000,
max: 131072,
},
"o1-mini": {
...chatModelBase,
reasoning: true,
prompt: 1.1 / 1_000_000,
completion: 4.4 / 1_000_000,
max: 131072,
},
"o1": {
...chatModelBase,
reasoning: true,
prompt: 15 / 1_000_000,
completion: 60 / 1_000_000,
max: 200000,
},
"o3-mini": {
...chatModelBase,
reasoning: true,
prompt: 1.1 / 1_000_000,
completion: 4.4 / 1_000_000,
max: 200000,
},
export const imageModels : Record<string, ModelDetail> = { // Anthropic Models
'dall-e-1024x1024': {
...imageModelBase, "claude-3-7-sonnet-20250219": {
completion: 0.020, // $0.020 per image ...chatModelBase,
opt: { prompt: 3 / 1_000_000,
size: '1024x1024' completion: 15 / 1_000_000,
} max: 200000,
}, },
'dall-e-512x512': { "claude-3-5-sonnet-20241022": {
...imageModelBase, ...chatModelBase,
completion: 0.018, // $0.018 per image prompt: 3.75 / 1_000_000,
opt: { completion: 15.0 / 1_000_000,
size: '512x512' max: 200000,
}
}, },
'dall-e-256x256': { "claude-3-5-haiku-20241022": {
...imageModelBase, ...chatModelBase,
type: 'image', prompt: 1 / 1_000_000,
completion: 0.016, // $0.016 per image completion: 4 / 1_000_000,
opt: { max: 200000,
size: '256x256'
}
}, },
'dall-e-3-1024x1024': {
...imageModelBase, // Groq Models
type: 'image',
completion: 0.04, // $0.040 per image "deepseek-r1-distill-qwen-32b": {
opt: { ...chatModelBase,
model: 'dall-e-3', prompt: 0.69 / 1_000_000,
size: '1024x1024' completion: 0.69 / 1_000_000,
} max: 16384,
}, },
'dall-e-3-1024x1792-Portrait': { "deepseek-r1-distill-llama-70b": {
...imageModelBase, ...chatModelBase,
type: 'image', prompt: 3 / 1_000_000,
completion: 0.08, // $0.080 per image completion: 3 / 1_000_000,
opt: { max: 4096,
model: 'dall-e-3',
size: '1024x1792'
}
}, },
'dall-e-3-1792x1024-Landscape': { };
...imageModelBase,
type: 'image', const imageModelBase = {
completion: 0.08, // $0.080 per image type: "image",
opt: { prompt: 0.0,
model: 'dall-e-3', max: 1000, // 1000 char prompt, max
size: '1792x1024' request: imageRequest,
} check: checkModel,
getTokens: (value) => [0],
getEndpoint: (model) => getApiBase() + getEndpointGenerations(),
hideSetting: (chatId, setting) => false,
} as ModelDetail;
export const imageModels: Record<string, ModelDetail> = {
"dall-e-1024x1024": {
...imageModelBase,
completion: 0.02, // $0.020 per image
opt: {
size: "1024x1024",
},
}, },
'dall-e-3-1024x1024-HD': { "dall-e-512x512": {
...imageModelBase, ...imageModelBase,
type: 'image', completion: 0.018, // $0.018 per image
completion: 0.08, // $0.080 per image opt: {
opt: { size: "512x512",
model: 'dall-e-3', },
size: '1024x1024',
quality: 'hd'
}
}, },
'dall-e-3-1024x1792-Portrait-HD': { "dall-e-256x256": {
...imageModelBase, ...imageModelBase,
type: 'image', type: "image",
completion: 0.12, // $0.080 per image completion: 0.016, // $0.016 per image
opt: { opt: {
model: 'dall-e-3', size: "256x256",
size: '1024x1792', },
quality: 'hd'
}
}, },
'dall-e-3-1792x1024-Landscape-HD': { "dall-e-3-1024x1024": {
...imageModelBase, ...imageModelBase,
type: 'image', type: "image",
completion: 0.12, // $0.080 per image completion: 0.04, // $0.040 per image
opt: { opt: {
model: 'dall-e-3', model: "dall-e-3",
size: '1792x1024', size: "1024x1024",
quality: 'hd' },
} },
} "dall-e-3-1024x1792-Portrait": {
} ...imageModelBase,
type: "image",
completion: 0.08, // $0.080 per image
opt: {
model: "dall-e-3",
size: "1024x1792",
},
},
"dall-e-3-1792x1024-Landscape": {
...imageModelBase,
type: "image",
completion: 0.08, // $0.080 per image
opt: {
model: "dall-e-3",
size: "1792x1024",
},
},
"dall-e-3-1024x1024-HD": {
...imageModelBase,
type: "image",
completion: 0.08, // $0.080 per image
opt: {
model: "dall-e-3",
size: "1024x1024",
quality: "hd",
},
},
"dall-e-3-1024x1792-Portrait-HD": {
...imageModelBase,
type: "image",
completion: 0.12, // $0.080 per image
opt: {
model: "dall-e-3",
size: "1024x1792",
quality: "hd",
},
},
"dall-e-3-1792x1024-Landscape-HD": {
...imageModelBase,
type: "image",
completion: 0.12, // $0.080 per image
opt: {
model: "dall-e-3",
size: "1792x1024",
quality: "hd",
},
},
};
</script>