<script context="module" lang="ts">
  import { getApiBase, getEndpointCompletions, getEndpointGenerations, getEndpointModels, getPetals } from './ApiUtil.svelte'
  import { apiKeyStorage, globalStorage } from './Storage.svelte'
  import { get } from 'svelte/store'
  import type { ModelDetail, Model, ResponseModels, SelectOption, Chat } from './Types.svelte'
  import { encode } from 'gpt-tokenizer'
  import llamaTokenizer from 'llama-tokenizer-js'
  import { mergeProfileFields } from './Profiles.svelte'
  import { getChatSettingObjectByKey } from './Settings.svelte'
  import { valueOf } from './Util.svelte'

  // Reference: https://openai.com/pricing#language-models
  // Eventually we'll add API hosts and endpoints to this
  const modelDetails : Record<string, ModelDetail> = {
    'gpt-4-32k': {
      type: 'OpenAIChat',
      prompt: 0.00006, // $0.06 per 1000 tokens prompt
      completion: 0.00012, // $0.12 per 1000 tokens completion
      max: 32768 // 32k max token buffer
    },
    'gpt-4': {
      type: 'OpenAIChat',
      prompt: 0.00003, // $0.03 per 1000 tokens prompt
      completion: 0.00006, // $0.06 per 1000 tokens completion
      max: 8192 // 8k max token buffer
    },
    'gpt-3.5': {
      type: 'OpenAIChat',
      prompt: 0.0000015, // $0.0015 per 1000 tokens prompt
      completion: 0.000002, // $0.002 per 1000 tokens completion
      max: 4096 // 4k max token buffer
    },
    'gpt-3.5-turbo-16k': {
      type: 'OpenAIChat',
      prompt: 0.000003, // $0.003 per 1000 tokens prompt
      completion: 0.000004, // $0.004 per 1000 tokens completion
      max: 16384 // 16k max token buffer
    },
    'meta-llama/Llama-2-70b-chat-hf': {
      type: 'Petals',
      label: 'Petals - Llama-2-70b-chat',
      stop: ['</s>'],
      userStart: '[user]',
      assistantStart: '[[[CHARACTER_NAME]]]',
      systemStart: '',
      prompt: 0.000000, // $0.000 per 1000 tokens prompt
      completion: 0.000000, // $0.000 per 1000 tokens completion
      max: 4096 // 4k max token buffer
    }
  }
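
  // DALL-E image generation models: 'completion' holds the flat per-image price and 'max' the prompt length limit in characters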
  export const imageModels : Record<string, ModelDetail> = {
    'dall-e-1024x1024': {
      type: 'OpenAIDall-e',
      prompt: 0.00,
      completion: 0.020, // $0.020 per image
      max: 1000 // 1000 char prompt, max
    },
    'dall-e-512x512': {
      type: 'OpenAIDall-e',
      prompt: 0.00,
      completion: 0.018, // $0.018 per image
      max: 1000 // 1000 char prompt, max
    },
    'dall-e-256x256': {
      type: 'OpenAIDall-e',
      prompt: 0.00,
      completion: 0.016, // $0.016 per image
      max: 1000 // 1000 char prompt, max
    }
  }
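
  // Fallback detail returned by getModelDetail for model ids that match nothing below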
  const unknownDetail = {
    prompt: 0,
    completion: 0,
    max: 4096,
    type: 'OpenAIChat'
  } as ModelDetail

  // See: https://platform.openai.com/docs/models/model-endpoint-compatibility
  // Eventually we'll add UI for managing this
  export const supportedModels : Record<string, ModelDetail> = {
    'gpt-4': modelDetails['gpt-4'],
    'gpt-4-0314': modelDetails['gpt-4'],
    'gpt-4-0613': modelDetails['gpt-4'],
    'gpt-4-32k': modelDetails['gpt-4-32k'],
    'gpt-4-32k-0314': modelDetails['gpt-4-32k'],
    'gpt-4-32k-0613': modelDetails['gpt-4-32k'],
    'gpt-3.5-turbo': modelDetails['gpt-3.5'],
    'gpt-3.5-turbo-16k': modelDetails['gpt-3.5-turbo-16k'],
    'gpt-3.5-turbo-0301': modelDetails['gpt-3.5'],
    'gpt-3.5-turbo-0613': modelDetails['gpt-3.5'],
    'meta-llama/Llama-2-70b-chat-hf': modelDetails['meta-llama/Llama-2-70b-chat-hf']
  }
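
  // Combined lookup table used by getModelDetail for prefix matching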
  const lookupList = {
    ...imageModels,
    ...modelDetails,
    ...supportedModels
  }

  export const supportedModelKeys = Object.keys({ ...supportedModels, ...imageModels })
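
  // Cache of details resolved for model ids without an exact supportedModels entry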
  const tpCache : Record<string, ModelDetail> = {}
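
  // Resolve the ModelDetail for a model id. Exact matches against supportedModels
  // (or a previously cached result) win; otherwise the longest lookupList key the
  // id starts with is used, falling back to unknownDetail, and the result is cached.
  // For example, a hypothetical id like 'gpt-4-32k-9999' would resolve to the
  // 'gpt-4-32k' entry via the prefix match.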
  export const getModelDetail = (model: Model): ModelDetail => {
    // First try to get exact match, then from cache
    let r = supportedModels[model] || tpCache[model]
    if (r) return r
    // If no exact match, find closest match
    const k = Object.keys(lookupList)
      .sort((a, b) => b.length - a.length) // Longest to shortest for best match
      .find((k) => model.startsWith(k))
    if (k) {
      r = lookupList[k]
    } else {
      r = unknownDetail
    }
    // Cache it so we don't need to do that again
    tpCache[model] = r
    return r
  }
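
  // Pick the request endpoint for a model based on its type, honoring any
  // user-configured endpoint overrides from global settings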
  export const getEndpoint = (model: Model): string => {
    const modelDetails = getModelDetail(model)
    const gSettings = get(globalStorage)
    switch (modelDetails.type) {
      case 'Petals':
        return gSettings.pedalsEndpoint || getPetals()
      case 'OpenAIDall-e':
        return getApiBase() + getEndpointGenerations()
      case 'OpenAIChat':
      default:
        return gSettings.openAICompletionEndpoint || (getApiBase() + getEndpointCompletions())
    }
  }
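
  // Stop sequence and role message prefixes, resolved per chat from the
  // corresponding chat setting placeholders (used below for Petals models)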
  export const getStopSequence = (chat: Chat): string => {
    return valueOf(chat.id, getChatSettingObjectByKey('stopSequence').placeholder)
  }

  export const getUserStart = (chat: Chat): string => {
    return mergeProfileFields(
      chat.settings,
      valueOf(chat.id, getChatSettingObjectByKey('userMessageStart').placeholder)
    )
  }

  export const getAssistantStart = (chat: Chat): string => {
    return mergeProfileFields(
      chat.settings,
      valueOf(chat.id, getChatSettingObjectByKey('assistantMessageStart').placeholder)
    )
  }

  export const getSystemStart = (chat: Chat): string => {
    return mergeProfileFields(
      chat.settings,
      valueOf(chat.id, getChatSettingObjectByKey('systemMessageStart').placeholder)
    )
  }
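
  // Role tag prepended to each message: Petals models use the chat's configured
  // message-start strings; OpenAI models just use the role name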
  export const getRoleTag = (role: string, model: Model, chat: Chat): string => {
    const modelDetails = getModelDetail(model)
    switch (modelDetails.type) {
      case 'Petals':
        if (role === 'assistant') return getAssistantStart(chat) + ' '
        if (role === 'user') return getUserStart(chat) + ' '
        return getSystemStart(chat) + ' '
      case 'OpenAIDall-e':
        return role
      case 'OpenAIChat':
      default:
        return role
    }
  }
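
  // Tokenize with the tokenizer that matches the model type: llama-tokenizer-js
  // for Petals, gpt-tokenizer for OpenAI chat models; DALL-E prompts get a
  // single placeholder token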
  export const getTokens = (model: Model, value: string): number[] => {
    const modelDetails = getModelDetail(model)
    switch (modelDetails.type) {
      case 'Petals':
        return llamaTokenizer.encode(value)
      case 'OpenAIDall-e':
        return [0]
      case 'OpenAIChat':
      default:
        return encode(value)
    }
  }
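
  // Convenience wrapper; e.g. countTokens('gpt-3.5-turbo', 'Hello world') returns
  // the number of tokens the prompt would consume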
  export const countTokens = (model: Model, value: string): number => {
    return getTokens(model, value).length
  }
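
  // Build the options for the model selector: fetch the model list from the
  // OpenAI endpoint (treated as empty on failure), then keep supported models
  // that either appear in that list or, for Petals models, are enabled in settings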
  export async function getModelOptions (): Promise<SelectOption[]> {
    const gSettings = get(globalStorage)
    const openAiKey = get(apiKeyStorage)
    // Load available models from OpenAI
    let openAiModels
    try {
      openAiModels = (await (
        await fetch(getApiBase() + getEndpointModels(), {
          method: 'GET',
          headers: {
            Authorization: `Bearer ${openAiKey}`,
            'Content-Type': 'application/json'
          }
        })
      ).json()) as ResponseModels
    } catch (e) {
      openAiModels = { data: [] }
    }
    const filteredModels = supportedModelKeys.filter((model) => {
      switch (getModelDetail(model).type) {
        case 'Petals':
          return gSettings.enablePetals
        case 'OpenAIChat':
        default:
          return openAiModels.data.find((m) => m.id === model)
      }
    })

    const modelOptions:SelectOption[] = filteredModels.reduce((a, m) => {
      const o:SelectOption = {
        value: m,
        text: m
      }
      a.push(o)
      return a
    }, [] as SelectOption[])

    return modelOptions
  }
</script>