<script context="module" lang="ts">
  import { getApiBase, getEndpointCompletions, getEndpointGenerations, getEndpointModels, getPetals } from './ApiUtil.svelte'
  import { apiKeyStorage, globalStorage } from './Storage.svelte'
  import { get } from 'svelte/store'
  import type { ModelDetail, Model, ResponseModels, SelectOption, Chat } from './Types.svelte'
  import { encode } from 'gpt-tokenizer'
  import llamaTokenizer from 'llama-tokenizer-js'
  import { mergeProfileFields } from './Profiles.svelte'
  import { getChatSettingObjectByKey } from './Settings.svelte'
  import { valueOf } from './Util.svelte'

  // Reference: https://openai.com/pricing#language-models
  // Eventually we'll add API hosts and endpoints to this
  const modelDetails : Record<string, ModelDetail> = {
    'gpt-4-32k': {
      type: 'OpenAIChat',
      prompt: 0.00006, // $0.06 per 1000 tokens prompt
      completion: 0.00012, // $0.12 per 1000 tokens completion
      max: 32768 // 32k max token buffer
    },
    'gpt-4': {
      type: 'OpenAIChat',
      prompt: 0.00003, // $0.03 per 1000 tokens prompt
      completion: 0.00006, // $0.06 per 1000 tokens completion
      max: 8192 // 8k max token buffer
    },
    'gpt-3.5': {
      type: 'OpenAIChat',
      prompt: 0.0000015, // $0.0015 per 1000 tokens prompt
      completion: 0.000002, // $0.002 per 1000 tokens completion
      max: 4096 // 4k max token buffer
    },
    'gpt-3.5-turbo-16k': {
      type: 'OpenAIChat',
      prompt: 0.000003, // $0.003 per 1000 tokens prompt
      completion: 0.000004, // $0.004 per 1000 tokens completion
      max: 16384 // 16k max token buffer
    },
    'meta-llama/Llama-2-70b-chat-hf': {
      type: 'Petals',
      label: 'Petals - Llama-2-70b-chat',
      stop: ['</s>'],
      userStart: '[user]',
      assistantStart: '[[[CHARACTER_NAME]]]',
      systemStart: '',
      prompt: 0.000000, // $0.000 per 1000 tokens prompt
      completion: 0.000000, // $0.000 per 1000 tokens completion
      max: 4096 // 4k max token buffer
    }
  }
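
  // DALL-E image generation models: 'completion' holds the flat per-image price and 'max' the prompt length limit in characters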
  export const imageModels : Record<string, ModelDetail> = {
    'dall-e-1024x1024': {
      type: 'OpenAIDall-e',
      prompt: 0.00,
      completion: 0.020, // $0.020 per image
      max: 1000 // 1000 char prompt, max
    },
    'dall-e-512x512': {
      type: 'OpenAIDall-e',
      prompt: 0.00,
      completion: 0.018, // $0.018 per image
      max: 1000 // 1000 char prompt, max
    },
    'dall-e-256x256': {
      type: 'OpenAIDall-e',
      prompt: 0.00,
      completion: 0.016, // $0.016 per image
      max: 1000 // 1000 char prompt, max
    }
  }
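
  // Fallback detail returned by getModelDetail for model ids that match nothing below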
  const unknownDetail = {
    prompt: 0,
    completion: 0,
    max: 4096,
    type: 'OpenAIChat'
  } as ModelDetail

  // See: https://platform.openai.com/docs/models/model-endpoint-compatibility
  // Eventually we'll add UI for managing this
  export const supportedModels : Record<string, ModelDetail> = {
    'gpt-4': modelDetails['gpt-4'],
    'gpt-4-0314': modelDetails['gpt-4'],
    'gpt-4-0613': modelDetails['gpt-4'],
    'gpt-4-32k': modelDetails['gpt-4-32k'],
    'gpt-4-32k-0314': modelDetails['gpt-4-32k'],
    'gpt-4-32k-0613': modelDetails['gpt-4-32k'],
    'gpt-3.5-turbo': modelDetails['gpt-3.5'],
    'gpt-3.5-turbo-16k': modelDetails['gpt-3.5-turbo-16k'],
    'gpt-3.5-turbo-0301': modelDetails['gpt-3.5'],
    'gpt-3.5-turbo-0613': modelDetails['gpt-3.5'],
    'meta-llama/Llama-2-70b-chat-hf': modelDetails['meta-llama/Llama-2-70b-chat-hf']
  }
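
  // Combined lookup table used by getModelDetail for prefix matching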
  const lookupList = {
    ...imageModels,
    ...modelDetails,
    ...supportedModels
  }

  export const supportedModelKeys = Object.keys({ ...supportedModels, ...imageModels })
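
  // Cache of details resolved for model ids without an exact supportedModels entry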
  const tpCache : Record<string, ModelDetail> = {}
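
  // Resolve the ModelDetail for a model id. Exact matches against supportedModels
  // (or a previously cached result) win; otherwise the longest lookupList key the
  // id starts with is used, falling back to unknownDetail, and the result is cached.
  // For example, a hypothetical id like 'gpt-4-32k-9999' would resolve to the
  // 'gpt-4-32k' entry via the prefix match.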
  export const getModelDetail = (model: Model): ModelDetail => {
    // First try to get exact match, then from cache
    let r = supportedModels[model] || tpCache[model]
    if (r) return r
    // If no exact match, find closest match
    const k = Object.keys(lookupList)
      .sort((a, b) => b.length - a.length) // Longest to shortest for best match
      .find((k) => model.startsWith(k))
    if (k) {
      r = lookupList[k]
    } else {
      r = unknownDetail
    }
    // Cache it so we don't need to do that again
    tpCache[model] = r
    return r
  }
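
  // Pick the request endpoint for a model based on its type, honoring any
  // user-configured endpoint overrides from global settings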
  export const getEndpoint = (model: Model): string => {
    const modelDetails = getModelDetail(model)
    const gSettings = get(globalStorage)
    switch (modelDetails.type) {
      case 'Petals':
        return gSettings.pedalsEndpoint || getPetals()
      case 'OpenAIDall-e':
        return getApiBase() + getEndpointGenerations()
      case 'OpenAIChat':
      default:
        return gSettings.openAICompletionEndpoint || (getApiBase() + getEndpointCompletions())
    }
  }
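
  // Stop sequence and role message prefixes, resolved per chat from the
  // corresponding chat setting placeholders (used below for Petals models)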
  export const getStopSequence = (chat: Chat): string => {
    return valueOf(chat.id, getChatSettingObjectByKey('stopSequence').placeholder)
  }

  export const getUserStart = (chat: Chat): string => {
    return mergeProfileFields(
      chat.settings,
      valueOf(chat.id, getChatSettingObjectByKey('userMessageStart').placeholder)
    )
  }

  export const getAssistantStart = (chat: Chat): string => {
    return mergeProfileFields(
      chat.settings,
      valueOf(chat.id, getChatSettingObjectByKey('assistantMessageStart').placeholder)
    )
  }

  export const getSystemStart = (chat: Chat): string => {
    return mergeProfileFields(
      chat.settings,
      valueOf(chat.id, getChatSettingObjectByKey('systemMessageStart').placeholder)
    )
  }
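
  // Role tag prepended to each message: Petals models use the chat's configured
  // message-start strings; OpenAI models just use the role name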
  export const getRoleTag = (role: string, model: Model, chat: Chat): string => {
    const modelDetails = getModelDetail(model)
    switch (modelDetails.type) {
      case 'Petals':
        if (role === 'assistant') return getAssistantStart(chat) + ' '
        if (role === 'user') return getUserStart(chat) + ' '
        return getSystemStart(chat) + ' '
      case 'OpenAIDall-e':
        return role
      case 'OpenAIChat':
      default:
        return role
    }
  }
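
  // Tokenize with the tokenizer that matches the model type: llama-tokenizer-js
  // for Petals, gpt-tokenizer for OpenAI chat models; DALL-E prompts get a
  // single placeholder token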
  export const getTokens = (model: Model, value: string): number[] => {
    const modelDetails = getModelDetail(model)
    switch (modelDetails.type) {
      case 'Petals':
        return llamaTokenizer.encode(value)
      case 'OpenAIDall-e':
        return [0]
      case 'OpenAIChat':
      default:
        return encode(value)
    }
  }
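
  // Convenience wrapper; e.g. countTokens('gpt-3.5-turbo', 'Hello world') returns
  // the number of tokens the prompt would consume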
  export const countTokens = (model: Model, value: string): number => {
    return getTokens(model, value).length
  }
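
  // Build the options for the model selector: fetch the model list from the
  // OpenAI endpoint (treated as empty on failure), then keep supported models
  // that either appear in that list or, for Petals models, are enabled in settings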
  export async function getModelOptions (): Promise<SelectOption[]> {
    const gSettings = get(globalStorage)
    const openAiKey = get(apiKeyStorage)
    // Load available models from OpenAI
    let openAiModels
    try {
      openAiModels = (await (
        await fetch(getApiBase() + getEndpointModels(), {
          method: 'GET',
          headers: {
            Authorization: `Bearer ${openAiKey}`,
            'Content-Type': 'application/json'
          }
        })
      ).json()) as ResponseModels
    } catch (e) {
      openAiModels = { data: [] }
    }
    const filteredModels = supportedModelKeys.filter((model) => {
      switch (getModelDetail(model).type) {
        case 'Petals':
          return gSettings.enablePetals
        case 'OpenAIChat':
        default:
          return openAiModels.data.find((m) => m.id === model)
      }
    })

    const modelOptions:SelectOption[] = filteredModels.reduce((a, m) => {
      const o:SelectOption = {
        value: m,
        text: m
      }
      a.push(o)
      return a
    }, [] as SelectOption[])

    return modelOptions
  }
</script>