Merge pull request #170 from Webifi/main

Fixes for summarization, refactor model definitions.
Niek van der Maas 2023-06-13 09:16:09 +02:00 committed by GitHub
commit 5715594973
8 changed files with 200 additions and 120 deletions


@@ -59,49 +59,47 @@ export class ChatRequest {
     const promptTokenCount = countPromptTokens(messagePayload, model)
     const maxAllowed = maxTokens - (promptTokenCount + 1)
 
-    // Build and make the request
-    try {
-      // Build the API request body
-      const request: Request = {
-        model: chatSettings.model,
-        messages: messagePayload,
-        // Provide the settings by mapping the settingsMap to key/value pairs
-        ...getRequestSettingList().reduce((acc, setting) => {
-          const key = setting.key
-          let value = getChatSettingValueNullDefault(chatId, setting)
-          if (key in overrides) value = overrides[key]
-          if (typeof setting.apiTransform === 'function') {
-            value = setting.apiTransform(chatId, setting, value)
-          }
-          if (key === 'max_tokens') {
-            if (opts.maxTokens) value = opts.maxTokens // only as large as requested
-            if (value > maxAllowed || value < 1) value = null // if over max model, do not define max
-          }
-          if (key === 'n') {
-            if (opts.streaming || opts.summaryRequest) {
-              /*
-                Streaming goes insane with more than one completion.
-                Doesn't seem like there's any way to separate the jumbled mess of deltas for the
-                different completions.
-                Summary should only have one completion
-              */
-              value = 1
-            }
-          }
-          if (value !== null) acc[key] = value
-          return acc
-        }, {}),
-        stream: opts.streaming
-      }
+    // Build the API request body
+    const request: Request = {
+      model: chatSettings.model,
+      messages: messagePayload,
+      // Provide the settings by mapping the settingsMap to key/value pairs
+      ...getRequestSettingList().reduce((acc, setting) => {
+        const key = setting.key
+        let value = getChatSettingValueNullDefault(chatId, setting)
+        if (key in overrides) value = overrides[key]
+        if (typeof setting.apiTransform === 'function') {
+          value = setting.apiTransform(chatId, setting, value)
+        }
+        if (key === 'max_tokens') {
+          if (opts.maxTokens) value = opts.maxTokens // only as large as requested
+          if (value > maxAllowed || value < 1) value = null // if over max model, do not define max
+        }
+        if (key === 'n') {
+          if (opts.streaming || opts.summaryRequest) {
+            /*
+              Streaming goes insane with more than one completion.
+              Doesn't seem like there's any way to separate the jumbled mess of deltas for the
+              different completions.
+              Summary should only have one completion
+            */
+            value = 1
+          }
+        }
+        if (value !== null) acc[key] = value
+        return acc
+      }, {}),
+      stream: opts.streaming
+    }
+
+    // Set-up and make the request
+    try {
       // Add out token count to the response handler
       // (streaming doesn't return counts, so we need to do it client side)
       chatResponse.setPromptTokenCount(promptTokenCount)
       const signal = _this.controller.signal
-      // console.log('apikey', $apiKeyStorage)
       const fetchOptions = {
         method: 'POST',
         headers: {
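To make the max_tokens clamp in the hunk above concrete: with a 4096-token model and a 3000-token prompt, maxAllowed is 4096 - (3000 + 1) = 1095, so a requested 512 passes through while anything larger (or non-positive) is dropped and the API falls back to its own default. A minimal sketch of that rule with hypothetical numbers (the helper name is illustrative, not code from this commit):

// Illustrative sketch of the clamp applied to the 'max_tokens' setting
const clampMaxTokens = (requested: number, maxAllowed: number): number | null =>
  (requested > maxAllowed || requested < 1) ? null : requested

clampMaxTokens(512, 1095)  // 512  – fits, so max_tokens is sent as-is
clampMaxTokens(2000, 1095) // null – over the model budget, so max_tokens is omitted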
@@ -297,19 +295,21 @@ export class ChatRequest {
       */
       const bottom = rw.slice(0 - pinBottom)
+      let continueCounter = chatSettings.summaryExtend + 1
       rw = rw.slice(0, 0 - pinBottom)
       let reductionPoolSize = countPromptTokens(rw, model)
       const ss = chatSettings.summarySize
       const getSS = ():number => (ss < 1 && ss > 0)
         ? Math.round(reductionPoolSize * ss) // If summarySize between 0 and 1, use percentage of reduced
         : Math.min(ss, reductionPoolSize * 0.5) // If > 1, use token count
+      const topSize = countPromptTokens(top, model)
       let maxSummaryTokens = getSS()
       let promptSummary = prepareSummaryPrompt(chatId, maxSummaryTokens)
       const summaryRequest = { role: 'user', content: promptSummary } as Message
       let promptSummarySize = countMessageTokens(summaryRequest, model)
       // Make sure there is enough room to generate the summary, and try to make sure
       // the last prompt is a user prompt as that seems to work better for summaries
-      while ((reductionPoolSize + promptSummarySize + maxSummaryTokens) >= maxTokens ||
+      while ((topSize + reductionPoolSize + promptSummarySize + maxSummaryTokens) >= maxTokens ||
           (reductionPoolSize >= 100 && rw[rw.length - 1]?.role !== 'user')) {
         bottom.unshift(rw.pop() as Message)
         reductionPoolSize = countPromptTokens(rw, model)
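For reference, the summarySize semantics that getSS implements above: values between 0 and 1 act as a fraction of the messages being reduced, while values of 1 or more are an absolute token budget capped at half the reduction pool. A standalone sketch with hypothetical numbers (not code from this commit):

// Same rule as getSS, written as a free function for illustration
const summaryBudget = (reductionPoolSize: number, ss: number): number =>
  (ss < 1 && ss > 0)
    ? Math.round(reductionPoolSize * ss) // fraction of the reduced messages
    : Math.min(ss, reductionPoolSize * 0.5) // absolute budget, capped at half the pool

summaryBudget(2400, 0.25) // 600
summaryBudget(2400, 1000) // 1000
summaryBudget(1200, 1000) // 600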
@@ -340,53 +340,67 @@ export class ChatRequest {
       // Request and load the summarization prompt
       _this.updatingMessage = 'Summarizing...'
-      try {
-        const summary = await _this.sendRequest(top.concat(rw).concat([summaryRequest]), {
-          summaryRequest: true,
-          streaming: opts.streaming,
-          maxTokens: maxSummaryTokens,
-          fillMessage: summaryResponse,
-          autoAddMessages: true,
-          onMessageChange: (m) => {
-            if (opts.streaming) scrollToMessage(summaryResponse.uuid, 150, true, true)
-          }
-        } as ChatCompletionOpts, {
-          temperature: 0, // make summary more deterministic
-          top_p: 0.5,
-          presence_penalty: 0,
-          frequency_penalty: 0,
-          ...overrides
-        } as ChatSettings)
-        // Wait for the response to complete
-        if (!summary.hasFinished()) await summary.promiseToFinish()
-        if (summary.hasError()) {
-          // Failed to some API issue. let the original caller handle it.
-          deleteMessage(chatId, summaryResponse.uuid)
-          return summary
-        } else {
-          // Looks like we got our summarized messages.
-          // Mark the new summaries as such
-          summaryResponse.summary = rw.map(m => m.uuid)
-          const summaryIds = [summaryResponse.uuid]
-          // Disable the messages we summarized so they still show in history
-          rw.forEach((m, i) => { m.summarized = summaryIds })
-          saveChatStore()
-          // Re-run request with summarized prompts
-          // return { error: { message: "End for now" } } as Response
-          _this.updatingMessage = 'Continuing...'
-          scrollToBottom(true)
-          return await _this.sendRequest(chat.messages, {
-            ...opts,
-            didSummary: true
-          },
-          overrides)
-        }
-      } catch (e) {
-        _this.updating = false
-        _this.updatingMessage = ''
-        deleteMessage(chatId, srid)
-        throw e
-      }
+      const summarizedIds = rw.map(m => m.uuid)
+      const summaryIds = [summaryResponse.uuid]
+      while (continueCounter-- > 0) {
+        try {
+          const summary = await _this.sendRequest(top.concat(rw).concat([summaryRequest]), {
+            summaryRequest: true,
+            streaming: opts.streaming,
+            maxTokens: maxSummaryTokens,
+            fillMessage: summaryResponse,
+            autoAddMessages: true,
+            onMessageChange: (m) => {
+              if (opts.streaming) scrollToMessage(summaryResponse.uuid, 150, true, true)
+            }
+          } as ChatCompletionOpts, {
+            temperature: 0.1, // make summary more deterministic
+            top_p: 1,
+            presence_penalty: 0,
+            frequency_penalty: 0,
+            ...overrides
+          } as ChatSettings)
+          // Wait for the response to complete
+          if (!summary.hasFinished()) await summary.promiseToFinish()
+          if (summary.hasError()) {
+            // Failed for some API issue. let the original caller handle it.
+            _this.updating = false
+            _this.updatingMessage = ''
+            deleteMessage(chatId, srid)
+            return summary
+          }
+          // Looks like we got our summarized messages.
+          // Mark the new summaries as such
+          // Need more?
+          if (summaryResponse.finish_reason === 'length' && continueCounter > 0) {
+            // Our summary was truncated
+            // Try to get more of it
+            delete summaryResponse.finish_reason
+            _this.updatingMessage = 'Summarizing more...'
+            continue
+          } else {
+            // We're done
+            continueCounter = 0
+          }
+        } catch (e) {
+          _this.updating = false
+          _this.updatingMessage = ''
+          deleteMessage(chatId, srid)
+          throw e
+        }
+      }
+      summaryResponse.summary = summarizedIds
+      // Disable the messages we summarized so they still show in history
+      rw.forEach((m, i) => { m.summarized = summaryIds })
+      saveChatStore()
+      // Re-run request with summarized prompts
+      _this.updatingMessage = 'Continuing...'
+      scrollToBottom(true)
+      return await _this.sendRequest(chat.messages, {
+        ...opts,
+        didSummary: true
+      },
+      overrides)
     } else {
       /***************
        * Unknown mode.
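Stripped of the request plumbing, the loop added above behaves roughly like this sketch (the types and helper are hypothetical; only the finish_reason === 'length' / continueCounter logic mirrors the diff):

type SummaryAttempt = { text: string, finish_reason?: string }

// summaryExtend = 0 allows one call, 1 allows up to two, 2 up to three.
async function summarizeWithExtend (summaryExtend: number, attempt: () => Promise<SummaryAttempt>): Promise<string> {
  let summary = ''
  let continueCounter = summaryExtend + 1
  while (continueCounter-- > 0) {
    const r = await attempt()
    summary += r.text
    if (r.finish_reason === 'length' && continueCounter > 0) continue // truncated, ask for more
    continueCounter = 0 // finished cleanly, or no extra calls left
  }
  return summary
}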


@@ -177,7 +177,7 @@
           placeholder={String(setting.placeholder || chatDefaults[setting.key])}
           on:change={e => queueSettingValueChange(e, setting)}
         />
-      {:else if setting.type === 'select'}
+      {:else if setting.type === 'select' || setting.type === 'select-number'}
         <!-- <div class="select"> -->
         <div class="select" class:control={fieldControls.length}>
           <select id="settings-{setting.key}" title="{setting.title}" on:change={e => queueSettingValueChange(e, setting) } >


@@ -13,7 +13,7 @@
     checkStateChange,
     addChat
   } from './Storage.svelte'
-  import { supportedModels, type Chat, type ChatSetting, type ResponseModels, type SettingSelect, type SelectOption, type ChatSettings } from './Types.svelte'
+  import type { Chat, ChatSetting, ResponseModels, SettingSelect, SelectOption, ChatSettings } from './Types.svelte'
   import { errorNotice, sizeTextElements } from './Util.svelte'
   import Fa from 'svelte-fa/src/fa.svelte'
   import {
@@ -37,6 +37,7 @@
   import { openModal } from 'svelte-modals'
   import PromptConfirm from './PromptConfirm.svelte'
   import { getApiBase, getEndpointModels } from './ApiUtil.svelte'
+  import { supportedModelKeys } from './Models.svelte'
 
   export let chatId:number
   export const show = () => { showSettings() }
@@ -194,7 +195,7 @@
         }
       })
     ).json()) as ResponseModels
-    const filteredModels = supportedModels.filter((model) => allModels.data.find((m) => m.id === model))
+    const filteredModels = supportedModelKeys.filter((model) => allModels.data.find((m) => m.id === model))
     const modelOptions:SelectOption[] = filteredModels.reduce((a, m) => {
       const o:SelectOption = {
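For illustration, what the switch from supportedModels to supportedModelKeys yields here, assuming a hypothetical /models response (only the filter line is taken from the diff):

import { supportedModelKeys } from './Models.svelte'

// Hypothetical payload from the models endpoint
const allModels = { data: [{ id: 'gpt-4' }, { id: 'gpt-3.5-turbo' }, { id: 'whisper-1' }] }
const filteredModels = supportedModelKeys.filter((model) => allModels.data.find((m) => m.id === model))
// → ['gpt-4', 'gpt-3.5-turbo']: only models both supported locally and offered to the account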

src/lib/Models.svelte (new file, 68 lines)

@@ -0,0 +1,68 @@
<script context="module" lang="ts">
  import type { ModelDetail, Model } from './Types.svelte'

  // Reference: https://openai.com/pricing#language-models
  // Eventually we'll add API hosts and endpoints to this
  const modelDetails : Record<string, ModelDetail> = {
    'gpt-4-32k': {
      prompt: 0.00006, // $0.06 per 1000 tokens prompt
      completion: 0.00012, // $0.12 per 1000 tokens completion
      max: 32768 // 32k max token buffer
    },
    'gpt-4': {
      prompt: 0.00003, // $0.03 per 1000 tokens prompt
      completion: 0.00006, // $0.06 per 1000 tokens completion
      max: 8192 // 8k max token buffer
    },
    'gpt-3.5': {
      prompt: 0.000002, // $0.002 per 1000 tokens prompt
      completion: 0.000002, // $0.002 per 1000 tokens completion
      max: 4096 // 4k max token buffer
    }
  }

  const unknownDetail = {
    prompt: 0,
    completion: 0,
    max: 4096
  }

  // See: https://platform.openai.com/docs/models/model-endpoint-compatibility
  // Eventually we'll add UI for managing this
  export const supportedModels : Record<string, ModelDetail> = {
    'gpt-4': modelDetails['gpt-4'],
    'gpt-4-0314': modelDetails['gpt-4'],
    'gpt-4-32k': modelDetails['gpt-4-32k'],
    'gpt-4-32k-0314': modelDetails['gpt-4-32k'],
    'gpt-3.5-turbo': modelDetails['gpt-3.5'],
    'gpt-3.5-turbo-0301': modelDetails['gpt-3.5']
  }

  const lookupList = {
    ...modelDetails,
    ...supportedModels
  }

  export const supportedModelKeys = Object.keys(supportedModels)

  const tpCache : Record<string, ModelDetail> = {}

  export const getModelDetail = (model: Model) => {
    // First try to get exact match, then from cache
    let r = supportedModels[model] || tpCache[model]
    if (r) return r
    // If no exact match, find closest match
    const k = Object.keys(lookupList)
      .sort((a, b) => b.length - a.length) // Longest to shortest for best match
      .find((k) => model.startsWith(k))
    if (k) {
      r = lookupList[k]
    } else {
      r = unknownDetail
    }
    // Cache it so we don't need to do that again
    tpCache[model] = r
    return r
  }
</script>
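A hypothetical usage sketch of the new module (the detail values are the ones defined above; the call sites and the unreleased model IDs are illustrative, not part of this commit):

import { getModelDetail, supportedModelKeys } from './Models.svelte'

getModelDetail('gpt-4-0314').max        // 8192 – exact key, shares the 'gpt-4' detail object
getModelDetail('gpt-4-32k-0613').max    // 32768 – no exact key, longest matching prefix ('gpt-4-32k') wins
getModelDetail('some-future-model').max // 4096 – unrecognized models fall back to unknownDetail
supportedModelKeys.includes('gpt-3.5-turbo') // true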


@@ -60,7 +60,7 @@ const gptDefaults = {
   n: 1,
   stream: true,
   stop: null,
-  max_tokens: 500,
+  max_tokens: 512,
   presence_penalty: 0,
   frequency_penalty: 0,
   logit_bias: null,
@@ -77,6 +77,7 @@ const defaults:ChatSettings = {
   continuousChat: 'fifo',
   summaryThreshold: 3000,
   summarySize: 1000,
+  summaryExtend: 0,
   pinTop: 0,
   pinBottom: 6,
   summaryPrompt: '',
@@ -222,11 +223,23 @@ const summarySettings: ChatSetting[] = [
     name: 'Max Summary Size',
     title: 'Maximum number of tokens allowed for summary response.',
     min: 128,
-    max: 512,
+    max: 1024,
     step: 1,
     type: 'number',
     hide: (chatId) => getChatSettings(chatId).continuousChat !== 'summary'
   },
+  {
+    key: 'summaryExtend',
+    name: 'Summary Extend',
+    title: 'Number of times a truncated summary can be extended.',
+    type: 'select-number',
+    options: [
+      { value: 0, text: '0 - Summary must fit in first call.' },
+      { value: 1, text: '1 - Allow one extra API call to extend.' },
+      { value: 2, text: '2 - Allow two extra API calls to extend.' }
+    ],
+    hide: (chatId) => getChatSettings(chatId).continuousChat !== 'summary'
+  },
   {
     key: 'pinTop',
     name: 'Keep First Prompts',


@@ -1,32 +1,11 @@
 <script context="module" lang="ts">
+  import { getModelDetail } from './Models.svelte'
   import type { Message, Model, Usage } from './Types.svelte'
   import { encode } from 'gpt-tokenizer'
 
-  // Reference: https://openai.com/pricing#language-models
-  // TODO: Move to settings of some type
-  const modelDetails : Record<string, [number, number, number]> = {
-    'gpt-4-32k': [0.00006, 0.00012, 32768], // $0.06 per 1000 tokens prompt, $0.12 per 1000 tokens completion, max 32k
-    'gpt-4': [0.00003, 0.00006, 8192], // $0.03 per 1000 tokens prompt, $0.06 per 1000 tokens completion, max 8k
-    'gpt-3.5': [0.000002, 0.000002, 4096] // $0.002 per 1000 tokens (both prompt and completion), max 4k
-  }
-
-  const tpCache = {}
-  const getModelDetail = (model: Model) => {
-    let r = tpCache[model]
-    if (r) return r
-    const k = Object.keys(modelDetails).find((k) => model.startsWith(k))
-    if (k) {
-      r = modelDetails[k]
-    } else {
-      r = [0, 0, 4096]
-    }
-    tpCache[model] = r
-    return r
-  }
-
   export const getPrice = (tokens: Usage, model: Model): number => {
     const t = getModelDetail(model)
-    return ((tokens.prompt_tokens * t[0]) + (tokens.completion_tokens * t[1]))
+    return ((tokens.prompt_tokens * t.prompt) + (tokens.completion_tokens * t.completion))
   }
 
   export const countPromptTokens = (prompts:Message[], model:Model):number => {
@@ -44,7 +23,7 @@
   }
 
   export const getModelMaxTokens = (model:Model):number => {
-    return getModelDetail(model)[2]
+    return getModelDetail(model).max
   }
 </script>
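As a worked example of the field-based detail shape replacing the old tuple indexing (usage numbers are hypothetical and assume the usual OpenAI-style Usage fields; prices come from the table in Models.svelte):

import { getPrice, getModelMaxTokens } from './Stats.svelte'

// 1500 prompt tokens and 500 completion tokens on gpt-4:
// 1500 * 0.00003 + 500 * 0.00006 = 0.045 + 0.03 = $0.075
getPrice({ prompt_tokens: 1500, completion_tokens: 500, total_tokens: 2000 }, 'gpt-4') // 0.075
getModelMaxTokens('gpt-4') // 8192, read from detail.max rather than tuple index [2]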


@@ -333,6 +333,7 @@
   export const cleanSettingValue = (type:string, value: any) => {
     switch (type) {
       case 'number':
+      case 'select-number':
         value = parseFloat(value)
         if (isNaN(value)) { value = null }
         return value
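What the added case does in practice: <select> elements always produce strings, so 'select-number' values are coerced back to numbers the same way plain 'number' settings are. A quick sketch with hypothetical inputs:

import { cleanSettingValue } from './Storage.svelte'

cleanSettingValue('select-number', '2')   // 2
cleanSettingValue('select-number', 'abc') // null – NaN is normalized to null
cleanSettingValue('number', '512')        // 512 – existing behaviour, unchanged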


@@ -1,15 +1,13 @@
 <script context="module" lang="ts">
-  // import type internal from "stream";
-  export const supportedModels = [ // See: https://platform.openai.com/docs/models/model-endpoint-compatibility
-    'gpt-4',
-    'gpt-4-0314',
-    'gpt-4-32k',
-    'gpt-4-32k-0314',
-    'gpt-3.5-turbo',
-    'gpt-3.5-turbo-0301'
-  ]
-  export type Model = typeof supportedModels[number];
+  import type { supportedModelKeys } from './Models.svelte'
+
+  export type Model = typeof supportedModelKeys[number];
+
+  export type ModelDetail = {
+    prompt: number;
+    completion: number;
+    max: number;
+  };
 
   export type Usage = {
     completion_tokens: number;
@@ -60,6 +58,7 @@
     continuousChat: (''|'fifo'|'summary');
     summaryThreshold: number;
     summarySize: number;
+    summaryExtend: number;
     pinTop: number;
     pinBottom: number;
     summaryPrompt: string;
@@ -141,19 +140,24 @@
   };
 
   export type SelectOption = {
-    value: string;
+    value: string|number;
     text: string;
   };
 
   type SettingBoolean = {
     type: 'boolean';
   };
 
   export type SettingSelect = {
     type: 'select';
     options: SelectOption[];
   };
 
+  export type SettingSelectNumber = {
+    type: 'select-number';
+    options: SelectOption[];
+  };
+
   export type SettingText = {
     type: 'text';
   };
@@ -199,7 +203,7 @@ type SettingBoolean = {
     fieldControls?: FieldControl[];
     beforeChange?: (chatId:number, setting:ChatSetting, value:any) => boolean;
     afterChange?: (chatId:number, setting:ChatSetting, value:any) => boolean;
-  } & (SettingNumber | SettingSelect | SettingBoolean | SettingText | SettingTextArea | SettingOther | SubSetting);
+  } & (SettingNumber | SettingSelect | SettingSelectNumber | SettingBoolean | SettingText | SettingTextArea | SettingOther | SubSetting);
 
   export type GlobalSetting = {