More dynamic summary sizing
parent 4289ee6cd9
commit 53a923af80
@@ -17,11 +17,9 @@
   resetChatSettings,
   setChatSettingValue,
   addChatFromJSON,
-
   updateRunningTotal
-
 } from './Storage.svelte'
-import { getChatSettingObjectByKey, getChatSettingList, getRequestSettingList, getChatDefaults } from './Settings.svelte'
+import { getChatSettingObjectByKey, getChatSettingList, getRequestSettingList, getChatDefaults, defaultModel } from './Settings.svelte'
 import {
   type Request,
   type Response,
@@ -31,7 +29,10 @@
   type SettingSelect,
   type Chat,
   type SelectOption,
-  supportedModels
+  supportedModels,
+
+  type Usage
+
 } from './Types.svelte'
 import Prompts from './Prompts.svelte'
 import Messages from './Messages.svelte'
@@ -57,11 +58,11 @@
   faEraser,
   faRotateRight
 } from '@fortawesome/free-solid-svg-icons/index'
-import { encode } from 'gpt-tokenizer'
+// import { encode } from 'gpt-tokenizer'
 import { v4 as uuidv4 } from 'uuid'
 import { exportChatAsJSON, exportProfileAsJSON } from './Export.svelte'
 import { clickOutside } from 'svelte-use-click-outside'
-import { getPrice } from './Stats.svelte'
+import { countPromptTokens, getMaxModelPrompt, getPrice } from './Stats.svelte'

 // This makes it possible to override the OpenAI API base URL in the .env file
 const apiBase = import.meta.env.VITE_API_BASE || 'https://api.openai.com'
@@ -149,27 +150,27 @@
 }

 // Send API request
-const sendRequest = async (messages: Message[], doingSummary?:boolean, withSummary?:boolean): Promise<Response> => {
+const sendRequest = async (messages: Message[], summaryTarget:number|undefined = undefined, withSummary:boolean = false): Promise<Response> => {
   // Show updating bar
   updating = true

+  const model = chat.settings.model || defaultModel
+  const maxTokens = getMaxModelPrompt(model) // max tokens for model
+
   let response: Response

   // Submit only the role and content of the messages, provide the previous messages as well for context
   const filtered = messages.filter((message) => message.role !== 'error' && message.content && !message.summarized)

   // Get an estimate of the total prompt size we're sending
-  const promptTokenCount:number = filtered.reduce((a, m) => {
-    // Not sure how OpenAI formats it, but this seems to get close to the right counts
-    // Sure would be nice to know
-    a += encode('## ' + m.role + ' ##:\r\n\r\n' + m.content + '\r\n\r\n\r\n').length
-    return a
-  }, 0) + 3
+  const promptTokenCount:number = countPromptTokens(filtered, model)
+  let summarySize = chatSettings.summarySize

   // console.log('Estimated',promptTokenCount,'prompt token for this request')

   if (chatSettings.useSummarization &&
-    !withSummary && !doingSummary &&
+    !withSummary && !summaryTarget &&
     promptTokenCount > chatSettings.summaryThreshold) {
     // Too many tokens -- well need to sumarize some past ones else we'll run out of space
     // Get a block of past prompts we'll summarize
@@ -197,37 +198,69 @@
     // Reduce to prompts we'll send in for summary
     // (we may need to update this to not include the pin-top, but the context it provides seems to help in the accuracy of the summary)
     const summarize = filtered.slice(0, filtered.length - pinBottom)
+    // Estimate token count of what we'll be summarizing
+    let sourceTokenCount = countPromptTokens(summarize, model)
+    // build summary prompt message
+    let summaryPrompt = prepareSummaryPrompt(chatId, sourceTokenCount)
+    const summaryMessage = {
+      role: 'user',
+      content: summaryPrompt
+    } as Message
+    // get an estimate of how many tokens this request + max completions could be
+    let summaryPromptSize = countPromptTokens(summarize.concat(summaryMessage), model)
+    // reduce summary size to make sure we're not requesting a summary larger than our prompts
+    summarySize = Math.floor(Math.min(summarySize, sourceTokenCount / 4))
+    // Make sure our prompt + completion request isn't too large
+    while (summarize.length - (pinTop + systemPad) >= 3 && summaryPromptSize + summarySize > maxTokens && summarySize >= 4) {
+      summarize.pop()
+      sourceTokenCount = countPromptTokens(summarize, model)
+      summaryPromptSize = countPromptTokens(summarize.concat(summaryMessage), model)
+      summarySize = Math.floor(Math.min(summarySize, sourceTokenCount / 4))
+    }
+    // See if we have to adjust our max summarySize
+    if (summaryPromptSize + summarySize > maxTokens) {
+      summarySize = maxTokens - summaryPromptSize
+    }
     // Always try to end the prompts being summarized with a user prompt. Seems to work better.
     while (summarize.length - (pinTop + systemPad) >= 4 && summarize[summarize.length - 1].role !== 'user') {
       summarize.pop()
     }
-    // Estimate token count of what we'll be summarizing
-    const sourceTokenCount = summarize.reduce((a, m) => { a += encode(m.content).length + 8; return a }, 0)
-    const summaryPrompt = prepareSummaryPrompt(chatId, sourceTokenCount)
-    if (sourceTokenCount > 20 && summaryPrompt) {
+    // update with actual
+    sourceTokenCount = countPromptTokens(summarize, model)
+    summaryPrompt = prepareSummaryPrompt(chatId, sourceTokenCount)
+    summarySize = Math.floor(Math.min(summarySize, sourceTokenCount / 4))
+    summaryMessage.content = summaryPrompt
+    if (sourceTokenCount > 20 && summaryPrompt && summarySize > 4) {
       // get prompt we'll be inserting after
       const endPrompt = summarize[summarize.length - 1]
       // Add a prompt to ask to summarize them
       const summarizeReq = summarize.slice()
-      summarizeReq.push({
-        role: 'user',
-        content: summaryPrompt
-      } as Message)
+      summarizeReq.push(summaryMessage)
+      summaryPromptSize = countPromptTokens(summarizeReq, model)
       // Wait for the summary completion
       updatingMessage = 'Building Summary...'
-      const summary = await sendRequest(summarizeReq, true)
+      const summary = await sendRequest(summarizeReq, summarySize)
       if (summary.error) {
         // Failed to some API issue. let the original caller handle it.
         return summary
       } else {
-        // See if we can parse the results
-        // (Make sure AI generated a good JSON response)
+        // Get response
         const summaryPromptContent: string = summary.choices.reduce((a, c) => {
           if (a.length > c.message.content.length) return a
           a = c.message.content
           return a
         }, '')

+        // Get use stats for response
+        const summaryUse = summary.choices.reduce((a, c) => {
+          const u = c.message.usage as Usage
+          a.completion_tokens += u.completion_tokens
+          a.prompt_tokens += u.prompt_tokens
+          a.total_tokens += u.total_tokens
+          return a
+        }, {prompt_tokens: 0,completion_tokens: 0,total_tokens: 0} as Usage)
+
         // Looks like we got our summarized messages.
         // get ids of messages we summarized
         const summarizedIds = summarize.slice(pinTop + systemPad).map(m => m.uuid)
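A quick worked sketch of the sizing math introduced above, using made-up numbers for a gpt-3.5-class model (4096-token limit); the variable names follow the diff, but the values are purely illustrative:

```ts
// Hypothetical values: maxTokens from getMaxModelPrompt(model),
// summarySize from chatSettings.summarySize, the rest from countPromptTokens().
const maxTokens = 4096
let summarySize = 512
const sourceTokenCount = 1800   // tokens in the prompts being summarized
const summaryPromptSize = 3700  // those prompts plus the summary request message

// Never ask for a summary larger than a quarter of what is being summarized.
summarySize = Math.floor(Math.min(summarySize, sourceTokenCount / 4)) // -> 450

// If prompt + completion would still exceed the model limit, shrink the target further.
if (summaryPromptSize + summarySize > maxTokens) {
  summarySize = maxTokens - summaryPromptSize // -> 396
}
```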
@@ -236,7 +269,9 @@
           role: 'assistant',
           content: summaryPromptContent,
           uuid: uuidv4(),
-          summary: summarizedIds
+          summary: summarizedIds,
+          usage: summaryUse,
+          model: model,
         }
         const summaryIds = [summaryPrompt.uuid]
         // Insert messages
@@ -251,7 +286,7 @@
         // Re-run request with summarized prompts
         // return { error: { message: "End for now" } } as Response
         updatingMessage = 'Continuing...'
-        return await sendRequest(chat.messages, false, true)
+        return await sendRequest(chat.messages, undefined, true)
       }
     } else if (!summaryPrompt) {
       addMessage(chatId, { role: 'error', content: 'Unable to summarize. No summary prompt defined.', uuid: uuidv4() })
@@ -270,14 +305,14 @@
       // Provide the settings by mapping the settingsMap to key/value pairs
       ...getRequestSettingList().reduce((acc, setting) => {
         let value = getChatSettingValueNullDefault(chatId, setting)
-        if (doingSummary && setting.key === 'max_tokens') {
-          // Override for summary
-          // TODO: Auto adjust this above to make sure it doesn't go over avail token space
-          value = chatSettings.summarySize
-        }
         if (typeof setting.apiTransform === 'function') {
           value = setting.apiTransform(chatId, setting, value)
         }
+        if (summaryTarget) {
+          // requesting summary. do overrides
+          if (setting.key === 'max_tokens') value = summaryTarget // only as large as we need for summary
+          if (setting.key === 'n') value = 1 // never more than one completion
+        }
         if (value !== null) acc[setting.key] = value
         return acc
       }, {})
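With these overrides in place, a summary pass ends up building a chat-completion request body roughly like the sketch below; this is illustrative only, and the real body also carries whatever other request settings the chat has configured:

```ts
// Sketch of the request body for a summary pass with summaryTarget = 450.
const body = {
  model: 'gpt-3.5-turbo-0301',  // chat.settings.model || defaultModel
  messages: [ /* pinned context + prompts to summarize + the summary request */ ],
  max_tokens: 450,              // overridden to summaryTarget
  n: 1                          // a summary never needs more than one completion
}
```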
@@ -323,7 +358,14 @@
   updatingMessage = ''

   if (!response.error) {
+    // Add response counts to usage totals
     updateRunningTotal(chatId, response.usage, response.model)
+    // const completionTokenCount:number = response.choices.reduce((a, c) => {
+    //   // unlike the prompts, token count of the completion is just the completion.
+    //   a += encode(c.message.content).length
+    //   return a
+    // }, 0)
+    // console.log('estimated response token count', completionTokenCount)
   }

   return response
@@ -407,7 +449,7 @@
   const suggestMessages = chat.messages.slice(0, 10) // limit to first 10 messages
   suggestMessages.push(suggestMessage)

-  const response = await sendRequest(suggestMessages, true)
+  const response = await sendRequest(suggestMessages, 20)

   if (response.error) {
     addMessage(chatId, {
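For reference, the second argument of `sendRequest` is now a token budget rather than a flag; the call sites in this diff use it in three ways:

```ts
await sendRequest(summarizeReq, summarySize)      // summary pass: completion capped at summarySize tokens
await sendRequest(chat.messages, undefined, true) // continue after the summary; withSummary prevents re-summarizing
await sendRequest(suggestMessages, 20)            // chat-name suggestion, capped at 20 tokens
```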
@@ -14,7 +14,7 @@ export const isStaticProfile = (key:string):boolean => {
 }

 const getProfiles = ():Record<string, ChatSettings> => {
-  const result:Record<string, ChatSettings> = Object.entries(profiles
+  const result = Object.entries(profiles
   ).reduce((a, [k, v]) => {
     a[k] = v
     return a
@@ -60,10 +60,10 @@ export const prepareProfilePrompt = (chatId:number) => {
   return currentProfilePrompt.replaceAll('[[CHARACTER_NAME]]', characterName)
 }

-export const prepareSummaryPrompt = (chatId:number, promptsSize:number) => {
+export const prepareSummaryPrompt = (chatId:number, promptsSize:number, maxTokens:number|undefined = undefined) => {
   const settings = getChatSettings(chatId)
   const characterName = settings.characterName || 'ChatGPT'
-  let maxTokens:number = settings.summarySize
+  maxTokens = maxTokens || settings.summarySize
   maxTokens = Math.min(Math.floor(promptsSize / 4), maxTokens) // Make sure we're shrinking by at least a 4th
   const currentSummaryPrompt = settings.summaryPrompt
   return currentSummaryPrompt
@@ -72,7 +72,7 @@ export const prepareSummaryPrompt = (chatId:number, promptsSize:number) => {
 }

 // Apply currently selected profile
-export const applyProfile = (chatId:number, key?:string, resetChat:boolean = false) => {
+export const applyProfile = (chatId:number, key:string = '', resetChat:boolean = false) => {
   const settings = getChatSettings(chatId)
   const profile = getProfile(key || settings.profile)
   resetChatSettings(chatId, resetChat) // Fully reset
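The new optional `maxTokens` argument lets callers cap how large a summary the prompt asks for; the clamp behaves as in this sketch (the prompt size and settings value are hypothetical):

```ts
// With promptsSize = 1800 and settings.summarySize = 512:
prepareSummaryPrompt(chatId, 1800)      // maxTokens -> min(floor(1800 / 4), 512) = 450
prepareSummaryPrompt(chatId, 1800, 200) // maxTokens -> min(450, 200) = 200
```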
@@ -10,9 +10,12 @@ import {
   type SettingSelect,
   type GlobalSetting,
   type GlobalSettings,
-  type Request
+  type Request,
+  type Model
 } from './Types.svelte'

+export const defaultModel:Model = 'gpt-3.5-turbo-0301'
+
 export const getChatSettingList = (): ChatSetting[] => {
   return chatSettingsList
 }
@@ -48,7 +51,7 @@ export const getExcludeFromProfile = () => {
 }

 const gptDefaults = {
-  model: 'gpt-3.5-turbo-0301',
+  model: defaultModel,
   messages: [],
   temperature: 1,
   top_p: 1,
@@ -1,32 +1,45 @@
 <script context="module" lang="ts">
-  // For usage stats
-  import type { Model, Usage } from './Types.svelte'
+  import type { Message, Model, Usage } from './Types.svelte'
+  import { encode } from 'gpt-tokenizer'

   // Reference: https://openai.com/pricing#language-models
   // TODO: Move to settings of some type
-  export const tokenPrice : Record<string, [number, number]> = {
-    'gpt-4-32k': [0.00006, 0.00012], // $0.06 per 1000 tokens prompt, $0.12 per 1000 tokens completion
-    'gpt-4': [0.00003, 0.00006], // $0.03 per 1000 tokens prompt, $0.06 per 1000 tokens completion
-    'gpt-3.5': [0.000002, 0.000002] // $0.002 per 1000 tokens (both prompt and completion)
+  const modelDetails : Record<string, [number, number, number]> = {
+    'gpt-4-32k': [0.00006, 0.00012, 32768], // $0.06 per 1000 tokens prompt, $0.12 per 1000 tokens completion, max tokens
+    'gpt-4': [0.00003, 0.00006, 8192], // $0.03 per 1000 tokens prompt, $0.06 per 1000 tokens completion
+    'gpt-3.5': [0.000002, 0.000002, 4096] // $0.002 per 1000 tokens (both prompt and completion)
   }

   const tpCache = {}
-  const getTokenPrice = (model: Model) => {
+  const getModelDetail = (model: Model) => {
     let r = tpCache[model]
     if (r) return r
-    const k = Object.keys(tokenPrice).find((k) => model.startsWith(k))
+    const k = Object.keys(modelDetails).find((k) => model.startsWith(k))
     if (k) {
-      r = tokenPrice[k]
+      r = modelDetails[k]
     } else {
-      r = [0, 0]
+      r = [0, 0, 4096]
     }
     tpCache[model] = r
     return r
   }

   export const getPrice = (tokens: Usage, model: Model): number => {
-    const t = getTokenPrice(model)
+    const t = getModelDetail(model)
     return ((tokens.prompt_tokens * t[0]) + (tokens.completion_tokens * t[1]))
   }

+  export const countPromptTokens = (prompts:Message[], model:Model):number => {
+    return prompts.reduce((a, m) => {
+      // Not sure how OpenAI formats it, but this seems to get close to the right counts.
+      // Would be nice to know. This works for gpt-3.5. gpt-4 could be different
+      a += encode('## ' + m.role + ' ##:\r\n\r\n' + m.content + '\r\n\r\n\r\n').length
+      return a
+    }, 0) + 3
+  }
+
+  export const getMaxModelPrompt = (model:Model):number => {
+    return getModelDetail(model)[2]
+  }
+
 </script>
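Taken together, the new helpers let callers check headroom (and cost) before sending; a minimal usage sketch with a placeholder message list, assuming the same sibling-module layout as the imports in this diff:

```ts
import { countPromptTokens, getMaxModelPrompt, getPrice } from './Stats.svelte'
import type { Message, Model, Usage } from './Types.svelte'

const model: Model = 'gpt-3.5-turbo-0301'
const prompts: Message[] = [
  { role: 'user', content: 'Summarize our discussion so far.' } as Message
]

const used = countPromptTokens(prompts, model)  // estimated prompt tokens
const room = getMaxModelPrompt(model) - used    // space left for the completion
console.log(`~${used} prompt tokens, ~${room} tokens of headroom`)

// Estimated cost of this prompt plus a 200-token completion:
const usage: Usage = { prompt_tokens: used, completion_tokens: 200, total_tokens: used + 200 }
console.log(getPrice(usage, model))
```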
@@ -182,7 +182,7 @@
   const chats = get(chatsStorage)
   const chat = chats.find((chat) => chat.id === chatId) as Chat
   const index = chat.messages.findIndex((m) => m.uuid === uuid)
-  const found = chat.messages.filter((m) => m.uuid === uuid)
+  // const found = chat.messages.filter((m) => m.uuid === uuid)
   if (index < 0) {
     console.error(`Unable to find and delete message with ID: ${uuid}`)
     return