Allow user to adjust message chaining strings

Webifi 2023-07-22 14:40:12 -05:00
parent 9a6004c55d
commit 6d35a46d50
9 changed files with 142 additions and 54 deletions

View File

@@ -215,7 +215,7 @@ export class ChatRequest {
}
// Get token counts
const promptTokenCount = countPromptTokens(messagePayload, model, chatSettings)
const promptTokenCount = countPromptTokens(messagePayload, model, chat)
const maxAllowed = maxTokens - (promptTokenCount + 1)
// Build the API request body
@@ -287,7 +287,8 @@ export class ChatRequest {
}
private buildHiddenPromptPrefixMessages (messages: Message[], insert:boolean = false): Message[] {
const chatSettings = this.chat.settings
const chat = this.chat
const chatSettings = chat.settings
const hiddenPromptPrefix = mergeProfileFields(chatSettings, chatSettings.hiddenPromptPrefix).trim()
const lastMessage = messages[messages.length - 1]
const isContinue = lastMessage?.role === 'assistant' && lastMessage.finish_reason === 'length'
@@ -328,11 +329,11 @@ export class ChatRequest {
* Gets an estimate of how many extra tokens will be added that won't be part of the visible messages
* @param filtered
*/
private getTokenCountPadding (filtered: Message[], settings: ChatSettings): number {
private getTokenCountPadding (filtered: Message[], chat: Chat): number {
let result = 0
// add cost of hiddenPromptPrefix
result += this.buildHiddenPromptPrefixMessages(filtered)
.reduce((a, m) => a + countMessageTokens(m, this.getModel(), settings), 0)
.reduce((a, m) => a + countMessageTokens(m, this.getModel(), chat), 0)
// more here eventually?
return result
}
@@ -354,10 +355,10 @@ export class ChatRequest {
}
// Get extra counts for when the prompts are finally sent.
const countPadding = this.getTokenCountPadding(filtered, chatSettings)
const countPadding = this.getTokenCountPadding(filtered, chat)
// See if we have enough to apply any of the reduction modes
const fullPromptSize = countPromptTokens(filtered, model, chatSettings) + countPadding
const fullPromptSize = countPromptTokens(filtered, model, chat) + countPadding
if (fullPromptSize < chatSettings.summaryThreshold) return await continueRequest() // nothing to do yet
const overMax = fullPromptSize > maxTokens * 0.95
@@ -380,12 +381,12 @@ export class ChatRequest {
* *************************************************************
*/
let promptSize = countPromptTokens(top.concat(rw), model, chatSettings) + countPadding
let promptSize = countPromptTokens(top.concat(rw), model, chat) + countPadding
while (rw.length && rw.length > pinBottom && promptSize >= chatSettings.summaryThreshold) {
const rolled = rw.shift()
// Hide messages we're "rolling"
if (rolled) rolled.suppress = true
promptSize = countPromptTokens(top.concat(rw), model, chatSettings) + countPadding
promptSize = countPromptTokens(top.concat(rw), model, chat) + countPadding
}
// Run a new request, now with the rolled messages hidden
return await _this.sendRequest(get(currentChatMessages), {
@@ -401,26 +402,26 @@ export class ChatRequest {
const bottom = rw.slice(0 - pinBottom)
let continueCounter = chatSettings.summaryExtend + 1
rw = rw.slice(0, 0 - pinBottom)
let reductionPoolSize = countPromptTokens(rw, model, chatSettings)
let reductionPoolSize = countPromptTokens(rw, model, chat)
const ss = Math.abs(chatSettings.summarySize)
const getSS = ():number => (ss < 1 && ss > 0)
? Math.round(reductionPoolSize * ss) // If summarySize between 0 and 1, use percentage of reduced
: Math.min(ss, reductionPoolSize * 0.5) // If > 1, use token count
const topSize = countPromptTokens(top, model, chatSettings)
const topSize = countPromptTokens(top, model, chat)
let maxSummaryTokens = getSS()
let promptSummary = prepareSummaryPrompt(chatId, maxSummaryTokens)
const summaryRequest = { role: 'user', content: promptSummary } as Message
let promptSummarySize = countMessageTokens(summaryRequest, model, chatSettings)
let promptSummarySize = countMessageTokens(summaryRequest, model, chat)
// Make sure there is enough room to generate the summary, and try to make sure
// the last prompt is a user prompt as that seems to work better for summaries
while ((topSize + reductionPoolSize + promptSummarySize + maxSummaryTokens) >= maxTokens ||
(reductionPoolSize >= 100 && rw[rw.length - 1]?.role !== 'user')) {
bottom.unshift(rw.pop() as Message)
reductionPoolSize = countPromptTokens(rw, model, chatSettings)
reductionPoolSize = countPromptTokens(rw, model, chat)
maxSummaryTokens = getSS()
promptSummary = prepareSummaryPrompt(chatId, maxSummaryTokens)
summaryRequest.content = promptSummary
promptSummarySize = countMessageTokens(summaryRequest, model, chatSettings)
promptSummarySize = countMessageTokens(summaryRequest, model, chat)
}
if (reductionPoolSize < 50) {
if (overMax) addError(chatId, 'Check summary settings. Unable to summarize enough messages.')
@@ -506,10 +507,10 @@ export class ChatRequest {
// Try to get more of it
delete summaryResponse.finish_reason
_this.updatingMessage = 'Summarizing more...'
let _recount = countPromptTokens(top.concat(rw).concat([summaryRequest]).concat([summaryResponse]), model, chatSettings)
let _recount = countPromptTokens(top.concat(rw).concat([summaryRequest]).concat([summaryResponse]), model, chat)
while (rw.length && (_recount + maxSummaryTokens >= maxTokens)) {
rw.shift()
_recount = countPromptTokens(top.concat(rw).concat([summaryRequest]).concat([summaryResponse]), model, chatSettings)
_recount = countPromptTokens(top.concat(rw).concat([summaryRequest]).concat([summaryResponse]), model, chat)
}
loopCount++
continue

View File

@@ -1,7 +1,7 @@
<script context="module" lang="ts">
import ChatCompletionResponse from './ChatCompletionResponse.svelte'
import ChatRequest from './ChatRequest.svelte'
import { getEndpoint, getModelDetail, getRoleTag } from './Models.svelte'
import { getEndpoint, getModelDetail, getRoleTag, getStopSequence } from './Models.svelte'
import type { ChatCompletionOpts, Message, Request } from './Types.svelte'
import { getModelMaxTokens } from './Stats.svelte'
import { updateMessages } from './Storage.svelte'
@@ -13,6 +13,7 @@ export const runPetalsCompletionRequest = async (
signal: AbortSignal,
opts: ChatCompletionOpts) => {
// Petals
const chat = chatRequest.getChat()
const model = chatRequest.getModel()
const modelDetail = getModelDetail(model)
const ws = new WebSocket(getEndpoint(model))
@@ -24,11 +25,10 @@ export const runPetalsCompletionRequest = async (
ws.close()
}
signal.addEventListener('abort', abortListener)
const startSequences = modelDetail.start || []
const startSequence = startSequences[0] || ''
const stopSequences = modelDetail.stop || ['###']
const stopSequence = getStopSequence(chat)
const stopSequencesC = stopSequences.slice()
const stopSequence = stopSequencesC.shift()
if (stopSequence === stopSequencesC[0]) stopSequencesC.shift()
const maxTokens = getModelMaxTokens(model)
let maxLen = Math.min(opts.maxTokens || chatRequest.chat.max_tokens || maxTokens, maxTokens)
const promptTokenCount = chatResponse.getPromptTokenCount()
@@ -102,9 +102,7 @@ export const runPetalsCompletionRequest = async (
for (let i = 0, l = stopSequences.length; i < l; i++) {
if (message.content.endsWith(stopSequences[i])) {
message.content = message.content.slice(0, message.content.length - stopSequences[i].length)
const startS = startSequence[i] || ''
if (message.content.startsWith(startS)) message.content = message.content.slice(startS.length)
updateMessages(chatRequest.getChat().id)
updateMessages(chat.id)
}
}
}
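
The loop above strips a trailing stop sequence from the streamed message before it is persisted. A minimal standalone sketch of that trimming step, assuming plain strings; trimStopSequence is an illustrative name, not a function in this codebase:

const trimStopSequence = (content: string, stopSequences: string[]): string => {
  for (const stop of stopSequences) {
    if (stop && content.endsWith(stop)) {
      // Drop the trailing stop marker so it never shows up in the rendered message
      return content.slice(0, content.length - stop.length)
    }
  }
  return content
}
// trimStopSequence('Sure, here you go.</s>', ['</s>']) === 'Sure, here you go.'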

View File

@@ -3,7 +3,7 @@
// import { getProfile } from './Profiles.svelte'
import { cleanSettingValue, setChatSettingValue } from './Storage.svelte'
import type { Chat, ChatSetting, ChatSettings, ControlAction, FieldControl, SettingPrompt } from './Types.svelte'
import { autoGrowInputOnEvent, errorNotice } from './Util.svelte'
import { autoGrowInputOnEvent, errorNotice, valueOf } from './Util.svelte'
// import { replace } from 'svelte-spa-router'
import Fa from 'svelte-fa/src/fa.svelte'
import { openModal } from 'svelte-modals'
@@ -23,13 +23,9 @@
const chatId = chat.id
let show = false
const valueOf = (value: any) => {
if (typeof value === 'function') return value(chatId, setting)
return value
}
let header = valueOf(setting.header)
let headerClass = valueOf(setting.headerClass)
let header = valueOf(chatId, setting.header)
let headerClass = valueOf(chatId, setting.headerClass)
let placeholder = valueOf(chatId, setting.placeholder)
const buildFieldControls = () => {
fieldControls = (setting.fieldControls || [] as FieldControl[]).map(fc => {
@@ -46,8 +42,9 @@
afterUpdate(() => {
show = (typeof setting.hide !== 'function') || !setting.hide(chatId)
header = valueOf(setting.header)
headerClass = valueOf(setting.headerClass)
header = valueOf(chatId, setting.header)
headerClass = valueOf(chatId, setting.headerClass)
placeholder = valueOf(chatId, setting.placeholder)
buildFieldControls()
})
@@ -181,7 +178,7 @@
<label class="label" for="settings-{setting.key}" title="{setting.title}">{setting.name}</label>
<textarea
class="input is-info is-focused chat-input auto-size"
placeholder={setting.placeholder || ''}
placeholder={placeholder || ''}
rows="1"
on:input={e => autoGrowInputOnEvent(e)}
on:change={e => { queueSettingValueChange(e, setting); autoGrowInputOnEvent(e) }}
@@ -205,7 +202,7 @@
min={setting.min}
max={setting.max}
step={setting.step}
placeholder={String(setting.placeholder || chatDefaults[setting.key])}
placeholder={String(placeholder || chatDefaults[setting.key])}
on:change={e => queueSettingValueChange(e, setting)}
/>
{:else if setting.type === 'select' || setting.type === 'select-number'}
@@ -243,6 +240,7 @@
title="{setting.title}"
class="input"
value={chatSettings[setting.key]}
placeholder={String(placeholder || chatDefaults[setting.key])}
on:change={e => { queueSettingValueChange(e, setting) }}
>
</div>

View File

@@ -2,9 +2,12 @@
import { getApiBase, getEndpointCompletions, getEndpointGenerations, getEndpointModels, getPetals } from './ApiUtil.svelte'
import { apiKeyStorage, globalStorage } from './Storage.svelte'
import { get } from 'svelte/store'
import type { ModelDetail, Model, ResponseModels, SelectOption, ChatSettings } from './Types.svelte'
import type { ModelDetail, Model, ResponseModels, SelectOption, Chat } from './Types.svelte'
import { encode } from 'gpt-tokenizer'
import llamaTokenizer from 'llama-tokenizer-js'
import { mergeProfileFields } from './Profiles.svelte'
import { getChatSettingObjectByKey } from './Settings.svelte'
import { valueOf } from './Util.svelte'
// Reference: https://openai.com/pricing#language-models
// Eventually we'll add API hosts and endpoints to this
@@ -36,8 +39,10 @@ const modelDetails : Record<string, ModelDetail> = {
'meta-llama/Llama-2-70b-chat-hf': {
type: 'Petals',
label: 'Petals - Llama-2-70b-chat',
start: [''],
stop: ['</s>'],
userStart: '[user]',
assistantStart: '[[[CHARACTER_NAME]]]',
systemStart: '',
prompt: 0.000000, // $0.000 per 1000 tokens prompt
completion: 0.000000, // $0.000 per 1000 tokens completion
max: 4096 // 4k max token buffer
@@ -130,16 +135,38 @@ export const getEndpoint = (model: Model): string => {
}
}
export const getRoleTag = (role: string, model: Model, settings: ChatSettings): string => {
export const getStopSequence = (chat: Chat): string => {
return valueOf(chat.id, getChatSettingObjectByKey('stopSequence').placeholder)
}
export const getUserStart = (chat: Chat): string => {
return mergeProfileFields(
chat.settings,
valueOf(chat.id, getChatSettingObjectByKey('userMessageStart').placeholder)
)
}
export const getAssistantStart = (chat: Chat): string => {
return mergeProfileFields(
chat.settings,
valueOf(chat.id, getChatSettingObjectByKey('assistantMessageStart').placeholder)
)
}
export const getSystemStart = (chat: Chat): string => {
return mergeProfileFields(
chat.settings,
valueOf(chat.id, getChatSettingObjectByKey('systemMessageStart').placeholder)
)
}
export const getRoleTag = (role: string, model: Model, chat: Chat): string => {
const modelDetails = getModelDetail(model)
switch (modelDetails.type) {
case 'Petals':
if (role === 'assistant') {
if (settings.useSystemPrompt && settings.characterName) return '[' + settings.characterName + '] '
return '[Assistant] '
}
if (role === 'user') return '[user] '
return ''
if (role === 'assistant') return getAssistantStart(chat) + ' '
if (role === 'user') return getUserStart(chat) + ' '
return getSystemStart(chat) + ' '
case 'OpenAIDall-e':
return role
case 'OpenAIChat':
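
A rough sketch of how the new role tags compose for a Petals model, assuming the Llama-2 defaults declared above; roleTagFor and its arguments are illustrative, the real code resolves them per chat through getUserStart, getAssistantStart, and getSystemStart:

type RoleStarts = { userStart?: string, assistantStart?: string, systemStart?: string }

const roleTagFor = (role: string, characterName: string, starts: RoleStarts): string => {
  // Substitute the character name the same way mergeProfileFields does
  const sub = (s: string) => s.replaceAll('[[CHARACTER_NAME]]', characterName)
  if (role === 'assistant') return sub(starts.assistantStart || '[[[CHARACTER_NAME]]]') + ' '
  if (role === 'user') return sub(starts.userStart || '[user]') + ' '
  return sub(starts.systemStart || '') + ' '
}
// roleTagFor('assistant', 'Assistant', {}) === '[Assistant] '
// roleTagFor('user', 'Assistant', {}) === '[user] '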

View File

@@ -72,7 +72,7 @@ export const getProfile = (key:string, forReset:boolean = false):ChatSettings =>
export const mergeProfileFields = (settings: ChatSettings, content: string|undefined, maxWords: number|undefined = undefined): string => {
if (!content?.toString) return ''
content = (content + '').replaceAll('[[CHARACTER_NAME]]', settings.characterName || 'ChatGPT')
content = (content + '').replaceAll('[[CHARACTER_NAME]]', settings.characterName || 'Assistant')
if (maxWords) content = (content + '').replaceAll('[[MAX_WORDS]]', maxWords.toString())
return content
}
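
A quick usage sketch of the fallback change above; the template string and empty character name are illustrative values, not data from a real profile:

import type { ChatSettings } from './Types.svelte'
import { mergeProfileFields } from './Profiles.svelte'

// With no character name set, the substituted fallback is now 'Assistant' rather than 'ChatGPT'
const greeting = mergeProfileFields({ characterName: '' } as ChatSettings, '[[[CHARACTER_NAME]]] How can I help?')
// greeting === '[Assistant] How can I help?'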

View File

@@ -94,6 +94,10 @@ const defaults:ChatSettings = {
hppContinuePrompt: '',
hppWithSummaryPrompt: false,
imageGenerationSize: '',
stopSequence: '',
userMessageStart: '',
assistantMessageStart: '',
systemMessageStart: '',
// useResponseAlteration: false,
// responseAlterations: [],
isDirty: false
@@ -414,6 +418,10 @@ const isNotOpenAI = (chatId) => {
return getModelDetail(getChatSettings(chatId).model).type !== 'OpenAIChat'
}
const isNotPetals = (chatId) => {
return getModelDetail(getChatSettings(chatId).model).type !== 'Petals'
}
const chatSettingsList: ChatSetting[] = [
profileSetting,
...systemPromptSettings,
@@ -492,6 +500,50 @@ const chatSettingsList: ChatSetting[] = [
type: 'number',
hide: isNotOpenAI
},
{
key: 'stopSequence',
name: 'Stop Sequence',
title: 'Characters used to separate messages in the message chain.',
type: 'text',
placeholder: (chatId) => {
const val = getModelDetail(getChatSettings(chatId).model).stop
return (val && val[0]) || ''
},
hide: isNotPetals
},
{
key: 'userMessageStart',
name: 'User Message Start Sequence',
title: 'Sequence to denote user messages in the message chain.',
type: 'text',
placeholder: (chatId) => {
const val = getModelDetail(getChatSettings(chatId).model).userStart
return val || ''
},
hide: isNotPetals
},
{
key: 'assistantMessageStart',
name: 'Assistant Message Start Sequence',
title: 'Sequence to denote assistant messages in the message chain.',
type: 'text',
placeholder: (chatId) => {
const val = getModelDetail(getChatSettings(chatId).model).assistantStart
return val || ''
},
hide: isNotPetals
},
{
key: 'systemMessageStart',
name: 'System Message Start Sequence',
title: 'Sequence to denote system messages in the message chain.',
type: 'text',
placeholder: (chatId) => {
const val = getModelDetail(getChatSettings(chatId).model).systemStart
return val || ''
},
hide: isNotPetals
},
{
// logit bias editor not implemented yet
key: 'logit_bias',
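
The placeholder callbacks above surface each model's defaults in the form without writing them into the chat. A small sketch of how one resolves at render time, using the helpers touched by this commit; the chat id is illustrative:

import { valueOf } from './Util.svelte'
import { getChatSettingObjectByKey } from './Settings.svelte'

const chatId = 1 // illustrative id
const stopSetting = getChatSettingObjectByKey('stopSequence')
// placeholder is a function here, so valueOf calls it with the chat id;
// for 'meta-llama/Llama-2-70b-chat-hf' this yields the default '</s>'
const defaultStop = valueOf(chatId, stopSetting.placeholder)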

View File

@@ -1,16 +1,16 @@
<script context="module" lang="ts">
import { countTokens, getModelDetail, getRoleTag } from './Models.svelte'
import type { ChatSettings, Message, Model, Usage } from './Types.svelte'
import { countTokens, getModelDetail, getRoleTag, getStopSequence } from './Models.svelte'
import type { Chat, Message, Model, Usage } from './Types.svelte'
export const getPrice = (tokens: Usage, model: Model): number => {
const t = getModelDetail(model)
return ((tokens.prompt_tokens * t.prompt) + (tokens.completion_tokens * t.completion))
}
export const countPromptTokens = (prompts:Message[], model:Model, settings: ChatSettings):number => {
export const countPromptTokens = (prompts:Message[], model:Model, chat: Chat):number => {
const detail = getModelDetail(model)
const count = prompts.reduce((a, m) => {
a += countMessageTokens(m, model, settings)
a += countMessageTokens(m, model, chat)
return a
}, 0)
switch (detail.type) {
@@ -25,13 +25,12 @@
}
}
export const countMessageTokens = (message:Message, model:Model, settings: ChatSettings):number => {
export const countMessageTokens = (message:Message, model:Model, chat: Chat):number => {
const detail = getModelDetail(model)
const start = detail.start && detail.start[0]
const stop = detail.stop && detail.stop[0]
const stop = getStopSequence(chat)
switch (detail.type) {
case 'Petals':
return countTokens(model, (start || '') + getRoleTag(message.role, model, settings) + ': ' + message.content + (stop || '###'))
return countTokens(model, getRoleTag(message.role, model, chat) + ': ' + message.content + (stop || '###'))
case 'OpenAIChat':
default:
// Not sure how OpenAI formats it, but this seems to get close to the right counts.
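
For the Petals branch above, the counted string is the role tag plus ': ' plus the content plus the stop sequence. A sketch of that composition, assuming llama-tokenizer-js's encode() as already imported by Models.svelte; the tag and stop values are illustrative defaults rather than values read from a real chat:

import llamaTokenizer from 'llama-tokenizer-js'

const roleTag = '[user] '   // what getRoleTag(role, model, chat) might return for a user message
const stop = '</s>'         // what getStopSequence(chat) might return for a Llama-2 chat
const composed = roleTag + ': ' + 'Hello there' + stop
const tokenCount = llamaTokenizer.encode(composed).length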

View File

@@ -13,7 +13,9 @@ export type ModelDetail = {
type: RequestType;
label?: string;
stop?: string[];
start?: string[];
userStart?: string,
assistantStart?: string,
systemStart?: string,
prompt: number;
completion: number;
max: number;
@@ -111,6 +113,10 @@ export type ChatSettings = {
trainingPrompts?: Message[];
useResponseAlteration?: boolean;
responseAlterations?: ResponseAlteration[];
stopSequence: string;
userMessageStart: string;
assistantMessageStart: string;
systemMessageStart: string;
isDirty?: boolean;
} & Request;
@@ -245,6 +251,8 @@ export type SubSetting = {
settings: any[];
};
export type ValueFn = (chatId:number) => string
export type ChatSetting = {
key: keyof ChatSettings;
name: string;
@@ -253,7 +261,7 @@ export type ChatSetting = {
hidden?: boolean; // Hide from setting menus
header?: string;
headerClass?: string;
placeholder?: string;
placeholder?: string | ValueFn;
hide?: (chatId:number) => boolean;
apiTransform?: (chatId:number, setting:ChatSetting, value:any) => any;
fieldControls?: FieldControl[];

View File

@@ -147,4 +147,9 @@
newChat()
}
export const valueOf = (chatId: number, value: any) => {
if (typeof value === 'function') return value(chatId)
return value
}
</script>
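
For reference, the new helper passes plain values through and calls function values with the chat id; the values below are illustrative only:

valueOf(42, '</s>')                               // => '</s>'
valueOf(42, (chatId: number) => `chat-${chatId}`) // => 'chat-42'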