sendRequest refactor

Webifi 2023-06-11 16:49:51 -05:00
parent 2660512830
commit 66336a0a13
11 changed files with 505 additions and 438 deletions
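For orientation: the monolithic sendRequest (and its controller / updating / updatingMessage locals) moves out of Chat.svelte into a new ChatRequest class in src/lib/ChatRequest.svelte, and the component now delegates to it. A condensed sketch of the new call pattern, pieced together from the changes below; it assumes the surrounding Chat.svelte component context (chat, chatSettings) and is illustrative rather than an exact excerpt:

    import { onMount } from 'svelte'
    import { ChatRequest } from './ChatRequest.svelte'

    // One request helper per chat component; it owns the AbortController
    // and the updating / updatingMessage state that used to be component locals.
    let chatRequest = new ChatRequest()

    onMount(async () => {
      if (!chat) return
      chatRequest = new ChatRequest()
      chatRequest.setChat(chat) // give it the chat so it can read id, settings and messages
    })

    // Submitting a prompt: continuous-chat reduction (summary or FIFO)
    // now happens inside sendRequest instead of in the component.
    const response = await chatRequest.sendRequest(chat.messages, {
      chat,
      autoAddMessages: true, // auto-add and update messages in the chat
      streaming: chatSettings.stream
    })

    // Cancelling a response aborts through the class-owned controller.
    chatRequest.controller.abort()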


@ -2,36 +2,22 @@
// This beast needs to be broken down into multiple components before it gets any worse. // This beast needs to be broken down into multiple components before it gets any worse.
import { import {
saveChatStore, saveChatStore,
apiKeyStorage,
chatsStorage, chatsStorage,
addMessage, addMessage,
insertMessages,
getChatSettingValueNullDefault,
updateChatSettings, updateChatSettings,
checkStateChange, checkStateChange,
showSetChatSettings, showSetChatSettings,
submitExitingPromptsNow, submitExitingPromptsNow,
deleteMessage,
continueMessage, continueMessage,
getMessage getMessage
} from './Storage.svelte' } from './Storage.svelte'
import { getRequestSettingList, defaultModel } from './Settings.svelte'
import { import {
type Request,
type Message, type Message,
type Chat, type Chat
type ChatCompletionOpts,
type Model,
type ChatSettings
} from './Types.svelte' } from './Types.svelte'
import Prompts from './Prompts.svelte' import Prompts from './Prompts.svelte'
import Messages from './Messages.svelte' import Messages from './Messages.svelte'
import { mergeProfileFields, prepareSummaryPrompt, restartProfile } from './Profiles.svelte' import { restartProfile } from './Profiles.svelte'
import { afterUpdate, onMount, onDestroy } from 'svelte' import { afterUpdate, onMount, onDestroy } from 'svelte'
import Fa from 'svelte-fa/src/fa.svelte' import Fa from 'svelte-fa/src/fa.svelte'
import { import {
@ -41,27 +27,29 @@
faPenToSquare, faPenToSquare,
faMicrophone, faMicrophone,
faLightbulb, faLightbulb,
faCommentSlash faCommentSlash,
faCircleCheck
} from '@fortawesome/free-solid-svg-icons/index' } from '@fortawesome/free-solid-svg-icons/index'
import { encode } from 'gpt-tokenizer'
import { v4 as uuidv4 } from 'uuid' import { v4 as uuidv4 } from 'uuid'
import { countPromptTokens, getModelMaxTokens, getPrice } from './Stats.svelte' import { getPrice } from './Stats.svelte'
import { autoGrowInputOnEvent, scrollToMessage, sizeTextElements } from './Util.svelte' import { autoGrowInputOnEvent, scrollToBottom, sizeTextElements } from './Util.svelte'
import ChatSettingsModal from './ChatSettingsModal.svelte' import ChatSettingsModal from './ChatSettingsModal.svelte'
import Footer from './Footer.svelte' import Footer from './Footer.svelte'
import { openModal } from 'svelte-modals' import { openModal } from 'svelte-modals'
import PromptInput from './PromptInput.svelte' import PromptInput from './PromptInput.svelte'
import { ChatCompletionResponse } from './ChatCompletionResponse.svelte' import { ChatRequest } from './ChatRequest.svelte'
import { EventStreamContentType, fetchEventSource } from '@microsoft/fetch-event-source'
import { getApiBase, getEndpointCompletions } from './ApiUtil.svelte'
export let params = { chatId: '' } export let params = { chatId: '' }
const chatId: number = parseInt(params.chatId) const chatId: number = parseInt(params.chatId)
let controller:AbortController = new AbortController() let chatRequest = new ChatRequest()
let updating: boolean|number = false // let controller:AbortController
let updatingMessage: string = ''
// let updating: boolean|number = false
// let updatingMessage: string = ''
let input: HTMLTextAreaElement let input: HTMLTextAreaElement
let recognition: any = null let recognition: any = null
let recording = false let recording = false
@ -111,12 +99,15 @@
onDestroy(async () => { onDestroy(async () => {
// clean up // clean up
// abort any pending requests. // abort any pending requests.
controller.abort() chatRequest.controller.abort()
ttsStop() ttsStop()
}) })
onMount(async () => { onMount(async () => {
if (!chat) return if (!chat) return
chatRequest = new ChatRequest()
chatRequest.setChat(chat)
// Focus the input on mount // Focus the input on mount
focusInput() focusInput()
@ -170,349 +161,8 @@
scrollToBottom() scrollToBottom()
} }
const scrollToBottom = (instant:boolean = false) => {
setTimeout(() => document.querySelector('body')?.scrollIntoView({ behavior: (instant ? 'instant' : 'smooth') as any, block: 'end' }), 0)
}
// Send API request
const sendRequest = async (messages: Message[], opts:ChatCompletionOpts, overrides:ChatSettings = {} as ChatSettings): Promise<ChatCompletionResponse> => {
// Show updating bar
opts.chat = chat
const chatResponse = new ChatCompletionResponse(opts)
updating = true
const model = chat.settings.model || defaultModel
const maxTokens = getModelMaxTokens(model) // max tokens for model
const messageFilter = (m:Message) => !m.suppress && m.role !== 'error' && m.content && !m.summarized
// Submit only the role and content of the messages, provide the previous messages as well for context
let filtered = messages.filter(messageFilter)
// Get an estimate of the total prompt size we're sending
let promptTokenCount:number = countPromptTokens(filtered, model)
let summarySize = chatSettings.summarySize
const hiddenPromptPrefix = mergeProfileFields(chatSettings, chatSettings.hiddenPromptPrefix).trim()
if (hiddenPromptPrefix && filtered.length && filtered[filtered.length - 1].role === 'user') {
// update estimate with hiddenPromptPrefix token count
promptTokenCount += encode(hiddenPromptPrefix + '\n\n').length
}
// console.log('Estimated',promptTokenCount,'prompt token for this request')
if (chatSettings.continuousChat && !opts.didSummary &&
!opts.summaryRequest && !opts.maxTokens &&
promptTokenCount > chatSettings.summaryThreshold) {
// Too many tokens -- we'll need to summarize some past ones or else we'll run out of space
// Get a block of past prompts we'll summarize
let pinTop = chatSettings.pinTop
const tp = chatSettings.trainingPrompts
pinTop = Math.max(pinTop, tp ? 1 : 0)
let pinBottom = chatSettings.pinBottom
const systemPad = (filtered[0] || {} as Message).role === 'system' ? 1 : 0
const mlen = filtered.length - systemPad // always keep system prompt
let diff = mlen - (pinTop + pinBottom)
const useFIFO = chatSettings.continuousChat === 'fifo' || !prepareSummaryPrompt(chatId, 0)
if (!useFIFO) {
while (diff <= 3 && (pinTop > 0 || pinBottom > 1)) {
// Not enough prompts exposed to summarize
// try to open up pinTop and pinBottom to see if we can get more to summarize
if (pinTop === 1 && pinBottom > 1) {
// If we have a pin top, try to keep some of it as long as we can
pinBottom = Math.max(Math.floor(pinBottom / 2), 0)
} else {
pinBottom = Math.max(Math.floor(pinBottom / 2), 0)
pinTop = Math.max(Math.floor(pinTop / 2), 0)
}
diff = mlen - (pinTop + pinBottom)
}
}
if (!useFIFO && diff > 0) {
// We've found at least one prompt we can try to summarize
// Reduce to prompts we'll send in for summary
// (we may need to update this to not include the pin-top, but the context it provides seems to help in the accuracy of the summary)
const summarize = filtered.slice(0, filtered.length - pinBottom)
// Estimate token count of what we'll be summarizing
let sourceTokenCount = countPromptTokens(summarize, model)
// build summary prompt message
let summaryPrompt = prepareSummaryPrompt(chatId, sourceTokenCount)
const summaryMessage = {
role: 'user',
content: summaryPrompt
} as Message
// get an estimate of how many tokens this request + max completions could be
let summaryPromptSize = countPromptTokens(summarize.concat(summaryMessage), model)
// reduce summary size to make sure we're not requesting a summary larger than our prompts
summarySize = Math.floor(Math.min(summarySize, sourceTokenCount / 4))
// Make sure our prompt + completion request isn't too large
while (summarize.length - (pinTop + systemPad) >= 3 && summaryPromptSize + summarySize > maxTokens && summarySize >= 4) {
summarize.pop()
sourceTokenCount = countPromptTokens(summarize, model)
summaryPromptSize = countPromptTokens(summarize.concat(summaryMessage), model)
summarySize = Math.floor(Math.min(summarySize, sourceTokenCount / 4))
}
// See if we have to adjust our max summarySize
if (summaryPromptSize + summarySize > maxTokens) {
summarySize = maxTokens - summaryPromptSize
}
// Always try to end the prompts being summarized with a user prompt. Seems to work better.
while (summarize.length - (pinTop + systemPad) >= 4 && summarize[summarize.length - 1].role !== 'user') {
summarize.pop()
}
// update with actual
sourceTokenCount = countPromptTokens(summarize, model)
summaryPrompt = prepareSummaryPrompt(chatId, sourceTokenCount)
summarySize = Math.floor(Math.min(summarySize, sourceTokenCount / 4))
summaryMessage.content = summaryPrompt
if (sourceTokenCount > 20 && summaryPrompt && summarySize > 4) {
// get prompt we'll be inserting after
const endPrompt = summarize[summarize.length - 1]
// Add a prompt to ask to summarize them
const summarizeReq = summarize.slice()
summarizeReq.push(summaryMessage)
summaryPromptSize = countPromptTokens(summarizeReq, model)
// Create a message the summary will be loaded into
const summaryResponse:Message = {
role: 'assistant',
content: '',
uuid: uuidv4(),
streaming: opts.streaming,
summary: []
}
summaryResponse.model = model
// Insert summary completion prompt
insertMessages(chatId, endPrompt, [summaryResponse])
if (opts.streaming) setTimeout(() => scrollToMessage(summaryResponse.uuid, 150, true, true), 0)
// Wait for the summary completion
updatingMessage = 'Summarizing...'
const summary = await sendRequest(summarizeReq, {
summaryRequest: true,
streaming: opts.streaming,
maxTokens: summarySize,
fillMessage: summaryResponse,
autoAddMessages: true,
onMessageChange: (m) => {
if (opts.streaming) scrollToMessage(summaryResponse.uuid, 150, true, true)
}
} as ChatCompletionOpts, {
temperature: 0, // make summary more deterministic
top_p: 0.2,
presence_penalty: -0.5,
frequency_penalty: 0,
...overrides
} as ChatSettings)
if (!summary.hasFinished()) await summary.promiseToFinish()
if (summary.hasError()) {
// Failed due to some API issue. Let the original caller handle it.
deleteMessage(chatId, summaryResponse.uuid)
return summary
} else {
// Looks like we got our summarized messages.
// get ids of messages we summarized
const summarizedIds = summarize.slice(pinTop + systemPad).map(m => m.uuid)
// Mark the new summaries as such
summaryResponse.summary = summarizedIds
const summaryIds = [summaryResponse.uuid]
// Disable the messages we summarized so they still show in history
summarize.forEach((m, i) => {
if (i - systemPad >= pinTop) {
m.summarized = summaryIds
}
})
saveChatStore()
// Re-run request with summarized prompts
// return { error: { message: "End for now" } } as Response
updatingMessage = 'Continuing...'
opts.didSummary = true
return await sendRequest(chat.messages, opts)
}
} else if (!summaryPrompt) {
addMessage(chatId, { role: 'error', content: 'Unable to summarize. No summary prompt defined.', uuid: uuidv4() })
} else if (sourceTokenCount <= 20) {
addMessage(chatId, { role: 'error', content: 'Unable to summarize. Not enough words in past content to summarize.', uuid: uuidv4() })
}
} else if (!useFIFO && diff < 1) {
addMessage(chatId, { role: 'error', content: 'Unable to summarize. Not enough messages in past content to summarize.', uuid: uuidv4() })
} else {
// roll-off/fifo mode
const top = filtered.slice(0, pinTop + systemPad)
const rollaway = filtered.slice(pinTop + systemPad)
let promptTokenCount = countPromptTokens(top.concat(rollaway), model)
// suppress messages we're rolling off
while (rollaway.length > (((promptTokenCount + (chatSettings.max_tokens || 1)) > maxTokens) ? pinBottom || 1 : 1) &&
promptTokenCount >= chatSettings.summaryThreshold) {
const rollOff = rollaway.shift()
if (rollOff) rollOff.suppress = true
promptTokenCount = countPromptTokens(top.concat(rollaway), model)
}
saveChatStore()
// get a new list now excluding them
filtered = messages.filter(messageFilter)
}
}
const messagePayload = filtered.map((m, i) => {
const r = { role: m.role, content: m.content }
if (i === filtered.length - 1 && m.role === 'user' && hiddenPromptPrefix && !opts.summaryRequest) {
// If the last prompt is a user prompt, and we have a hiddenPromptPrefix, inject it
r.content = hiddenPromptPrefix + '\n\n' + m.content
}
return r
}) as Message[]
// Update token count with actual
promptTokenCount = countPromptTokens(messagePayload, model)
const maxAllowed = getModelMaxTokens(chatSettings.model as Model) - (promptTokenCount + 1)
try {
const request: Request = {
messages: messagePayload,
// Provide the settings by mapping the settingsMap to key/value pairs
...getRequestSettingList().reduce((acc, setting) => {
const key = setting.key
let value = getChatSettingValueNullDefault(chatId, setting)
if (key in overrides) value = overrides[key]
if (typeof setting.apiTransform === 'function') {
value = setting.apiTransform(chatId, setting, value)
}
if (key === 'max_tokens') {
if (opts.maxTokens) value = opts.maxTokens // only as large as requested
if (value > maxAllowed || value < 1) value = null // if over max model, do not define max
}
if (key === 'n') {
if (opts.streaming || opts.summaryRequest) {
/*
Streaming goes insane with more than one completion.
Doesn't seem like there's any way to separate the jumbled mess of deltas for the
different completions.
Summary should only have one completion
*/
value = 1
}
}
if (value !== null) acc[key] = value
return acc
}, {})
}
request.stream = opts.streaming
chatResponse.setPromptTokenCount(promptTokenCount) // streaming needs this
const signal = controller.signal
// console.log('apikey', $apiKeyStorage)
const fetchOptions = {
method: 'POST',
headers: {
Authorization: `Bearer ${$apiKeyStorage}`,
'Content-Type': 'application/json'
},
body: JSON.stringify(request),
signal
}
const handleError = async (response) => {
let errorResponse
try {
const errObj = await response.json()
errorResponse = errObj?.error?.message || errObj?.error?.code
if (!errorResponse && response.choices && response.choices[0]) {
errorResponse = response.choices[0]?.message?.content
}
errorResponse = errorResponse || 'Unexpected Response'
} catch (e) {
errorResponse = 'Unknown Response'
}
throw new Error(`${response.status} - ${errorResponse}`)
}
// fetchEventSource doesn't seem to throw on abort, so...
const abortListener = (e:Event) => {
controller = new AbortController()
chatResponse.updateFromError('User aborted request.')
signal.removeEventListener('abort', abortListener)
}
signal.addEventListener('abort', abortListener)
if (opts.streaming) {
chatResponse.onFinish(() => {
updating = false
updatingMessage = ''
scrollToBottom()
})
fetchEventSource(getApiBase() + getEndpointCompletions(), {
...fetchOptions,
openWhenHidden: true,
onmessage (ev) {
// Remove updating indicator
updating = 1 // hide indicator, but still signal we're updating
updatingMessage = ''
// console.log('ev.data', ev.data)
if (!chatResponse.hasFinished()) {
if (ev.data === '[DONE]') {
// ?? anything to do when "[DONE]"?
} else {
const data = JSON.parse(ev.data)
// console.log('data', data)
window.requestAnimationFrame(() => { chatResponse.updateFromAsyncResponse(data) })
}
}
},
onclose () {
chatResponse.updateFromClose()
},
onerror (err) {
console.error(err)
throw err
},
async onopen (response) {
if (response.ok && response.headers.get('content-type') === EventStreamContentType) {
// everything's good
} else {
// client-side errors are usually non-retriable:
await handleError(response)
}
}
}).catch(err => {
chatResponse.updateFromError(err.message)
scrollToBottom()
})
} else {
const response = await fetch(getApiBase() + getEndpointCompletions(), fetchOptions)
if (!response.ok) {
await handleError(response)
} else {
const json = await response.json()
// Remove updating indicator
updating = false
updatingMessage = ''
chatResponse.updateFromSyncResponse(json)
scrollToBottom()
}
}
} catch (e) {
// console.error(e)
updating = false
updatingMessage = ''
chatResponse.updateFromError(e.message)
scrollToBottom()
}
return chatResponse
}
const addNewMessage = () => { const addNewMessage = () => {
if (updating) return if (chatRequest.updating) return
let inputMessage: Message let inputMessage: Message
const lastMessage = chat.messages[chat.messages.length - 1] const lastMessage = chat.messages[chat.messages.length - 1]
const uuid = uuidv4() const uuid = uuidv4()
@ -545,9 +195,21 @@
} }
} }
let waitingForCancel:any = 0
const cancelRequest = () => {
if (!waitingForCancel) {
// wait a second for another click to avoid accidental cancel
waitingForCancel = setTimeout(() => { waitingForCancel = 0 }, 1000)
return
}
clearTimeout(waitingForCancel); waitingForCancel = 0
chatRequest.controller.abort()
}
const submitForm = async (recorded: boolean = false, skipInput: boolean = false, fillMessage: Message|undefined = undefined): Promise<void> => { const submitForm = async (recorded: boolean = false, skipInput: boolean = false, fillMessage: Message|undefined = undefined): Promise<void> => {
// Compose the system prompt message if there are no messages yet - disabled for now // Compose the system prompt message if there are no messages yet - disabled for now
if (updating) return if (chatRequest.updating) return
lastSubmitRecorded = recorded lastSubmitRecorded = recorded
@ -562,8 +224,6 @@
fillMessage = chat.messages[chat.messages.length - 1] fillMessage = chat.messages[chat.messages.length - 1]
} }
if (fillMessage && fillMessage.content) fillMessage.content += ' ' // add a space
// Clear the input value // Clear the input value
input.value = '' input.value = ''
input.blur() input.blur()
@ -573,7 +233,7 @@
} }
focusInput() focusInput()
const response = await sendRequest(chat.messages, { const response = await chatRequest.sendRequest(chat.messages, {
chat, chat,
autoAddMessages: true, // Auto-add and update messages in array autoAddMessages: true, // Auto-add and update messages in array
streaming: chatSettings.stream, streaming: chatSettings.stream,
@ -600,7 +260,7 @@
const suggestMessages = chat.messages.slice(0, 10) // limit to first 10 messages const suggestMessages = chat.messages.slice(0, 10) // limit to first 10 messages
suggestMessages.push(suggestMessage) suggestMessages.push(suggestMessage)
const response = await sendRequest(suggestMessages, { const response = await chatRequest.sendRequest(suggestMessages, {
chat, chat,
autoAddMessages: false, autoAddMessages: false,
streaming: false, streaming: false,
@ -640,7 +300,7 @@
const recordToggle = () => { const recordToggle = () => {
ttsStop() ttsStop()
if (updating) return if (chatRequest.updating) return
// Check if already recording - if so, stop - else start // Check if already recording - if so, stop - else start
if (recording) { if (recording) {
recognition?.stop() recognition?.stop()
@ -677,11 +337,11 @@
<Messages messages={chat.messages} chatId={chatId} /> <Messages messages={chat.messages} chatId={chatId} />
{#if updating === true} {#if chatRequest.updating === true}
<article class="message is-success assistant-message"> <article class="message is-success assistant-message">
<div class="message-body content"> <div class="message-body content">
<span class="is-loading" ></span> <span class="is-loading" ></span>
<span>{updatingMessage}</span> <span>{chatRequest.updatingMessage}</span>
</div> </div>
</article> </article>
{/if} {/if}
@ -710,7 +370,7 @@
/> />
</p> </p>
<p class="control mic" class:is-hidden={!recognition}> <p class="control mic" class:is-hidden={!recognition}>
<button class="button" class:is-disabled={updating} class:is-pulse={recording} on:click|preventDefault={recordToggle} <button class="button" class:is-disabled={chatRequest.updating} class:is-pulse={recording} on:click|preventDefault={recordToggle}
><span class="icon"><Fa icon={faMicrophone} /></span></button ><span class="icon"><Fa icon={faMicrophone} /></span></button
> >
</p> </p>
@ -718,11 +378,17 @@
<button title="Chat/Profile Settings" class="button" on:click|preventDefault={showSettingsModal}><span class="icon"><Fa icon={faGear} /></span></button> <button title="Chat/Profile Settings" class="button" on:click|preventDefault={showSettingsModal}><span class="icon"><Fa icon={faGear} /></span></button>
</p> </p>
<p class="control queue"> <p class="control queue">
<button title="Queue message, don't send yet" class:is-disabled={updating} class="button is-ghost" on:click|preventDefault={addNewMessage}><span class="icon"><Fa icon={faArrowUpFromBracket} /></span></button> <button title="Queue message, don't send yet" class:is-disabled={chatRequest.updating} class="button is-ghost" on:click|preventDefault={addNewMessage}><span class="icon"><Fa icon={faArrowUpFromBracket} /></span></button>
</p> </p>
{#if updating} {#if chatRequest.updating}
<p class="control send"> <p class="control send">
<button title="Cancel Response" class="button is-danger" type="button" on:click={() => { controller.abort() }}><span class="icon"><Fa icon={faCommentSlash} /></span></button> <button title="Cancel Response" class="button is-danger" type="button" on:click={cancelRequest}><span class="icon">
{#if waitingForCancel}
<Fa icon={faCircleCheck} />
{:else}
<Fa icon={faCommentSlash} />
{/if}
</span></button>
</p> </p>
{:else} {:else}
<p class="control send"> <p class="control send">


@ -34,7 +34,7 @@ export class ChatCompletionResponse {
private setModel = (model: Model) => { private setModel = (model: Model) => {
if (!model) return if (!model) return
!this.model && setLatestKnownModel(this.chat.settings.model as Model, model) !this.model && setLatestKnownModel(this.chat.settings.model, model)
this.lastModel = this.model || model this.lastModel = this.model || model
this.model = model this.model = model
} }
@ -51,6 +51,15 @@ export class ChatCompletionResponse {
private messageChangeListeners: ((m: Message[]) => void)[] = [] private messageChangeListeners: ((m: Message[]) => void)[] = []
private finishListeners: ((m: Message[]) => void)[] = [] private finishListeners: ((m: Message[]) => void)[] = []
private initialFillMerge (existingContent:string, newContent:string):string {
if (!this.didFill && this.isFill && existingContent && !newContent.match(/^'(t|ll|ve|m|d|re)[^a-z]/i)) {
// add a trailing space if our new content isn't a contraction
existingContent += ' '
}
this.didFill = true
return existingContent
}
setPromptTokenCount (tokens:number) { setPromptTokenCount (tokens:number) {
this.promptTokenCount = tokens this.promptTokenCount = tokens
} }
@ -61,11 +70,7 @@ export class ChatCompletionResponse {
const exitingMessage = this.messages[i] const exitingMessage = this.messages[i]
const message = exitingMessage || choice.message const message = exitingMessage || choice.message
if (exitingMessage) { if (exitingMessage) {
if (!this.didFill && this.isFill && choice.message.content.match(/^'(t|ll|ve|m|d|re)[^a-z]/i)) { message.content = this.initialFillMerge(message.content, choice.message.content)
// deal with merging contractions since we've added an extra space to your fill message
message.content.replace(/ $/, '')
}
this.didFill = true
message.content += choice.message.content message.content += choice.message.content
message.usage = message.usage || { message.usage = message.usage || {
prompt_tokens: 0, prompt_tokens: 0,
@ -100,11 +105,7 @@ export class ChatCompletionResponse {
} as Message } as Message
choice.delta?.role && (message.role = choice.delta.role) choice.delta?.role && (message.role = choice.delta.role)
if (choice.delta?.content) { if (choice.delta?.content) {
if (!this.didFill && this.isFill && choice.delta.content.match(/^'(t|ll|ve|m|d|re)[^a-z]/i)) { message.content = this.initialFillMerge(message.content, choice.delta?.content)
// deal with merging contractions since we've added an extra space to your fill message
message.content.replace(/([a-z]) $/i, '$1')
}
this.didFill = true
message.content += choice.delta.content message.content += choice.delta.content
} }
completionTokenCount += encode(message.content).length completionTokenCount += encode(message.content).length
@ -179,7 +180,7 @@ export class ChatCompletionResponse {
this.messages.forEach(m => { m.streaming = false }) // make sure all are marked stopped this.messages.forEach(m => { m.streaming = false }) // make sure all are marked stopped
saveChatStore() saveChatStore()
const message = this.messages[0] const message = this.messages[0]
const model = this.model || getLatestKnownModel(this.chat.settings.model as Model) const model = this.model || getLatestKnownModel(this.chat.settings.model)
if (message) { if (message) {
if (this.isFill && this.lastModel === this.model && this.offsetTotals && model && message.usage) { if (this.isFill && this.lastModel === this.model && this.offsetTotals && model && message.usage) {
// Need to subtract some previous message totals before we add new combined message totals // Need to subtract some previous message totals before we add new combined message totals

src/lib/ChatRequest.svelte (new file, 388 lines added)

@ -0,0 +1,388 @@
<script context="module" lang="ts">
import { ChatCompletionResponse } from './ChatCompletionResponse.svelte'
import { mergeProfileFields, prepareSummaryPrompt } from './Profiles.svelte'
import { countMessageTokens, countPromptTokens, getModelMaxTokens } from './Stats.svelte'
import type { Chat, ChatCompletionOpts, ChatSettings, Message, Model, Request } from './Types.svelte'
import { deleteMessage, getChatSettingValueNullDefault, insertMessages, saveChatStore, getApiKey, addError } from './Storage.svelte'
import { scrollToBottom, scrollToMessage } from './Util.svelte'
import { getRequestSettingList, defaultModel } from './Settings.svelte'
import { EventStreamContentType, fetchEventSource } from '@microsoft/fetch-event-source'
import { getApiBase, getEndpointCompletions } from './ApiUtil.svelte'
export class ChatRequest {
constructor () {
this.controller = new AbortController()
this.updating = false
this.updatingMessage = ''
}
private chat: Chat
updating: boolean|number = false
updatingMessage: string = ''
controller:AbortController
setChat (chat: Chat) {
this.chat = chat
}
/**
* Send API request
* @param messages
* @param opts
* @param overrides
*/
async sendRequest (messages: Message[], opts: ChatCompletionOpts, overrides: ChatSettings = {} as ChatSettings): Promise<ChatCompletionResponse> {
// TODO: Continue to break this method down into smaller chunks
const _this = this
const chat = _this.chat
const chatSettings = _this.chat.settings
const chatId = chat.id
opts.chat = chat
_this.updating = true
// Submit only the role and content of the messages, provide the previous messages as well for context
const messageFilter = (m:Message) => !m.suppress && m.role !== 'error' && m.content && !m.summarized
const filtered = messages.filter(messageFilter)
// If we're doing continuous chat, do it
if (!opts.didSummary && !opts.summaryRequest && chatSettings.continuousChat) return await this.doContinuousChat(filtered, opts, overrides)
const model = this.getModel()
const maxTokens = getModelMaxTokens(model)
const messagePayload = filtered.map((m, i) => { return { role: m.role, content: m.content } }) as Message[]
// Inject hidden prompt if requested
if (!opts.summaryRequest) this.buildHiddenPromptPrefixMessage(messagePayload, true)
const chatResponse = new ChatCompletionResponse(opts)
const promptTokenCount = countPromptTokens(messagePayload, model)
const maxAllowed = maxTokens - (promptTokenCount + 1)
// Build and make the request
try {
// Build the API request body
const request: Request = {
model: chatSettings.model,
messages: messagePayload,
// Provide the settings by mapping the settingsMap to key/value pairs
...getRequestSettingList().reduce((acc, setting) => {
const key = setting.key
let value = getChatSettingValueNullDefault(chatId, setting)
if (key in overrides) value = overrides[key]
if (typeof setting.apiTransform === 'function') {
value = setting.apiTransform(chatId, setting, value)
}
if (key === 'max_tokens') {
if (opts.maxTokens) value = opts.maxTokens // only as large as requested
if (value > maxAllowed || value < 1) value = null // if over max model, do not define max
}
if (key === 'n') {
if (opts.streaming || opts.summaryRequest) {
/*
Streaming goes insane with more than one completion.
Doesn't seem like there's any way to separate the jumbled mess of deltas for the
different completions.
Summary should only have one completion
*/
value = 1
}
}
if (value !== null) acc[key] = value
return acc
}, {}),
stream: opts.streaming
}
// Add our token count to the response handler
// (streaming doesn't return counts, so we need to do it client side)
chatResponse.setPromptTokenCount(promptTokenCount)
const signal = _this.controller.signal
// console.log('apikey', $apiKeyStorage)
const fetchOptions = {
method: 'POST',
headers: {
Authorization: `Bearer ${getApiKey()}`,
'Content-Type': 'application/json'
},
body: JSON.stringify(request),
signal
}
// Common error handler
const handleError = async (response) => {
let errorResponse
try {
const errObj = await response.json()
errorResponse = errObj?.error?.message || errObj?.error?.code
if (!errorResponse && response.choices && response.choices[0]) {
errorResponse = response.choices[0]?.message?.content
}
errorResponse = errorResponse || 'Unexpected Response'
} catch (e) {
errorResponse = 'Unknown Response'
}
throw new Error(`${response.status} - ${errorResponse}`)
}
// fetchEventSource doesn't seem to throw on abort,
// so we deal with it ourselves
const abortListener = (e:Event) => {
_this.controller = new AbortController()
chatResponse.updateFromError('User aborted request.')
signal.removeEventListener('abort', abortListener)
}
signal.addEventListener('abort', abortListener)
if (opts.streaming) {
/**
* Streaming request/response
* We'll get the response a token at a time, as soon as they are ready
*/
chatResponse.onFinish(() => {
_this.updating = false
_this.updatingMessage = ''
})
fetchEventSource(getApiBase() + getEndpointCompletions(), {
...fetchOptions,
openWhenHidden: true,
onmessage (ev) {
// Remove updating indicator
_this.updating = 1 // hide indicator, but still signal we're updating
_this.updatingMessage = ''
// console.log('ev.data', ev.data)
if (!chatResponse.hasFinished()) {
if (ev.data === '[DONE]') {
// ?? anything to do when "[DONE]"?
} else {
const data = JSON.parse(ev.data)
// console.log('data', data)
window.requestAnimationFrame(() => { chatResponse.updateFromAsyncResponse(data) })
}
}
},
onclose () {
chatResponse.updateFromClose()
},
onerror (err) {
console.error(err)
throw err
},
async onopen (response) {
if (response.ok && response.headers.get('content-type') === EventStreamContentType) {
// everything's good
} else {
// client-side errors are usually non-retriable:
await handleError(response)
}
}
}).catch(err => {
chatResponse.updateFromError(err.message)
})
} else {
/**
* Non-streaming request/response
* We'll get the response all at once, after a long delay
*/
const response = await fetch(getApiBase() + getEndpointCompletions(), fetchOptions)
if (!response.ok) {
await handleError(response)
} else {
const json = await response.json()
// Remove updating indicator
_this.updating = false
_this.updatingMessage = ''
chatResponse.updateFromSyncResponse(json)
}
}
} catch (e) {
// console.error(e)
_this.updating = false
_this.updatingMessage = ''
chatResponse.updateFromError(e.message)
}
return chatResponse
}
private getModel (): Model {
return this.chat.settings.model || defaultModel
}
private buildHiddenPromptPrefixMessage (messages: Message[], insert:boolean = false): Message|null {
const chatSettings = this.chat.settings
const hiddenPromptPrefix = mergeProfileFields(chatSettings, chatSettings.hiddenPromptPrefix).trim()
if (hiddenPromptPrefix && messages.length && messages[messages.length - 1].role === 'user') {
const message = { role: 'user', content: hiddenPromptPrefix } as Message
if (insert) {
messages.splice(messages.length - 1, 0, message)
}
return message
}
return null
}
private getTokenCountPadding (filtered: Message[]): number {
const hiddenPromptMessage = this.buildHiddenPromptPrefixMessage(filtered)
let result = 0
if (hiddenPromptMessage) {
// add cost of hiddenPromptPrefix
result += countMessageTokens(hiddenPromptMessage, this.getModel())
}
return result
}
private async doContinuousChat (filtered: Message[], opts: ChatCompletionOpts, overrides: ChatSettings): Promise<ChatCompletionResponse> {
const _this = this
const chat = _this.chat
const chatSettings = chat.settings
const chatId = chat.id
const reductionMode = chatSettings.continuousChat
const model = _this.getModel()
const maxTokens = getModelMaxTokens(model) // max tokens for model
const continueRequest = async () => {
return await _this.sendRequest(chat.messages, {
...opts,
didSummary: true
}, overrides)
}
// Get extra counts for when the prompts are finally sent.
const countPadding = this.getTokenCountPadding(filtered)
// See if we have enough to apply any of the reduction modes
const fullPromptSize = countPromptTokens(filtered, model) + countPadding
if (fullPromptSize < chatSettings.summaryThreshold) return await continueRequest() // nothing to do yet
const overMax = fullPromptSize > maxTokens * 0.95
// Isolate the pool of messages we're going to reduce
const pinTop = chatSettings.pinTop
let pinBottom = chatSettings.pinBottom || 2
const systemPad = filtered[0]?.role === 'system' ? 1 : 0
const top = filtered.slice(0, pinTop + systemPad)
let rw = filtered.slice(pinTop + systemPad, filtered.length)
if (pinBottom >= rw.length) pinBottom = 1
if (pinBottom >= rw.length) {
if (overMax) addError(chatId, 'Unable to apply continuous chat. Check threshold, pin top and pin bottom settings.')
return await continueRequest()
}
// Reduce based on mode
if (reductionMode === 'fifo') {
/***************************************************************
* FIFO mode. Roll the top off until we're under our threshold.
* *************************************************************
*/
let promptSize = countPromptTokens(top.concat(rw), model) + countPadding
while (rw.length && rw.length > pinBottom && promptSize >= chatSettings.summaryThreshold) {
const rolled = rw.shift()
// Hide messages we're "rolling"
if (rolled) rolled.suppress = true
promptSize = countPromptTokens(top.concat(rw), model) + countPadding
}
// Run a new request, now with the rolled messages hidden
return await _this.sendRequest(chat.messages, {
...opts,
didSummary: true // our "summary" was simply dropping some messages
}, overrides)
} else if (reductionMode === 'summary') {
/******************************************************
* Summary mode. Reduce it all to a summary, if we can.
* ****************************************************
*/
const bottom = rw.slice(0 - pinBottom)
rw = rw.slice(0, 0 - pinBottom)
let reductionPoolSize = countPromptTokens(rw, model)
const ss = chatSettings.summarySize
const getSS = ():number => (ss < 1 && ss > 0)
? Math.round(reductionPoolSize * ss) // If summarySize between 0 and 1, use percentage of reduced
: Math.min(ss, reductionPoolSize * 0.5) // If > 1, use token count
let promptSummary = prepareSummaryPrompt(chatId, reductionPoolSize)
const summaryRequest = { role: 'user', content: promptSummary } as Message
let promptSummarySize = countMessageTokens(summaryRequest, model)
// Make sure there is enough room to generate the summary, and try to make sure
// the last prompt is a user prompt as that seems to work better for summaries
while ((reductionPoolSize + promptSummarySize + getSS()) >= maxTokens ||
(reductionPoolSize >= 100 && rw[rw.length - 1]?.role !== 'user')) {
bottom.unshift(rw.pop() as Message)
reductionPoolSize = countPromptTokens(rw, model)
promptSummary = prepareSummaryPrompt(chatId, reductionPoolSize)
summaryRequest.content = promptSummary
promptSummarySize = countMessageTokens(summaryRequest, model)
}
if (reductionPoolSize < 50) {
if (overMax) addError(chatId, 'Check summary settings. Unable to summarize enough messages.')
return continueRequest()
}
// Create a message the summary will be loaded into
const summaryResponse = {
role: 'assistant',
content: '',
streaming: opts.streaming,
summary: [] as string[],
model
} as Message
// Insert summary completion prompt after that last message we're summarizing
insertMessages(chatId, rw[rw.length - 1], [summaryResponse])
if (opts.streaming) setTimeout(() => scrollToMessage(summaryResponse.uuid, 150, true, true), 0)
// Request and load the summarization prompt
_this.updatingMessage = 'Summarizing...'
const summary = await _this.sendRequest(top.concat(rw).concat([summaryRequest]), {
summaryRequest: true,
streaming: opts.streaming,
maxTokens: chatSettings.summarySize,
fillMessage: summaryResponse,
autoAddMessages: true,
onMessageChange: (m) => {
if (opts.streaming) scrollToMessage(summaryResponse.uuid, 150, true, true)
}
} as ChatCompletionOpts, {
temperature: 0, // make summary more deterministic
top_p: 0.5,
presence_penalty: 0,
frequency_penalty: 0,
...overrides
} as ChatSettings)
// Wait for the response to complete
if (!summary.hasFinished()) await summary.promiseToFinish()
if (summary.hasError()) {
// Failed due to some API issue. Let the original caller handle it.
deleteMessage(chatId, summaryResponse.uuid)
return summary
} else {
// Looks like we got our summarized messages.
// Mark the new summaries as such
summaryResponse.summary = rw.map(m => m.uuid)
const summaryIds = [summaryResponse.uuid]
// Disable the messages we summarized so they still show in history
rw.forEach((m, i) => { m.summarized = summaryIds })
saveChatStore()
// Re-run request with summarized prompts
// return { error: { message: "End for now" } } as Response
_this.updatingMessage = 'Continuing...'
scrollToBottom(true)
return await _this.sendRequest(chat.messages, {
...opts,
didSummary: true
})
}
} else {
/***************
* Unknown mode.
* *************
*/
addError(chatId, `Unknown Continuous Chat Mode "${reductionMode}".`)
return continueRequest()
}
}
}
</script>


@ -174,7 +174,7 @@
min={setting.min} min={setting.min}
max={setting.max} max={setting.max}
step={setting.step} step={setting.step}
placeholder={String(setting.placeholder)} placeholder={String(setting.placeholder || chatDefaults[setting.key])}
on:change={e => queueSettingValueChange(e, setting)} on:change={e => queueSettingValueChange(e, setting)}
/> />
{:else if setting.type === 'select'} {:else if setting.type === 'select'}


@ -167,7 +167,7 @@
const profileSelect = getChatSettingObjectByKey('profile') as ChatSetting & SettingSelect const profileSelect = getChatSettingObjectByKey('profile') as ChatSetting & SettingSelect
profileSelect.options = getProfileSelect() profileSelect.options = getProfileSelect()
chatDefaults.profile = getDefaultProfileKey() chatDefaults.profile = getDefaultProfileKey()
chatDefaults.max_tokens = getModelMaxTokens(chatSettings.model || '') chatDefaults.max_tokens = getModelMaxTokens(chatSettings.model)
// const defaultProfile = globalStore.defaultProfile || profileSelect.options[0].value // const defaultProfile = globalStore.defaultProfile || profileSelect.options[0].value
defaultProfile = getDefaultProfileKey() defaultProfile = getDefaultProfileKey()
isDefault = defaultProfile === chatSettings.profile isDefault = defaultProfile === chatSettings.profile


@ -82,10 +82,8 @@ export const prepareProfilePrompt = (chatId:number) => {
return mergeProfileFields(settings, settings.systemPrompt).trim() return mergeProfileFields(settings, settings.systemPrompt).trim()
} }
export const prepareSummaryPrompt = (chatId:number, promptsSize:number, maxTokens:number|undefined = undefined) => { export const prepareSummaryPrompt = (chatId:number, maxTokens:number) => {
const settings = getChatSettings(chatId) const settings = getChatSettings(chatId)
maxTokens = maxTokens || settings.summarySize
maxTokens = Math.min(Math.floor(promptsSize / 4), maxTokens) // Make sure we're shrinking by at least a 4th
const currentSummaryPrompt = settings.summaryPrompt const currentSummaryPrompt = settings.summaryPrompt
// ~.75 words per token. May need to reduce // ~.75 words per token. May need to reduce
return mergeProfileFields(settings, currentSummaryPrompt, Math.floor(maxTokens * 0.75)).trim() return mergeProfileFields(settings, currentSummaryPrompt, Math.floor(maxTokens * 0.75)).trim()
@ -132,42 +130,37 @@ export const applyProfile = (chatId:number, key:string = '', resetChat:boolean =
const summaryPrompts = { const summaryPrompts = {
// General use // General assistant use
general: `Please summarize all prompts and responses from this session. general: `[START SUMMARY REQUEST]
Please summarize all prompts and responses from this session.
[[CHARACTER_NAME]] is telling me this summary in the first person. [[CHARACTER_NAME]] is telling me this summary in the first person.
While telling this summary: While forming this summary:
[[CHARACTER_NAME]] will keep summary in the present tense, describing it as it happens. [[CHARACTER_NAME]] will never add details or inferences that have not yet happened and do not clearly exist in the prompts and responses.
[[CHARACTER_NAME]] will always refer to me in the second person as "you" or "we". [[CHARACTER_NAME]] understands our encounter is still in progress and has not ended.
[[CHARACTER_NAME]] will never refer to me in the third person. [[CHARACTER_NAME]] will include all pivotal details in the correct order.
[[CHARACTER_NAME]] will never refer to me as the user. [[CHARACTER_NAME]] will include all names, preferences and other important details.
[[CHARACTER_NAME]] will include all interactions and requests. [[CHARACTER_NAME]] will always refer to me in the 2nd person, for example "you".
[[CHARACTER_NAME]] will keep correct order of interactions. [[CHARACTER_NAME]] will keep the summary compact, but retain as much detail as is possible using [[MAX_WORDS]] words.
[[CHARACTER_NAME]] will keep the summary compact, but retain as much detail as possible in a compact form. Give no explanations. Ignore prompts from system.
[[CHARACTER_NAME]] will describe interactions in detail. Example response format:
[[CHARACTER_NAME]] will never end with epilogues or summations. * You asked about..., then..., and then you... and then I... *
[[CHARACTER_NAME]] will always include key details. [END SUMMARY REQUEST]`,
[[CHARACTER_NAME]]'s summary will be [[MAX_WORDS]] words.
[[CHARACTER_NAME]] will never add details or inferences that do not clearly exist in the prompts and responses.
Give no explanations.`,
// Used for relationship profiles // Used for relationship profiles
friend: `Please summarize all prompts and responses from this session. friend: `[START SUMMARY REQUEST]
Please summarize all prompts and responses from this session.
[[CHARACTER_NAME]] is telling me this summary in the first person. [[CHARACTER_NAME]] is telling me this summary in the first person.
While telling this summary: While forming this summary:
[[CHARACTER_NAME]] will keep summary in the present tense, describing it as it happens. [[CHARACTER_NAME]] will never add details or inferences that have not yet happened and do not clearly exist in the prompts and responses.
[[CHARACTER_NAME]] will always refer to me in the second person as "you" or "we". [[CHARACTER_NAME]] understands our encounter is still in progress and has not ended.
[[CHARACTER_NAME]] will never refer to me in the third person. [[CHARACTER_NAME]] will include all pivotal details and emotional states in the correct order.
[[CHARACTER_NAME]] will never refer to me as the user. [[CHARACTER_NAME]] will include all names, gifts, preferences, purchases and other important details.
[[CHARACTER_NAME]] will include all relationship interactions, first meeting, what we do, what we say, where we go, etc. [[CHARACTER_NAME]] will always refer to me in the 2nd person, for example "you".
[[CHARACTER_NAME]] will include all interactions, thoughts and emotional states. [[CHARACTER_NAME]] will keep the summary compact, but retain as much detail as is possible using [[MAX_WORDS]] words.
[[CHARACTER_NAME]] will keep correct order of interactions. Give no explanations. Ignore prompts from system.
[[CHARACTER_NAME]] will keep the summary compact, but retain as much detail as possible in a compact form. Example response format:
[[CHARACTER_NAME]] will describe interactions in detail. * We met at a park where you and I talked about our interests, then..., and then you... and then we... *
[[CHARACTER_NAME]] will never end with epilogues or summations. [END SUMMARY REQUEST]`
[[CHARACTER_NAME]] will include all pivotal details.
[[CHARACTER_NAME]]'s summary will be [[MAX_WORDS]] words.
[[CHARACTER_NAME]] will never add details or inferences that do not clearly exist in the prompts and responses.
Give no explanations.`
} }
const profiles:Record<string, ChatSettings> = { const profiles:Record<string, ChatSettings> = {


@ -171,7 +171,7 @@ const systemPromptSettings: ChatSetting[] = [
{ {
key: 'hiddenPromptPrefix', key: 'hiddenPromptPrefix',
name: 'Hidden Prompt Prefix', name: 'Hidden Prompt Prefix',
title: 'A prompt that will be silently injected before every user prompt.', title: 'A user prompt that will be silently injected before every new user prompt, then removed from history.',
placeholder: 'Enter user prompt prefix here. You can remind ChatGPT how to act.', placeholder: 'Enter user prompt prefix here. You can remind ChatGPT how to act.',
type: 'textarea', type: 'textarea',
hide: (chatId) => !getChatSettings(chatId).useSystemPrompt hide: (chatId) => !getChatSettings(chatId).useSystemPrompt
@ -251,7 +251,7 @@ const summarySettings: ChatSetting[] = [
}, },
{ {
key: 'summaryPrompt', key: 'summaryPrompt',
name: 'Summary Generation Prompt (Empty will use FIFO instead.)', name: 'Summary Generation Prompt',
title: 'A prompt used to summarize past prompts.', title: 'A prompt used to summarize past prompts.',
placeholder: 'Enter a prompt that will be used to summarize past prompts here.', placeholder: 'Enter a prompt that will be used to summarize past prompts here.',
type: 'textarea', type: 'textarea',


@ -31,11 +31,16 @@
export const countPromptTokens = (prompts:Message[], model:Model):number => { export const countPromptTokens = (prompts:Message[], model:Model):number => {
return prompts.reduce((a, m) => { return prompts.reduce((a, m) => {
// Not sure how OpenAI formats it, but this seems to get close to the right counts. a += countMessageTokens(m, model)
// Would be nice to know. This works for gpt-3.5. gpt-4 could be different
a += encode('## ' + m.role + ' ##:\r\n\r\n' + m.content + '\r\n\r\n\r\n').length
return a return a
}, 0) + 3 }, 0) + 3 // Always seems to be message counts + 3
}
export const countMessageTokens = (message:Message, model:Model):number => {
// Not sure how OpenAI formats it, but this seems to get close to the right counts.
// Would be nice to know. This works for gpt-3.5. gpt-4 could be different.
// Complete stab in the dark here -- update if you know where all the extra tokens really come from.
return encode('## ' + message.role + ' ##:\r\n\r\n' + message.content + '\r\n\r\n\r\n').length
} }
export const getModelMaxTokens = (model:Model):number => { export const getModelMaxTokens = (model:Model):number => {


@ -19,6 +19,10 @@
const chatDefaults = getChatDefaults() const chatDefaults = getChatDefaults()
export const getApiKey = (): string => {
return get(apiKeyStorage)
}
export const newChatID = (): number => { export const newChatID = (): number => {
const chats = get(chatsStorage) const chats = get(chatsStorage)
const chatId = chats.reduce((maxId, chat) => Math.max(maxId, chat.id), 0) + 1 const chatId = chats.reduce((maxId, chat) => Math.max(maxId, chat.id), 0) + 1
@ -203,6 +207,10 @@
chatsStorage.set(chats) chatsStorage.set(chats)
} }
export const addError = (chatId: number, error: string) => {
addMessage(chatId, { content: error } as Message)
}
export const addMessage = (chatId: number, message: Message) => { export const addMessage = (chatId: number, message: Message) => {
const chats = get(chatsStorage) const chats = get(chatsStorage)
const chat = chats.find((chat) => chat.id === chatId) as Chat const chat = chats.find((chat) => chat.id === chatId) as Chat
@ -232,6 +240,7 @@
console.error("Couldn't insert after message:", insertAfter) console.error("Couldn't insert after message:", insertAfter)
return return
} }
newMessages.forEach(m => { m.uuid = m.uuid || uuidv4() })
chat.messages.splice(index + 1, 0, ...newMessages) chat.messages.splice(index + 1, 0, ...newMessages)
chatsStorage.set(chats) chatsStorage.set(chats)
} }


@ -38,7 +38,7 @@
} }
export type Request = { export type Request = {
model?: Model; model: Model;
messages?: Message[]; messages?: Message[];
temperature?: number; temperature?: number;
top_p?: number; top_p?: number;


@ -60,6 +60,11 @@
} }
} }
export const scrollToBottom = (instant:boolean = false) => {
setTimeout(() => document.querySelector('body')?.scrollIntoView({ behavior: (instant ? 'instant' : 'smooth') as any, block: 'end' }), 0)
}
export const checkModalEsc = (event:KeyboardEvent|undefined):boolean|void => { export const checkModalEsc = (event:KeyboardEvent|undefined):boolean|void => {
if (!event || event.key !== 'Escape') return if (!event || event.key !== 'Escape') return
dispatchModalEsc() dispatchModalEsc()