Add streaming responses based on #107

Webifi 2023-06-07 01:47:10 -05:00
parent fffe34c80c
commit 15272de1d4
8 changed files with 350 additions and 130 deletions

View File

@@ -607,6 +607,20 @@ aside.menu.main-menu .menu-expanse {
border-top-left-radius: 0px !important;
border-bottom-left-radius: 0px !important;
}
.message.streaming .tool-drawer, .message.streaming .tool-drawer-mask {
display: none;
}
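/* Blinking cursor while streaming: the keyframes below snap opacity with
steps(2) instead of fading, and an empty ::after pseudo-element on the
message's last paragraph carries the animation. */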
@keyframes cursor-blink {
0% {
opacity: 0;
}
}
.message.streaming .message-display p:last-of-type::after {
position: relative;
content: '';
animation: cursor-blink 1s steps(2) infinite;
}
.modal {
z-index:100;

View File

@@ -8,17 +8,23 @@
insertMessages,
getChatSettingValueNullDefault,
updateChatSettings,
updateRunningTotal,
checkStateChange,
showSetChatSettings,
submitExitingPromptsNow
submitExitingPromptsNow,
deleteMessage
} from './Storage.svelte'
import { getRequestSettingList, defaultModel } from './Settings.svelte'
import {
type Request,
type Response,
type Message,
type Chat
type Chat,
type ChatCompletionOpts,
type Usage
} from './Types.svelte'
import Prompts from './Prompts.svelte'
import Messages from './Messages.svelte'
@@ -37,11 +43,13 @@
// import { encode } from 'gpt-tokenizer'
import { v4 as uuidv4 } from 'uuid'
import { countPromptTokens, getModelMaxTokens, getPrice } from './Stats.svelte'
import { autoGrowInputOnEvent, sizeTextElements } from './Util.svelte'
import { autoGrowInputOnEvent, scrollToMessage, sizeTextElements } from './Util.svelte'
import ChatSettingsModal from './ChatSettingsModal.svelte'
import Footer from './Footer.svelte'
import { openModal } from 'svelte-modals'
import PromptInput from './PromptInput.svelte'
import { ChatCompletionResponse } from './ChatCompletionResponse.svelte'
import { fetchEventSource } from '@microsoft/fetch-event-source'
// This makes it possible to override the OpenAI API base URL in the .env file
const apiBase = import.meta.env.VITE_API_BASE || 'https://api.openai.com'
@@ -52,8 +60,6 @@
let updating: boolean = false
let updatingMessage: string = ''
let input: HTMLTextAreaElement
// let settings: HTMLDivElement
// let chatNameSettings: HTMLFormElement
let recognition: any = null
let recording = false
@@ -141,20 +147,24 @@
// Scroll to the bottom of the chat on update
const focusInput = () => {
input.focus()
setTimeout(() => document.querySelector('body')?.scrollIntoView({ behavior: 'smooth', block: 'end' }), 0)
setTimeout(() => scrollToBottom(), 0)
}
const scrollToBottom = (instant:boolean = false) => {
document.querySelector('body')?.scrollIntoView({ behavior: (instant ? 'instant' : 'smooth') as any, block: 'end' })
}
// Send API request
const sendRequest = async (messages: Message[], summaryTarget:number|undefined = undefined, withSummary:boolean = false): Promise<Response> => {
const sendRequest = async (messages: Message[], opts:ChatCompletionOpts): Promise<ChatCompletionResponse> => {
// Show updating bar
opts.chat = chat
const chatResponse = new ChatCompletionResponse(opts)
updating = true
const model = chat.settings.model || defaultModel
const maxTokens = getModelMaxTokens(model) // max tokens for model
let response: Response
const messageFilter = (m) => !m.suppress && m.role !== 'error' && m.content && !m.summarized
const messageFilter = (m:Message) => !m.suppress && m.role !== 'error' && m.content && !m.summarized
// Submit only the role and content of the messages, provide the previous messages as well for context
let filtered = messages.filter(messageFilter)
@@ -166,8 +176,8 @@
// console.log('Estimated',promptTokenCount,'prompt token for this request')
if (chatSettings.useSummarization &&
!withSummary && !summaryTarget &&
if (chatSettings.useSummarization && !opts.didSummary &&
!opts.summaryRequest && !opts.maxTokens &&
promptTokenCount > chatSettings.summaryThreshold) {
// Too many tokens -- we'll need to summarize some past ones else we'll run out of space
// Get a block of past prompts we'll summarize
@@ -239,35 +249,47 @@
summarizeReq.push(summaryMessage)
summaryPromptSize = countPromptTokens(summarizeReq, model)
const summaryResponse:Message = {
role: 'assistant',
content: '',
uuid: uuidv4(),
streaming: opts.streaming,
summary: []
}
summaryResponse.usage = {
prompt_tokens: 0
} as Usage
summaryResponse.model = model
// Insert summary prompt
insertMessages(chatId, endPrompt, [summaryResponse])
if (opts.streaming) setTimeout(() => scrollToMessage(summaryResponse.uuid, 150, true, true), 0)
// Wait for the summary completion
updatingMessage = 'Building Summary...'
const summary = await sendRequest(summarizeReq, summarySize)
if (summary.error) {
updatingMessage = 'Summarizing...'
const summary = await sendRequest(summarizeReq, {
summaryRequest: true,
streaming: opts.streaming,
maxTokens: summarySize,
fillMessage: summaryResponse,
autoAddMessages: true,
onMessageChange: (m) => {
if (opts.streaming) scrollToMessage(summaryResponse.uuid, 150, true, true)
}
} as ChatCompletionOpts)
if (!summary.hasFinished()) await summary.promiseToFinish()
if (summary.hasError()) {
// Failed due to some API issue; let the original caller handle it.
deleteMessage(chatId, summaryResponse.uuid)
return summary
} else {
// Get response
const summaryPromptContent: string = summary.choices.reduce((a, c) => {
if (a.length > c.message.content.length) return a
a = c.message.content
return a
}, '')
// Looks like we got our summarized messages.
// get ids of messages we summarized
const summarizedIds = summarize.slice(pinTop + systemPad).map(m => m.uuid)
// Mark the new summaries as such
const summaryPrompt:Message = {
role: 'assistant',
content: summaryPromptContent,
uuid: uuidv4(),
summary: summarizedIds,
usage: summary.usage,
model
}
const summaryIds = [summaryPrompt.uuid]
// Insert messages
insertMessages(chatId, endPrompt, [summaryPrompt])
summaryResponse.summary = summarizedIds
const summaryIds = [summaryResponse.uuid]
// Disable the messages we summarized so they still show in history
summarize.forEach((m, i) => {
if (i - systemPad >= pinTop) {
@@ -278,7 +300,8 @@
// Re-run request with summarized prompts
// return { error: { message: "End for now" } } as Response
updatingMessage = 'Continuing...'
return await sendRequest(chat.messages, undefined, true)
opts.didSummary = true
return await sendRequest(chat.messages, opts)
}
} else if (!summaryPrompt) {
addMessage(chatId, { role: 'error', content: 'Unable to summarize. No summary prompt defined.', uuid: uuidv4() })
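The summarization flow in the hunks above is easier to follow distilled. A minimal sketch of the control flow, where tooManyTokens, selectSummarizable, buildSummaryPlaceholder, markSummarized and summarySize are hypothetical stand-ins for the inline logic:

const sendWithSummary = async (messages: Message[], opts: ChatCompletionOpts) => {
  if (tooManyTokens(messages) && !opts.didSummary && !opts.summaryRequest) {
    const placeholder = buildSummaryPlaceholder() // empty assistant message, inserted right away
    const summary = await sendRequest(selectSummarizable(messages), {
      summaryRequest: true, // triggers the max_tokens / n overrides
      streaming: opts.streaming,
      maxTokens: summarySize,
      fillMessage: placeholder, // stream the summary text into the placeholder
      autoAddMessages: true
    } as ChatCompletionOpts)
    await summary.promiseToFinish()
    if (summary.hasError()) return summary // drop the placeholder; caller reports the error
    markSummarized(messages, placeholder) // record which messages the summary replaces
    return sendRequest(messages, { ...opts, didSummary: true }) // retry once, without re-summarizing
  }
  // ...otherwise fall through to the normal request path
}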
@@ -315,67 +338,79 @@
if (typeof setting.apiTransform === 'function') {
value = setting.apiTransform(chatId, setting, value)
}
if (summaryTarget) {
if (opts.summaryRequest && opts.maxTokens) {
// requesting a summary; apply overrides
if (setting.key === 'max_tokens') value = summaryTarget // only as large as we need for summary
if (setting.key === 'n') value = 1 // never more than one completion
if (setting.key === 'max_tokens') value = opts.maxTokens // only as large as we need for summary
if (setting.key === 'n') value = 1 // never more than one completion for summary
}
if (opts.streaming) {
/*
Streaming misbehaves with more than one completion; there doesn't seem to be
a reliable way to separate the interleaved deltas of the different
completions, so force n = 1 while streaming.
*/
if (setting.key === 'n') value = 1
}
if (value !== null) acc[setting.key] = value
return acc
}, {})
}
// Not working yet: a way to get the response as a stream
/*
request.stream = true
await fetchEventSource(apiBase + '/v1/chat/completions', {
method: 'POST',
headers: {
Authorization:
`Bearer ${$apiKeyStorage}`,
'Content-Type': 'application/json'
},
body: JSON.stringify(request),
onmessage (ev) {
const data = JSON.parse(ev.data)
console.log(data)
},
onerror (err) {
throw err
}
})
*/
request.stream = opts.streaming
response = await (
await fetch(apiBase + '/v1/chat/completions', {
chatResponse.setPromptTokenCount(promptTokenCount)
const fetchOptions = {
method: 'POST',
headers: {
Authorization: `Bearer ${$apiKeyStorage}`,
'Content-Type': 'application/json'
},
body: JSON.stringify(request)
}
if (opts.streaming) {
fetchEventSource(apiBase + '/v1/chat/completions', {
...fetchOptions,
onmessage (ev) {
// Remove updating indicator
updating = false
updatingMessage = ''
if (!chatResponse.hasFinished()) {
// console.log('ev.data', ev.data)
if (ev.data === '[DONE]') {
// '[DONE]' is the stream's terminal sentinel, not JSON -- nothing further to parse
} else {
const data = JSON.parse(ev.data)
// console.log('data', data)
window.requestAnimationFrame(() => { chatResponse.updateFromAsyncResponse(data) })
}
}
},
onerror (err) {
throw err
}
}).catch(err => {
chatResponse.updateFromError(err.message)
})
).json()
} else {
const response = await fetch(apiBase + '/v1/chat/completions', fetchOptions)
const json = await response.json()
// Remove updating indicator
updating = false
updatingMessage = ''
chatResponse.updateFromSyncResponse(json)
}
} catch (e) {
response = { error: { message: e.message } } as Response
chatResponse.updateFromError(e.message)
}
// Hide updating bar
updating = false
updatingMessage = ''
if (!response.error) {
// Add response counts to usage totals
updateRunningTotal(chatId, response.usage, response.model)
// const completionTokenCount:number = response.choices.reduce((a, c) => {
// // unlike the prompts, token count of the completion is just the completion.
// a += encode(c.message.content).length
// return a
// }, 0)
// console.log('estimated response token count', completionTokenCount)
}
return response
return chatResponse
}
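For context on what the streaming branch parses: each server-sent event's data field holds one JSON chunk, choices[i].delta carries the increment, and a literal [DONE] ends the stream. A minimal standalone sketch of the accumulation (chunk shape abbreviated, single completion assumed):

// Events as they arrive on the wire:
// data: {"object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"Hel"},"finish_reason":null}]}
// data: {"object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"lo"},"finish_reason":"stop"}]}
// data: [DONE]
let text = ''
const onEventData = (data: string) => {
  if (data === '[DONE]') return // terminal sentinel, not JSON
  const choice = JSON.parse(data).choices[0]
  if (choice.delta?.content) text += choice.delta.content // append this increment
  if (choice.finish_reason !== null) {
    // 'stop', 'length', etc. -- this completion is done even before [DONE]
  }
}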
const addNewMessage = () => {
@@ -397,6 +432,14 @@
focusInput()
}
const tts = (text:string, recorded:boolean) => {
// Use TTS to read the response, if query was recorded
if (recorded && 'SpeechSynthesisUtterance' in window) {
const utterance = new SpeechSynthesisUtterance(text)
window.speechSynthesis.speak(utterance)
}
}
const submitForm = async (recorded: boolean = false, skipInput: boolean = false): Promise<void> => {
// Compose the system prompt message if there are no messages yet - disabled for now
if (updating) return
@@ -419,29 +462,18 @@
}
focusInput()
const response = await sendRequest(chat.messages)
if (response.error) {
addMessage(chatId, {
role: 'error',
content: `Error: ${response.error.message}`,
uuid: uuidv4()
})
} else {
response.choices.forEach((choice) => {
// Store usage and model in the message
choice.message.usage = response.usage
choice.message.model = response.model
// Remove whitespace around the message that the OpenAI API sometimes returns
choice.message.content = choice.message.content.trim()
addMessage(chatId, choice.message)
// Use TTS to read the response, if query was recorded
if (recorded && 'SpeechSynthesisUtterance' in window) {
const utterance = new SpeechSynthesisUtterance(choice.message.content)
window.speechSynthesis.speak(utterance)
const response = await sendRequest(chat.messages, {
chat,
autoAddMessages: true, // Auto-add and update messages in array
streaming: chatSettings.stream,
onMessageChange: (messages) => {
scrollToBottom(true)
}
})
await response.promiseToFinish()
const message = response.getMessages()[0]
if (message) {
tts(message.content, recorded)
}
focusInput()
}
@@ -456,17 +488,22 @@
const suggestMessages = chat.messages.slice(0, 10) // limit to first 10 messages
suggestMessages.push(suggestMessage)
const response = await sendRequest(suggestMessages, 20)
const response = await sendRequest(suggestMessages, {
chat,
autoAddMessages: false,
streaming: false
})
await response.promiseToFinish()
if (response.error) {
if (response.hasError()) {
addMessage(chatId, {
role: 'error',
content: `Unable to get suggested name: ${response.error.message}`,
content: `Unable to get suggested name: ${response.getError()}`,
uuid: uuidv4()
})
} else {
response.choices.forEach((choice) => {
chat.name = choice.message.content
response.getMessages().forEach(m => {
chat.name = m.content
})
}
}

View File

@@ -0,0 +1,132 @@
<script context="module" lang="ts">
// TODO: Integrate API calls
import { addMessage, updateRunningTotal } from './Storage.svelte'
import type { Chat, ChatCompletionOpts, Message, Response, Usage } from './Types.svelte'
import { encode } from 'gpt-tokenizer'
import { v4 as uuidv4 } from 'uuid'
export class ChatCompletionResponse {
constructor (opts: ChatCompletionOpts) {
this.opts = opts
this.chat = opts.chat
this.messages = []
if (opts.fillMessage) this.messages.push(opts.fillMessage)
if (opts.onMessageChange) this.messageChangeListeners.push(opts.onMessageChange)
}
private opts: ChatCompletionOpts
private chat: Chat
private messages: Message[]
private error: string
private didFinish: boolean
private finishResolver: (value: Message[]) => void
private errorResolver: (error: string) => void
private finishPromise = new Promise<Message[]>((resolve, reject) => {
this.finishResolver = resolve
this.errorResolver = reject
})
private promptTokenCount:number
private finished = false
private messageChangeListeners: ((m: Message[]) => void)[] = []
setPromptTokenCount (tokens:number) {
this.promptTokenCount = tokens
}
updateFromSyncResponse (response: Response) {
response.choices.forEach((choice, i) => {
const message = this.messages[i] || choice.message
message.content = choice.message.content
message.usage = response.usage
message.model = response.model
message.role = choice.message.role
this.messages[i] = message
if (this.opts.autoAddMessages) addMessage(this.chat.id, message)
})
this.notifyMessageChange()
this.finish()
}
updateFromAsyncResponse (response: Response) {
let completionTokenCount = 0
response.choices.forEach((choice, i) => {
const message = this.messages[i] || {
role: 'assistant',
content: '',
uuid: uuidv4()
} as Message
choice.delta?.role && (message.role = choice.delta.role)
choice.delta?.content && (message.content += choice.delta.content)
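// streamed chunks don't include usage, so estimate completion tokens locally with gpt-tokenizer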
completionTokenCount += encode(message.content).length
message.usage = response.usage || {
prompt_tokens: this.promptTokenCount
} as Usage
message.model = response.model
message.finish_reason = choice.finish_reason
message.streaming = choice.finish_reason === null
this.messages[i] = message
if (this.opts.autoAddMessages) addMessage(this.chat.id, message)
})
// total up the tokens
const totalTokens = this.promptTokenCount + completionTokenCount
this.messages.forEach(m => {
if (m.usage) {
m.usage.completion_tokens = completionTokenCount
m.usage.total_tokens = totalTokens
}
})
const finished = !this.messages.find(m => m.streaming)
this.notifyMessageChange()
if (finished) this.finish()
}
updateFromError (errorMessage: string): void {
this.error = errorMessage
if (this.opts.autoAddMessages) {
addMessage(this.chat.id, {
role: 'error',
content: `Error: ${errorMessage}`,
uuid: uuidv4()
} as Message)
}
this.notifyMessageChange()
this.finish()
}
onMessageChange = (listener: (m: Message[]) => void): number =>
this.messageChangeListeners.push(listener)
promiseToFinish = (): Promise<Message[]> => this.finishPromise
hasFinished = (): boolean => this.finished
getError = (): string => this.error
hasError = (): boolean => !!this.error
getMessages = (): Message[] => this.messages
private notifyMessageChange (): void {
this.messageChangeListeners.forEach((listener) => {
listener(this.messages)
})
}
private finish = (): void => {
if (this.didFinish) return
this.didFinish = true
const message = this.messages[0]
if (message) {
updateRunningTotal(this.chat.id, message.usage as any, message.model as any)
}
this.finished = true
if (this.error) {
this.errorResolver(this.error)
} else {
this.finishResolver(this.messages)
}
}
}
</script>
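A usage sketch for the class above; the option values and the handler body are illustrative, and chat is assumed to be in scope:

const response = new ChatCompletionResponse({
  chat,
  autoAddMessages: true, // write messages to storage as they grow
  streaming: true
})
response.onMessageChange((messages) => {
  // fires on every delta and on errors -- re-render, scroll, etc.
})
try {
  const messages = await response.promiseToFinish() // resolves once nothing is still streaming
  console.log(messages[0]?.content)
} catch (err) {
  console.error('completion failed:', response.getError()) // rejects with the error string
}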

View File

@@ -7,7 +7,7 @@
import type { Message, Model, Chat } from './Types.svelte'
import Fa from 'svelte-fa/src/fa.svelte'
import { faTrash, faDiagramPredecessor, faDiagramNext, faCircleCheck, faPaperPlane, faEye, faEyeSlash } from '@fortawesome/free-solid-svg-icons/index'
import { errorNotice, scrollIntoViewWithOffset } from './Util.svelte'
import { errorNotice, scrollToMessage } from './Util.svelte'
import { openModal } from 'svelte-modals'
import PromptConfirm from './PromptConfirm.svelte'
@@ -43,7 +43,7 @@
})
const edit = () => {
if (noEdit) return
if (noEdit || message.streaming) return
editing = true
setTimeout(() => {
const el = document.getElementById('edit-' + message.uuid)
@@ -77,22 +77,6 @@
}
}
const scrollToMessage = (uuid:string | string[] | undefined) => {
if (Array.isArray(uuid)) {
uuid = uuid[0]
}
if (!uuid) {
console.error('Not a valid uuid', uuid)
return
}
const el = document.getElementById('message-' + uuid)
if (el) {
scrollIntoViewWithOffset(el, 80)
} else {
console.error("Can't find element with message ID", uuid)
}
}
// Double click for mobile support
let lastTap: number = 0
const editOnDoubleTap = () => {
@@ -146,7 +130,6 @@
}
}
let waitingForTruncateConfirm:any = 0
const checkTruncate = () => {
@@ -195,6 +178,7 @@
class:summarized={message.summarized}
class:suppress={message.suppress}
class:editing={editing}
class:streaming={message.streaming}
>
<div class="message-body content">
@@ -210,6 +194,9 @@
on:touchend={editOnDoubleTap}
on:dblclick|preventDefault={() => edit()}
>
{#if message.summary && !message.summary.length}
<p><b>Summarizing...</b></p>
{/if}
<SvelteMarkdown
source={message.content}
options={markdownOptions}

View File

@@ -60,7 +60,7 @@ const gptDefaults = {
temperature: 1,
top_p: 1,
n: 1,
stream: false,
stream: true,
stop: null,
max_tokens: 512,
presence_penalty: 0,
@@ -312,6 +312,12 @@ const chatSettingsList: ChatSetting[] = [
...summarySettings,
// ...responseAlterationSettings,
modelSetting,
{
key: 'stream',
name: 'Stream Response',
title: 'Stream responses as they are generated.',
type: 'boolean'
},
{
key: 'temperature',
name: 'Sampling Temperature',

View File

@@ -89,6 +89,7 @@
// make sure old chat messages have UUID
chat.messages.forEach((m) => {
m.uuid = m.uuid || uuidv4()
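// and clear any streaming flag left over from an interrupted session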
delete m.streaming
})
// Make sure the usage totals object is set
// (some earlier versions of this had different structures)
@@ -163,7 +164,10 @@
const chats = get(chatsStorage)
const chat = chats.find((chat) => chat.id === chatId) as Chat
if (!message.uuid) message.uuid = uuidv4()
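// Streaming passes the same message object on every delta, so only push it the first time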
if (chat.messages.indexOf(message) < 0) {
// Don't have message, add it
chat.messages.push(message)
}
chatsStorage.set(chats)
}

View File

@@ -27,6 +27,8 @@
summarized?: string[];
summary?: string[];
suppress?: boolean;
finish_reason?: string;
streaming?: boolean;
};
export type ResponseAlteration = {
@@ -88,6 +90,7 @@
index: number;
message: Message;
finish_reason: string;
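// streamed chunks put incremental content here instead of a full message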
delta: Message;
}[];
usage: Usage;
model: Model;
@@ -111,6 +114,17 @@
}[];
};
export type ChatCompletionOpts = {
chat: Chat;
autoAddMessages: boolean; // add messages to the chat and keep updating them as they stream
maxTokens?: number;
summaryRequest?: boolean; // summary requests override max_tokens and n
didSummary?: boolean; // set on the retry after summarizing so we don't summarize again
streaming?: boolean;
onMessageChange?: (messages: Message[]) => void;
fillMessage?: Message; // existing message to stream the completion into
};
export type GlobalSettings = {
profiles: Record<string, ChatSettings>;
lastProfile?: string;

View File

@@ -21,15 +21,41 @@
anyEl.__didAutoGrow = true // don't resize this one again unless it's via an event
}
export const scrollIntoViewWithOffset = (element:HTMLElement, offset:number) => {
export const scrollIntoViewWithOffset = (element:HTMLElement, offset:number, instant:boolean = false, bottom:boolean = false) => {
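// getBoundingClientRect() is viewport-relative; subtracting the body's rect top
// converts to a document position, and offset leaves headroom (e.g. for a fixed header)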
const behavior = instant ? 'instant' : 'smooth'
if (bottom) {
window.scrollTo({
behavior: 'smooth',
behavior: behavior as any,
top:
(element.getBoundingClientRect().bottom) -
document.body.getBoundingClientRect().top - (window.innerHeight - offset)
})
} else {
window.scrollTo({
behavior: behavior as any,
top:
element.getBoundingClientRect().top -
document.body.getBoundingClientRect().top -
offset
})
}
}
export const scrollToMessage = (uuid:string | string[] | undefined, offset:number = 60, instant:boolean = false, bottom:boolean = false) => {
if (Array.isArray(uuid)) {
uuid = uuid[0]
}
if (!uuid) {
console.error('Not a valid uuid', uuid)
return
}
const el = document.getElementById('message-' + uuid)
if (el) {
scrollIntoViewWithOffset(el, offset, instant, bottom)
} else {
console.error("Can't find element with message ID", uuid)
}
}
export const checkModalEsc = (event:KeyboardEvent|undefined):boolean|void => {
if (!event || event.key !== 'Escape') return