sendRequest refactor

parent 2660512830
commit 66336a0a13

@@ -2,36 +2,22 @@
 // This beast needs to be broken down into multiple components before it gets any worse.
 import {
   saveChatStore,
-  apiKeyStorage,
   chatsStorage,
   addMessage,
-  insertMessages,
-  getChatSettingValueNullDefault,
   updateChatSettings,
   checkStateChange,
   showSetChatSettings,
   submitExitingPromptsNow,
-  deleteMessage,
   continueMessage,
   getMessage
 } from './Storage.svelte'
-import { getRequestSettingList, defaultModel } from './Settings.svelte'
 import {
-  type Request,
   type Message,
-  type Chat,
-  type ChatCompletionOpts,
-  type Model,
-  type ChatSettings
+  type Chat
 } from './Types.svelte'
 import Prompts from './Prompts.svelte'
 import Messages from './Messages.svelte'
-import { mergeProfileFields, prepareSummaryPrompt, restartProfile } from './Profiles.svelte'
+import { restartProfile } from './Profiles.svelte'
 import { afterUpdate, onMount, onDestroy } from 'svelte'
 import Fa from 'svelte-fa/src/fa.svelte'
 import {
@@ -41,27 +27,29 @@
   faPenToSquare,
   faMicrophone,
   faLightbulb,
-  faCommentSlash
+  faCommentSlash,
+  faCircleCheck
 } from '@fortawesome/free-solid-svg-icons/index'
-import { encode } from 'gpt-tokenizer'
 import { v4 as uuidv4 } from 'uuid'
-import { countPromptTokens, getModelMaxTokens, getPrice } from './Stats.svelte'
+import { getPrice } from './Stats.svelte'
-import { autoGrowInputOnEvent, scrollToMessage, sizeTextElements } from './Util.svelte'
+import { autoGrowInputOnEvent, scrollToBottom, sizeTextElements } from './Util.svelte'
 import ChatSettingsModal from './ChatSettingsModal.svelte'
 import Footer from './Footer.svelte'
 import { openModal } from 'svelte-modals'
 import PromptInput from './PromptInput.svelte'
-import { ChatCompletionResponse } from './ChatCompletionResponse.svelte'
+import { ChatRequest } from './ChatRequest.svelte'
-import { EventStreamContentType, fetchEventSource } from '@microsoft/fetch-event-source'
-import { getApiBase, getEndpointCompletions } from './ApiUtil.svelte'

 export let params = { chatId: '' }
 const chatId: number = parseInt(params.chatId)

-let controller:AbortController = new AbortController()
-let updating: boolean|number = false
-let updatingMessage: string = ''
+let chatRequest = new ChatRequest()
+// let controller:AbortController
+// let updating: boolean|number = false
+// let updatingMessage: string = ''
 let input: HTMLTextAreaElement
 let recognition: any = null
 let recording = false
@@ -111,12 +99,15 @@
 onDestroy(async () => {
   // clean up
   // abort any pending requests.
-  controller.abort()
+  chatRequest.controller.abort()
   ttsStop()
 })

 onMount(async () => {
   if (!chat) return

+  chatRequest = new ChatRequest()
+  chatRequest.setChat(chat)
   // Focus the input on mount
   focusInput()
@@ -170,349 +161,8 @@
|
||||||
scrollToBottom()
|
scrollToBottom()
|
||||||
}
|
}
|
||||||
|
|
||||||
const scrollToBottom = (instant:boolean = false) => {
|
|
||||||
setTimeout(() => document.querySelector('body')?.scrollIntoView({ behavior: (instant ? 'instant' : 'smooth') as any, block: 'end' }), 0)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Send API request
|
|
||||||
const sendRequest = async (messages: Message[], opts:ChatCompletionOpts, overrides:ChatSettings = {} as ChatSettings): Promise<ChatCompletionResponse> => {
|
|
||||||
// Show updating bar
|
|
||||||
opts.chat = chat
|
|
||||||
const chatResponse = new ChatCompletionResponse(opts)
|
|
||||||
updating = true
|
|
||||||
|
|
||||||
const model = chat.settings.model || defaultModel
|
|
||||||
const maxTokens = getModelMaxTokens(model) // max tokens for model
|
|
||||||
|
|
||||||
const messageFilter = (m:Message) => !m.suppress && m.role !== 'error' && m.content && !m.summarized
|
|
||||||
|
|
||||||
// Submit only the role and content of the messages, provide the previous messages as well for context
|
|
||||||
let filtered = messages.filter(messageFilter)
|
|
||||||
|
|
||||||
// Get an estimate of the total prompt size we're sending
|
|
||||||
let promptTokenCount:number = countPromptTokens(filtered, model)
|
|
||||||
|
|
||||||
let summarySize = chatSettings.summarySize
|
|
||||||
|
|
||||||
const hiddenPromptPrefix = mergeProfileFields(chatSettings, chatSettings.hiddenPromptPrefix).trim()
|
|
||||||
|
|
||||||
if (hiddenPromptPrefix && filtered.length && filtered[filtered.length - 1].role === 'user') {
|
|
||||||
// update estimate with hiddenPromptPrefix token count
|
|
||||||
promptTokenCount += encode(hiddenPromptPrefix + '\n\n').length
|
|
||||||
}
|
|
||||||
|
|
||||||
// console.log('Estimated',promptTokenCount,'prompt token for this request')
|
|
||||||
|
|
||||||
if (chatSettings.continuousChat && !opts.didSummary &&
|
|
||||||
!opts.summaryRequest && !opts.maxTokens &&
|
|
||||||
promptTokenCount > chatSettings.summaryThreshold) {
|
|
||||||
// Too many tokens -- well need to summarize some past ones else we'll run out of space
|
|
||||||
// Get a block of past prompts we'll summarize
|
|
||||||
let pinTop = chatSettings.pinTop
|
|
||||||
const tp = chatSettings.trainingPrompts
|
|
||||||
pinTop = Math.max(pinTop, tp ? 1 : 0)
|
|
||||||
let pinBottom = chatSettings.pinBottom
|
|
||||||
const systemPad = (filtered[0] || {} as Message).role === 'system' ? 1 : 0
|
|
||||||
const mlen = filtered.length - systemPad // always keep system prompt
|
|
||||||
let diff = mlen - (pinTop + pinBottom)
|
|
||||||
const useFIFO = chatSettings.continuousChat === 'fifo' || !prepareSummaryPrompt(chatId, 0)
|
|
||||||
if (!useFIFO) {
|
|
||||||
while (diff <= 3 && (pinTop > 0 || pinBottom > 1)) {
|
|
||||||
// Not enough prompts exposed to summarize
|
|
||||||
// try to open up pinTop and pinBottom to see if we can get more to summarize
|
|
||||||
if (pinTop === 1 && pinBottom > 1) {
|
|
||||||
// If we have a pin top, try to keep some of it as long as we can
|
|
||||||
pinBottom = Math.max(Math.floor(pinBottom / 2), 0)
|
|
||||||
} else {
|
|
||||||
pinBottom = Math.max(Math.floor(pinBottom / 2), 0)
|
|
||||||
pinTop = Math.max(Math.floor(pinTop / 2), 0)
|
|
||||||
}
|
|
||||||
diff = mlen - (pinTop + pinBottom)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!useFIFO && diff > 0) {
|
|
||||||
// We've found at least one prompt we can try to summarize
|
|
||||||
// Reduce to prompts we'll send in for summary
|
|
||||||
// (we may need to update this to not include the pin-top, but the context it provides seems to help in the accuracy of the summary)
|
|
||||||
const summarize = filtered.slice(0, filtered.length - pinBottom)
|
|
||||||
// Estimate token count of what we'll be summarizing
|
|
||||||
let sourceTokenCount = countPromptTokens(summarize, model)
|
|
||||||
// build summary prompt message
|
|
||||||
let summaryPrompt = prepareSummaryPrompt(chatId, sourceTokenCount)
|
|
||||||
|
|
||||||
const summaryMessage = {
|
|
||||||
role: 'user',
|
|
||||||
content: summaryPrompt
|
|
||||||
} as Message
|
|
||||||
// get an estimate of how many tokens this request + max completions could be
|
|
||||||
let summaryPromptSize = countPromptTokens(summarize.concat(summaryMessage), model)
|
|
||||||
// reduce summary size to make sure we're not requesting a summary larger than our prompts
|
|
||||||
summarySize = Math.floor(Math.min(summarySize, sourceTokenCount / 4))
|
|
||||||
// Make sure our prompt + completion request isn't too large
|
|
||||||
while (summarize.length - (pinTop + systemPad) >= 3 && summaryPromptSize + summarySize > maxTokens && summarySize >= 4) {
|
|
||||||
summarize.pop()
|
|
||||||
sourceTokenCount = countPromptTokens(summarize, model)
|
|
||||||
summaryPromptSize = countPromptTokens(summarize.concat(summaryMessage), model)
|
|
||||||
summarySize = Math.floor(Math.min(summarySize, sourceTokenCount / 4))
|
|
||||||
}
|
|
||||||
// See if we have to adjust our max summarySize
|
|
||||||
if (summaryPromptSize + summarySize > maxTokens) {
|
|
||||||
summarySize = maxTokens - summaryPromptSize
|
|
||||||
}
|
|
||||||
// Always try to end the prompts being summarized with a user prompt. Seems to work better.
|
|
||||||
while (summarize.length - (pinTop + systemPad) >= 4 && summarize[summarize.length - 1].role !== 'user') {
|
|
||||||
summarize.pop()
|
|
||||||
}
|
|
||||||
// update with actual
|
|
||||||
sourceTokenCount = countPromptTokens(summarize, model)
|
|
||||||
summaryPrompt = prepareSummaryPrompt(chatId, sourceTokenCount)
|
|
||||||
summarySize = Math.floor(Math.min(summarySize, sourceTokenCount / 4))
|
|
||||||
summaryMessage.content = summaryPrompt
|
|
||||||
if (sourceTokenCount > 20 && summaryPrompt && summarySize > 4) {
|
|
||||||
// get prompt we'll be inserting after
|
|
||||||
const endPrompt = summarize[summarize.length - 1]
|
|
||||||
// Add a prompt to ask to summarize them
|
|
||||||
const summarizeReq = summarize.slice()
|
|
||||||
summarizeReq.push(summaryMessage)
|
|
||||||
summaryPromptSize = countPromptTokens(summarizeReq, model)
|
|
||||||
|
|
||||||
// Create a message the summary will be loaded into
|
|
||||||
const summaryResponse:Message = {
|
|
||||||
role: 'assistant',
|
|
||||||
content: '',
|
|
||||||
uuid: uuidv4(),
|
|
||||||
streaming: opts.streaming,
|
|
||||||
summary: []
|
|
||||||
}
|
|
||||||
summaryResponse.model = model
|
|
||||||
|
|
||||||
// Insert summary completion prompt
|
|
||||||
insertMessages(chatId, endPrompt, [summaryResponse])
|
|
||||||
if (opts.streaming) setTimeout(() => scrollToMessage(summaryResponse.uuid, 150, true, true), 0)
|
|
||||||
|
|
||||||
// Wait for the summary completion
|
|
||||||
updatingMessage = 'Summarizing...'
|
|
||||||
const summary = await sendRequest(summarizeReq, {
|
|
||||||
summaryRequest: true,
|
|
||||||
streaming: opts.streaming,
|
|
||||||
maxTokens: summarySize,
|
|
||||||
fillMessage: summaryResponse,
|
|
||||||
autoAddMessages: true,
|
|
||||||
onMessageChange: (m) => {
|
|
||||||
if (opts.streaming) scrollToMessage(summaryResponse.uuid, 150, true, true)
|
|
||||||
}
|
|
||||||
} as ChatCompletionOpts, {
|
|
||||||
temperature: 0, // make summary more deterministic
|
|
||||||
top_p: 0.2,
|
|
||||||
presence_penalty: -0.5,
|
|
||||||
frequency_penalty: 0,
|
|
||||||
...overrides
|
|
||||||
} as ChatSettings)
|
|
||||||
if (!summary.hasFinished()) await summary.promiseToFinish()
|
|
||||||
if (summary.hasError()) {
|
|
||||||
// Failed to some API issue. let the original caller handle it.
|
|
||||||
deleteMessage(chatId, summaryResponse.uuid)
|
|
||||||
return summary
|
|
||||||
} else {
|
|
||||||
// Looks like we got our summarized messages.
|
|
||||||
// get ids of messages we summarized
|
|
||||||
const summarizedIds = summarize.slice(pinTop + systemPad).map(m => m.uuid)
|
|
||||||
// Mark the new summaries as such
|
|
||||||
summaryResponse.summary = summarizedIds
|
|
||||||
|
|
||||||
const summaryIds = [summaryResponse.uuid]
|
|
||||||
// Disable the messages we summarized so they still show in history
|
|
||||||
summarize.forEach((m, i) => {
|
|
||||||
if (i - systemPad >= pinTop) {
|
|
||||||
m.summarized = summaryIds
|
|
||||||
}
|
|
||||||
})
|
|
||||||
saveChatStore()
|
|
||||||
// Re-run request with summarized prompts
|
|
||||||
// return { error: { message: "End for now" } } as Response
|
|
||||||
updatingMessage = 'Continuing...'
|
|
||||||
opts.didSummary = true
|
|
||||||
return await sendRequest(chat.messages, opts)
|
|
||||||
}
|
|
||||||
} else if (!summaryPrompt) {
|
|
||||||
addMessage(chatId, { role: 'error', content: 'Unable to summarize. No summary prompt defined.', uuid: uuidv4() })
|
|
||||||
} else if (sourceTokenCount <= 20) {
|
|
||||||
addMessage(chatId, { role: 'error', content: 'Unable to summarize. Not enough words in past content to summarize.', uuid: uuidv4() })
|
|
||||||
}
|
|
||||||
} else if (!useFIFO && diff < 1) {
|
|
||||||
addMessage(chatId, { role: 'error', content: 'Unable to summarize. Not enough messages in past content to summarize.', uuid: uuidv4() })
|
|
||||||
} else {
|
|
||||||
// roll-off/fifo mode
|
|
||||||
const top = filtered.slice(0, pinTop + systemPad)
|
|
||||||
const rollaway = filtered.slice(pinTop + systemPad)
|
|
||||||
let promptTokenCount = countPromptTokens(top.concat(rollaway), model)
|
|
||||||
// suppress messages we're rolling off
|
|
||||||
while (rollaway.length > (((promptTokenCount + (chatSettings.max_tokens || 1)) > maxTokens) ? pinBottom || 1 : 1) &&
|
|
||||||
promptTokenCount >= chatSettings.summaryThreshold) {
|
|
||||||
const rollOff = rollaway.shift()
|
|
||||||
if (rollOff) rollOff.suppress = true
|
|
||||||
promptTokenCount = countPromptTokens(top.concat(rollaway), model)
|
|
||||||
}
|
|
||||||
saveChatStore()
|
|
||||||
// get a new list now excluding them
|
|
||||||
filtered = messages.filter(messageFilter)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const messagePayload = filtered.map((m, i) => {
|
|
||||||
const r = { role: m.role, content: m.content }
|
|
||||||
if (i === filtered.length - 1 && m.role === 'user' && hiddenPromptPrefix && !opts.summaryRequest) {
|
|
||||||
// If the last prompt is a user prompt, and we have a hiddenPromptPrefix, inject it
|
|
||||||
r.content = hiddenPromptPrefix + '\n\n' + m.content
|
|
||||||
}
|
|
||||||
return r
|
|
||||||
}) as Message[]
|
|
||||||
|
|
||||||
// Update token count with actual
|
|
||||||
promptTokenCount = countPromptTokens(messagePayload, model)
|
|
||||||
const maxAllowed = getModelMaxTokens(chatSettings.model as Model) - (promptTokenCount + 1)
|
|
||||||
|
|
||||||
try {
|
|
||||||
const request: Request = {
|
|
||||||
messages: messagePayload,
|
|
||||||
// Provide the settings by mapping the settingsMap to key/value pairs
|
|
||||||
...getRequestSettingList().reduce((acc, setting) => {
|
|
||||||
const key = setting.key
|
|
||||||
let value = getChatSettingValueNullDefault(chatId, setting)
|
|
||||||
if (key in overrides) value = overrides[key]
|
|
||||||
if (typeof setting.apiTransform === 'function') {
|
|
||||||
value = setting.apiTransform(chatId, setting, value)
|
|
||||||
}
|
|
||||||
if (key === 'max_tokens') {
|
|
||||||
if (opts.maxTokens) value = opts.maxTokens // only as large as requested
|
|
||||||
if (value > maxAllowed || value < 1) value = null // if over max model, do not define max
|
|
||||||
}
|
|
||||||
if (key === 'n') {
|
|
||||||
if (opts.streaming || opts.summaryRequest) {
|
|
||||||
/*
|
|
||||||
Streaming goes insane with more than one completion.
|
|
||||||
Doesn't seem like there's any way to separate the jumbled mess of deltas for the
|
|
||||||
different completions.
|
|
||||||
Summary should only have one completion
|
|
||||||
*/
|
|
||||||
value = 1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (value !== null) acc[key] = value
|
|
||||||
return acc
|
|
||||||
}, {})
|
|
||||||
}
|
|
||||||
|
|
||||||
request.stream = opts.streaming
|
|
||||||
|
|
||||||
chatResponse.setPromptTokenCount(promptTokenCount) // streaming needs this
|
|
||||||
|
|
||||||
const signal = controller.signal
|
|
||||||
|
|
||||||
// console.log('apikey', $apiKeyStorage)
|
|
||||||
|
|
||||||
const fetchOptions = {
|
|
||||||
method: 'POST',
|
|
||||||
headers: {
|
|
||||||
Authorization: `Bearer ${$apiKeyStorage}`,
|
|
||||||
'Content-Type': 'application/json'
|
|
||||||
},
|
|
||||||
body: JSON.stringify(request),
|
|
||||||
signal
|
|
||||||
}
|
|
||||||
|
|
||||||
const handleError = async (response) => {
|
|
||||||
let errorResponse
|
|
||||||
try {
|
|
||||||
const errObj = await response.json()
|
|
||||||
errorResponse = errObj?.error?.message || errObj?.error?.code
|
|
||||||
if (!errorResponse && response.choices && response.choices[0]) {
|
|
||||||
errorResponse = response.choices[0]?.message?.content
|
|
||||||
}
|
|
||||||
errorResponse = errorResponse || 'Unexpected Response'
|
|
||||||
} catch (e) {
|
|
||||||
errorResponse = 'Unknown Response'
|
|
||||||
}
|
|
||||||
throw new Error(`${response.status} - ${errorResponse}`)
|
|
||||||
}
|
|
||||||
|
|
||||||
// fetchEventSource doesn't seem to throw on abort, so...
|
|
||||||
const abortListener = (e:Event) => {
|
|
||||||
controller = new AbortController()
|
|
||||||
chatResponse.updateFromError('User aborted request.')
|
|
||||||
signal.removeEventListener('abort', abortListener)
|
|
||||||
}
|
|
||||||
signal.addEventListener('abort', abortListener)
|
|
||||||
|
|
||||||
if (opts.streaming) {
|
|
||||||
chatResponse.onFinish(() => {
|
|
||||||
updating = false
|
|
||||||
updatingMessage = ''
|
|
||||||
scrollToBottom()
|
|
||||||
})
|
|
||||||
fetchEventSource(getApiBase() + getEndpointCompletions(), {
|
|
||||||
...fetchOptions,
|
|
||||||
openWhenHidden: true,
|
|
||||||
onmessage (ev) {
|
|
||||||
// Remove updating indicator
|
|
||||||
updating = 1 // hide indicator, but still signal we're updating
|
|
||||||
updatingMessage = ''
|
|
||||||
// console.log('ev.data', ev.data)
|
|
||||||
if (!chatResponse.hasFinished()) {
|
|
||||||
if (ev.data === '[DONE]') {
|
|
||||||
// ?? anything to do when "[DONE]"?
|
|
||||||
} else {
|
|
||||||
const data = JSON.parse(ev.data)
|
|
||||||
// console.log('data', data)
|
|
||||||
window.requestAnimationFrame(() => { chatResponse.updateFromAsyncResponse(data) })
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
onclose () {
|
|
||||||
chatResponse.updateFromClose()
|
|
||||||
},
|
|
||||||
onerror (err) {
|
|
||||||
console.error(err)
|
|
||||||
throw err
|
|
||||||
},
|
|
||||||
async onopen (response) {
|
|
||||||
if (response.ok && response.headers.get('content-type') === EventStreamContentType) {
|
|
||||||
// everything's good
|
|
||||||
} else {
|
|
||||||
// client-side errors are usually non-retriable:
|
|
||||||
await handleError(response)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}).catch(err => {
|
|
||||||
chatResponse.updateFromError(err.message)
|
|
||||||
scrollToBottom()
|
|
||||||
})
|
|
||||||
} else {
|
|
||||||
const response = await fetch(getApiBase() + getEndpointCompletions(), fetchOptions)
|
|
||||||
if (!response.ok) {
|
|
||||||
await handleError(response)
|
|
||||||
} else {
|
|
||||||
const json = await response.json()
|
|
||||||
// Remove updating indicator
|
|
||||||
updating = false
|
|
||||||
updatingMessage = ''
|
|
||||||
chatResponse.updateFromSyncResponse(json)
|
|
||||||
scrollToBottom()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (e) {
|
|
||||||
// console.error(e)
|
|
||||||
updating = false
|
|
||||||
updatingMessage = ''
|
|
||||||
chatResponse.updateFromError(e.message)
|
|
||||||
scrollToBottom()
|
|
||||||
}
|
|
||||||
|
|
||||||
return chatResponse
|
|
||||||
}
|
|
||||||
|
|
||||||
const addNewMessage = () => {
|
const addNewMessage = () => {
|
||||||
if (updating) return
|
if (chatRequest.updating) return
|
||||||
let inputMessage: Message
|
let inputMessage: Message
|
||||||
const lastMessage = chat.messages[chat.messages.length - 1]
|
const lastMessage = chat.messages[chat.messages.length - 1]
|
||||||
const uuid = uuidv4()
|
const uuid = uuidv4()
|
||||||
|
@@ -545,9 +195,21 @@
   }
 }

+let waitingForCancel:any = 0
+
+const cancelRequest = () => {
+  if (!waitingForCancel) {
+    // wait a second for another click to avoid accidental cancel
+    waitingForCancel = setTimeout(() => { waitingForCancel = 0 }, 1000)
+    return
+  }
+  clearTimeout(waitingForCancel); waitingForCancel = 0
+  chatRequest.controller.abort()
+}
+
 const submitForm = async (recorded: boolean = false, skipInput: boolean = false, fillMessage: Message|undefined = undefined): Promise<void> => {
   // Compose the system prompt message if there are no messages yet - disabled for now
-  if (updating) return
+  if (chatRequest.updating) return

   lastSubmitRecorded = recorded

@@ -562,8 +224,6 @@
     fillMessage = chat.messages[chat.messages.length - 1]
   }

-  if (fillMessage && fillMessage.content) fillMessage.content += ' ' // add a space
-
   // Clear the input value
   input.value = ''
   input.blur()
@@ -573,7 +233,7 @@
   }
   focusInput()

-  const response = await sendRequest(chat.messages, {
+  const response = await chatRequest.sendRequest(chat.messages, {
     chat,
     autoAddMessages: true, // Auto-add and update messages in array
     streaming: chatSettings.stream,
@@ -600,7 +260,7 @@
   const suggestMessages = chat.messages.slice(0, 10) // limit to first 10 messages
   suggestMessages.push(suggestMessage)

-  const response = await sendRequest(suggestMessages, {
+  const response = await chatRequest.sendRequest(suggestMessages, {
     chat,
     autoAddMessages: false,
     streaming: false,
@@ -640,7 +300,7 @@

 const recordToggle = () => {
   ttsStop()
-  if (updating) return
+  if (chatRequest.updating) return
   // Check if already recording - if so, stop - else start
   if (recording) {
     recognition?.stop()
@@ -677,11 +337,11 @@

 <Messages messages={chat.messages} chatId={chatId} />

-{#if updating === true}
+{#if chatRequest.updating === true}
   <article class="message is-success assistant-message">
     <div class="message-body content">
       <span class="is-loading" ></span>
-      <span>{updatingMessage}</span>
+      <span>{chatRequest.updatingMessage}</span>
     </div>
   </article>
 {/if}
@@ -710,7 +370,7 @@
   />
 </p>
 <p class="control mic" class:is-hidden={!recognition}>
-  <button class="button" class:is-disabled={updating} class:is-pulse={recording} on:click|preventDefault={recordToggle}
+  <button class="button" class:is-disabled={chatRequest.updating} class:is-pulse={recording} on:click|preventDefault={recordToggle}
     ><span class="icon"><Fa icon={faMicrophone} /></span></button
   >
 </p>
@@ -718,11 +378,17 @@
   <button title="Chat/Profile Settings" class="button" on:click|preventDefault={showSettingsModal}><span class="icon"><Fa icon={faGear} /></span></button>
 </p>
 <p class="control queue">
-  <button title="Queue message, don't send yet" class:is-disabled={updating} class="button is-ghost" on:click|preventDefault={addNewMessage}><span class="icon"><Fa icon={faArrowUpFromBracket} /></span></button>
+  <button title="Queue message, don't send yet" class:is-disabled={chatRequest.updating} class="button is-ghost" on:click|preventDefault={addNewMessage}><span class="icon"><Fa icon={faArrowUpFromBracket} /></span></button>
 </p>
-{#if updating}
+{#if chatRequest.updating}
   <p class="control send">
-    <button title="Cancel Response" class="button is-danger" type="button" on:click={() => { controller.abort() }}><span class="icon"><Fa icon={faCommentSlash} /></span></button>
+    <button title="Cancel Response" class="button is-danger" type="button" on:click={cancelRequest}><span class="icon">
+      {#if waitingForCancel}
+        <Fa icon={faCircleCheck} />
+      {:else}
+        <Fa icon={faCommentSlash} />
+      {/if}
+    </span></button>
   </p>
 {:else}
   <p class="control send">
@@ -34,7 +34,7 @@ export class ChatCompletionResponse {

   private setModel = (model: Model) => {
     if (!model) return
-    !this.model && setLatestKnownModel(this.chat.settings.model as Model, model)
+    !this.model && setLatestKnownModel(this.chat.settings.model, model)
     this.lastModel = this.model || model
     this.model = model
   }
@@ -51,6 +51,15 @@ export class ChatCompletionResponse {
   private messageChangeListeners: ((m: Message[]) => void)[] = []
   private finishListeners: ((m: Message[]) => void)[] = []

+  private initialFillMerge (existingContent:string, newContent:string):string {
+    if (!this.didFill && this.isFill && existingContent && !newContent.match(/^'(t|ll|ve|m|d|re)[^a-z]/i)) {
+      // add a trailing space if our new content isn't a contraction
+      existingContent += ' '
+    }
+    this.didFill = true
+    return existingContent
+  }
+
   setPromptTokenCount (tokens:number) {
     this.promptTokenCount = tokens
   }
@@ -61,11 +70,7 @@ export class ChatCompletionResponse {
       const exitingMessage = this.messages[i]
       const message = exitingMessage || choice.message
       if (exitingMessage) {
-        if (!this.didFill && this.isFill && choice.message.content.match(/^'(t|ll|ve|m|d|re)[^a-z]/i)) {
-          // deal with merging contractions since we've added an extra space to your fill message
-          message.content.replace(/ $/, '')
-        }
-        this.didFill = true
+        message.content = this.initialFillMerge(message.content, choice.message.content)
         message.content += choice.message.content
         message.usage = message.usage || {
           prompt_tokens: 0,
@@ -100,11 +105,7 @@ export class ChatCompletionResponse {
       } as Message
       choice.delta?.role && (message.role = choice.delta.role)
       if (choice.delta?.content) {
-        if (!this.didFill && this.isFill && choice.delta.content.match(/^'(t|ll|ve|m|d|re)[^a-z]/i)) {
-          // deal with merging contractions since we've added an extra space to your fill message
-          message.content.replace(/([a-z]) $/i, '$1')
-        }
-        this.didFill = true
+        message.content = this.initialFillMerge(message.content, choice.delta?.content)
         message.content += choice.delta.content
       }
       completionTokenCount += encode(message.content).length
@@ -179,7 +180,7 @@ export class ChatCompletionResponse {
     this.messages.forEach(m => { m.streaming = false }) // make sure all are marked stopped
     saveChatStore()
     const message = this.messages[0]
-    const model = this.model || getLatestKnownModel(this.chat.settings.model as Model)
+    const model = this.model || getLatestKnownModel(this.chat.settings.model)
     if (message) {
       if (this.isFill && this.lastModel === this.model && this.offsetTotals && model && message.usage) {
         // Need to subtract some previous message totals before we add new combined message totals
@@ -0,0 +1,388 @@
|
||||||
|
<script context="module" lang="ts">
|
||||||
|
import { ChatCompletionResponse } from './ChatCompletionResponse.svelte'
|
||||||
|
import { mergeProfileFields, prepareSummaryPrompt } from './Profiles.svelte'
|
||||||
|
import { countMessageTokens, countPromptTokens, getModelMaxTokens } from './Stats.svelte'
|
||||||
|
import type { Chat, ChatCompletionOpts, ChatSettings, Message, Model, Request } from './Types.svelte'
|
||||||
|
import { deleteMessage, getChatSettingValueNullDefault, insertMessages, saveChatStore, getApiKey, addError } from './Storage.svelte'
|
||||||
|
import { scrollToBottom, scrollToMessage } from './Util.svelte'
|
||||||
|
import { getRequestSettingList, defaultModel } from './Settings.svelte'
|
||||||
|
import { EventStreamContentType, fetchEventSource } from '@microsoft/fetch-event-source'
|
||||||
|
import { getApiBase, getEndpointCompletions } from './ApiUtil.svelte'
|
||||||
|
|
||||||
|
export class ChatRequest {
|
||||||
|
constructor () {
|
||||||
|
this.controller = new AbortController()
|
||||||
|
this.updating = false
|
||||||
|
this.updatingMessage = ''
|
||||||
|
}
|
||||||
|
|
||||||
|
private chat: Chat
|
||||||
|
updating: boolean|number = false
|
||||||
|
updatingMessage: string = ''
|
||||||
|
controller:AbortController
|
||||||
|
|
||||||
|
setChat (chat: Chat) {
|
||||||
|
this.chat = chat
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Send API request
|
||||||
|
* @param messages
|
||||||
|
* @param opts
|
||||||
|
* @param overrides
|
||||||
|
*/
|
||||||
|
async sendRequest (messages: Message[], opts: ChatCompletionOpts, overrides: ChatSettings = {} as ChatSettings): Promise<ChatCompletionResponse> {
|
||||||
|
// TODO: Continue to break this method down to smaller chunks
|
||||||
|
const _this = this
|
||||||
|
const chat = _this.chat
|
||||||
|
const chatSettings = _this.chat.settings
|
||||||
|
const chatId = chat.id
|
||||||
|
opts.chat = chat
|
||||||
|
_this.updating = true
|
||||||
|
|
||||||
|
// Submit only the role and content of the messages, provide the previous messages as well for context
|
||||||
|
const messageFilter = (m:Message) => !m.suppress && m.role !== 'error' && m.content && !m.summarized
|
||||||
|
const filtered = messages.filter(messageFilter)
|
||||||
|
|
||||||
|
// If we're doing continuous chat, do it
|
||||||
|
if (!opts.didSummary && !opts.summaryRequest && chatSettings.continuousChat) return await this.doContinuousChat(filtered, opts, overrides)
|
||||||
|
|
||||||
|
const model = this.getModel()
|
||||||
|
const maxTokens = getModelMaxTokens(model)
|
||||||
|
|
||||||
|
const messagePayload = filtered.map((m, i) => { return { role: m.role, content: m.content } }) as Message[]
|
||||||
|
// Inject hidden prompt if requested
|
||||||
|
if (!opts.summaryRequest) this.buildHiddenPromptPrefixMessage(messagePayload, true)
|
||||||
|
|
||||||
|
const chatResponse = new ChatCompletionResponse(opts)
|
||||||
|
const promptTokenCount = countPromptTokens(messagePayload, model)
|
||||||
|
const maxAllowed = maxTokens - (promptTokenCount + 1)
|
||||||
|
|
||||||
|
// Build and make the request
|
||||||
|
try {
|
||||||
|
// Build the API request body
|
||||||
|
const request: Request = {
|
||||||
|
model: chatSettings.model,
|
||||||
|
messages: messagePayload,
|
||||||
|
// Provide the settings by mapping the settingsMap to key/value pairs
|
||||||
|
...getRequestSettingList().reduce((acc, setting) => {
|
||||||
|
const key = setting.key
|
||||||
|
let value = getChatSettingValueNullDefault(chatId, setting)
|
||||||
|
if (key in overrides) value = overrides[key]
|
||||||
|
if (typeof setting.apiTransform === 'function') {
|
||||||
|
value = setting.apiTransform(chatId, setting, value)
|
||||||
|
}
|
||||||
|
if (key === 'max_tokens') {
|
||||||
|
if (opts.maxTokens) value = opts.maxTokens // only as large as requested
|
||||||
|
if (value > maxAllowed || value < 1) value = null // if over max model, do not define max
|
||||||
|
}
|
||||||
|
if (key === 'n') {
|
||||||
|
if (opts.streaming || opts.summaryRequest) {
|
||||||
|
/*
|
||||||
|
Streaming goes insane with more than one completion.
|
||||||
|
Doesn't seem like there's any way to separate the jumbled mess of deltas for the
|
||||||
|
different completions.
|
||||||
|
Summary should only have one completion
|
||||||
|
*/
|
||||||
|
value = 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (value !== null) acc[key] = value
|
||||||
|
return acc
|
||||||
|
}, {}),
|
||||||
|
stream: opts.streaming
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add out token count to the response handler
|
||||||
|
// (streaming doesn't return counts, so we need to do it client side)
|
||||||
|
chatResponse.setPromptTokenCount(promptTokenCount)
|
||||||
|
|
||||||
|
const signal = _this.controller.signal
|
||||||
|
|
||||||
|
// console.log('apikey', $apiKeyStorage)
|
||||||
|
|
||||||
|
const fetchOptions = {
|
||||||
|
method: 'POST',
|
||||||
|
headers: {
|
||||||
|
Authorization: `Bearer ${getApiKey()}`,
|
||||||
|
'Content-Type': 'application/json'
|
||||||
|
},
|
||||||
|
body: JSON.stringify(request),
|
||||||
|
signal
|
||||||
|
}
|
||||||
|
|
||||||
|
// Common error handler
|
||||||
|
const handleError = async (response) => {
|
||||||
|
let errorResponse
|
||||||
|
try {
|
||||||
|
const errObj = await response.json()
|
||||||
|
errorResponse = errObj?.error?.message || errObj?.error?.code
|
||||||
|
if (!errorResponse && response.choices && response.choices[0]) {
|
||||||
|
errorResponse = response.choices[0]?.message?.content
|
||||||
|
}
|
||||||
|
errorResponse = errorResponse || 'Unexpected Response'
|
||||||
|
} catch (e) {
|
||||||
|
errorResponse = 'Unknown Response'
|
||||||
|
}
|
||||||
|
throw new Error(`${response.status} - ${errorResponse}`)
|
||||||
|
}
|
||||||
|
|
||||||
|
// fetchEventSource doesn't seem to throw on abort,
|
||||||
|
// so we deal with it ourselves
|
||||||
|
const abortListener = (e:Event) => {
|
||||||
|
_this.controller = new AbortController()
|
||||||
|
chatResponse.updateFromError('User aborted request.')
|
||||||
|
signal.removeEventListener('abort', abortListener)
|
||||||
|
}
|
||||||
|
signal.addEventListener('abort', abortListener)
|
||||||
|
|
||||||
|
if (opts.streaming) {
|
||||||
|
/**
|
||||||
|
* Streaming request/response
|
||||||
|
* We'll get the response a token at a time, as soon as they are ready
|
||||||
|
*/
|
||||||
|
chatResponse.onFinish(() => {
|
||||||
|
_this.updating = false
|
||||||
|
_this.updatingMessage = ''
|
||||||
|
})
|
||||||
|
fetchEventSource(getApiBase() + getEndpointCompletions(), {
|
||||||
|
...fetchOptions,
|
||||||
|
openWhenHidden: true,
|
||||||
|
onmessage (ev) {
|
||||||
|
// Remove updating indicator
|
||||||
|
_this.updating = 1 // hide indicator, but still signal we're updating
|
||||||
|
_this.updatingMessage = ''
|
||||||
|
// console.log('ev.data', ev.data)
|
||||||
|
if (!chatResponse.hasFinished()) {
|
||||||
|
if (ev.data === '[DONE]') {
|
||||||
|
// ?? anything to do when "[DONE]"?
|
||||||
|
} else {
|
||||||
|
const data = JSON.parse(ev.data)
|
||||||
|
// console.log('data', data)
|
||||||
|
window.requestAnimationFrame(() => { chatResponse.updateFromAsyncResponse(data) })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
onclose () {
|
||||||
|
chatResponse.updateFromClose()
|
||||||
|
},
|
||||||
|
onerror (err) {
|
||||||
|
console.error(err)
|
||||||
|
throw err
|
||||||
|
},
|
||||||
|
async onopen (response) {
|
||||||
|
if (response.ok && response.headers.get('content-type') === EventStreamContentType) {
|
||||||
|
// everything's good
|
||||||
|
} else {
|
||||||
|
// client-side errors are usually non-retriable:
|
||||||
|
await handleError(response)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}).catch(err => {
|
||||||
|
chatResponse.updateFromError(err.message)
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
/**
|
||||||
|
* Non-streaming request/response
|
||||||
|
* We'll get the response all at once, after a long delay
|
||||||
|
*/
|
||||||
|
const response = await fetch(getApiBase() + getEndpointCompletions(), fetchOptions)
|
||||||
|
if (!response.ok) {
|
||||||
|
await handleError(response)
|
||||||
|
} else {
|
||||||
|
const json = await response.json()
|
||||||
|
// Remove updating indicator
|
||||||
|
_this.updating = false
|
||||||
|
_this.updatingMessage = ''
|
||||||
|
chatResponse.updateFromSyncResponse(json)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
// console.error(e)
|
||||||
|
_this.updating = false
|
||||||
|
_this.updatingMessage = ''
|
||||||
|
chatResponse.updateFromError(e.message)
|
||||||
|
}
|
||||||
|
|
||||||
|
return chatResponse
|
||||||
|
}
|
||||||
|
|
||||||
|
private getModel (): Model {
|
||||||
|
return this.chat.settings.model || defaultModel
|
||||||
|
}
|
||||||
|
|
||||||
|
private buildHiddenPromptPrefixMessage (messages: Message[], insert:boolean = false): Message|null {
|
||||||
|
const chatSettings = this.chat.settings
|
||||||
|
const hiddenPromptPrefix = mergeProfileFields(chatSettings, chatSettings.hiddenPromptPrefix).trim()
|
||||||
|
if (hiddenPromptPrefix && messages.length && messages[messages.length - 1].role === 'user') {
|
||||||
|
const message = { role: 'user', content: hiddenPromptPrefix } as Message
|
||||||
|
if (insert) {
|
||||||
|
messages.splice(messages.length - 1, 0, message)
|
||||||
|
}
|
||||||
|
return message
|
||||||
|
}
|
||||||
|
return null
|
||||||
|
}
|
||||||
|
|
||||||
|
private getTokenCountPadding (filtered: Message[]): number {
|
||||||
|
const hiddenPromptMessage = this.buildHiddenPromptPrefixMessage(filtered)
|
||||||
|
let result = 0
|
||||||
|
if (hiddenPromptMessage) {
|
||||||
|
// add cost of hiddenPromptPrefix
|
||||||
|
result += countMessageTokens(hiddenPromptMessage, this.getModel())
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
private async doContinuousChat (filtered: Message[], opts: ChatCompletionOpts, overrides: ChatSettings): Promise<ChatCompletionResponse> {
|
||||||
|
const _this = this
|
||||||
|
const chat = _this.chat
|
||||||
|
const chatSettings = chat.settings
|
||||||
|
const chatId = chat.id
|
||||||
|
const reductionMode = chatSettings.continuousChat
|
||||||
|
const model = _this.getModel()
|
||||||
|
const maxTokens = getModelMaxTokens(model) // max tokens for model
|
||||||
|
|
||||||
|
const continueRequest = async () => {
|
||||||
|
return await _this.sendRequest(chat.messages, {
|
||||||
|
...opts,
|
||||||
|
didSummary: true
|
||||||
|
}, overrides)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get extra counts for when the prompts are finally sent.
|
||||||
|
const countPadding = this.getTokenCountPadding(filtered)
|
||||||
|
|
||||||
|
// See if we have enough to apply any of the reduction modes
|
||||||
|
const fullPromptSize = countPromptTokens(filtered, model) + countPadding
|
||||||
|
if (fullPromptSize < chatSettings.summaryThreshold) return await continueRequest() // nothing to do yet
|
||||||
|
const overMax = fullPromptSize > maxTokens * 0.95
|
||||||
|
|
||||||
|
// Isolate the pool of messages we're going to reduce
|
||||||
|
const pinTop = chatSettings.pinTop
|
||||||
|
let pinBottom = chatSettings.pinBottom || 2
|
||||||
|
const systemPad = filtered[0]?.role === 'system' ? 1 : 0
|
||||||
|
const top = filtered.slice(0, pinTop + systemPad)
|
||||||
|
let rw = filtered.slice(pinTop + systemPad, filtered.length)
|
||||||
|
if (pinBottom >= rw.length) pinBottom = 1
|
||||||
|
if (pinBottom >= rw.length) {
|
||||||
|
if (overMax) addError(chatId, 'Unable to apply continuous chat. Check threshold, pin top and pin bottom settings.')
|
||||||
|
return await continueRequest()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reduce based on mode
|
||||||
|
if (reductionMode === 'fifo') {
|
||||||
|
/***************************************************************
|
||||||
|
* FIFO mode. Roll the top off until we're under our threshold.
|
||||||
|
* *************************************************************
|
||||||
|
*/
|
||||||
|
|
||||||
|
let promptSize = countPromptTokens(top.concat(rw), model) + countPadding
|
||||||
|
while (rw.length && rw.length > pinBottom && promptSize >= chatSettings.summaryThreshold) {
|
||||||
|
const rolled = rw.shift()
|
||||||
|
// Hide messages we're "rolling"
|
||||||
|
if (rolled) rolled.suppress = true
|
||||||
|
promptSize = countPromptTokens(top.concat(rw), model) + countPadding
|
||||||
|
}
|
||||||
|
// Run a new request, now with the rolled messages hidden
|
||||||
|
return await _this.sendRequest(chat.messages, {
|
||||||
|
...opts,
|
||||||
|
didSummary: true // our "summary" was simply dropping some messages
|
||||||
|
}, overrides)
|
||||||
|
} else if (reductionMode === 'summary') {
|
||||||
|
/******************************************************
|
||||||
|
* Summary mode. Reduce it all to a summary, if we can.
|
||||||
|
* ****************************************************
|
||||||
|
*/
|
||||||
|
|
||||||
|
const bottom = rw.slice(0 - pinBottom)
|
||||||
|
rw = rw.slice(0, 0 - pinBottom)
|
||||||
|
let reductionPoolSize = countPromptTokens(rw, model)
|
||||||
|
const ss = chatSettings.summarySize
|
||||||
|
const getSS = ():number => (ss < 1 && ss > 0)
|
||||||
|
? Math.round(reductionPoolSize * ss) // If summarySize between 0 and 1, use percentage of reduced
|
||||||
|
: Math.min(ss, reductionPoolSize * 0.5) // If > 1, use token count
|
||||||
|
let promptSummary = prepareSummaryPrompt(chatId, reductionPoolSize)
|
||||||
|
const summaryRequest = { role: 'user', content: promptSummary } as Message
|
||||||
|
let promptSummarySize = countMessageTokens(summaryRequest, model)
|
||||||
|
// Make sure there is enough room to generate the summary, and try to make sure
|
||||||
|
// the last prompt is a user prompt as that seems to work better for summaries
|
||||||
|
while ((reductionPoolSize + promptSummarySize + getSS()) >= maxTokens ||
|
||||||
|
(reductionPoolSize >= 100 && rw[rw.length - 1]?.role !== 'user')) {
|
||||||
|
bottom.unshift(rw.pop() as Message)
|
||||||
|
reductionPoolSize = countPromptTokens(rw, model)
|
||||||
|
promptSummary = prepareSummaryPrompt(chatId, reductionPoolSize)
|
||||||
|
summaryRequest.content = promptSummary
|
||||||
|
promptSummarySize = countMessageTokens(summaryRequest, model)
|
||||||
|
}
|
||||||
|
if (reductionPoolSize < 50) {
|
||||||
|
if (overMax) addError(chatId, 'Check summary settings. Unable to summarize enough messages.')
|
||||||
|
return continueRequest()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a message the summary will be loaded into
|
||||||
|
const summaryResponse = {
|
||||||
|
role: 'assistant',
|
||||||
|
content: '',
|
||||||
|
streaming: opts.streaming,
|
||||||
|
summary: [] as string[],
|
||||||
|
model
|
||||||
|
} as Message
|
||||||
|
|
||||||
|
// Insert summary completion prompt after that last message we're summarizing
|
||||||
|
insertMessages(chatId, rw[rw.length - 1], [summaryResponse])
|
||||||
|
if (opts.streaming) setTimeout(() => scrollToMessage(summaryResponse.uuid, 150, true, true), 0)
|
||||||
|
|
||||||
|
// Request and load the summarization prompt
|
||||||
|
_this.updatingMessage = 'Summarizing...'
|
||||||
|
const summary = await _this.sendRequest(top.concat(rw).concat([summaryRequest]), {
|
||||||
|
summaryRequest: true,
|
||||||
|
streaming: opts.streaming,
|
||||||
|
maxTokens: chatSettings.summarySize,
|
||||||
|
fillMessage: summaryResponse,
|
||||||
|
autoAddMessages: true,
|
||||||
|
onMessageChange: (m) => {
|
||||||
|
if (opts.streaming) scrollToMessage(summaryResponse.uuid, 150, true, true)
|
||||||
|
}
|
||||||
|
} as ChatCompletionOpts, {
|
||||||
|
temperature: 0, // make summary more deterministic
|
||||||
|
top_p: 0.5,
|
||||||
|
presence_penalty: 0,
|
||||||
|
frequency_penalty: 0,
|
||||||
|
...overrides
|
||||||
|
} as ChatSettings)
|
||||||
|
// Wait for the response to complete
|
||||||
|
if (!summary.hasFinished()) await summary.promiseToFinish()
|
||||||
|
if (summary.hasError()) {
|
||||||
|
// Failed to some API issue. let the original caller handle it.
|
||||||
|
deleteMessage(chatId, summaryResponse.uuid)
|
||||||
|
return summary
|
||||||
|
} else {
|
||||||
|
// Looks like we got our summarized messages.
|
||||||
|
// Mark the new summaries as such
|
||||||
|
summaryResponse.summary = rw.map(m => m.uuid)
|
||||||
|
const summaryIds = [summaryResponse.uuid]
|
||||||
|
// Disable the messages we summarized so they still show in history
|
||||||
|
rw.forEach((m, i) => { m.summarized = summaryIds })
|
||||||
|
saveChatStore()
|
||||||
|
// Re-run request with summarized prompts
|
||||||
|
// return { error: { message: "End for now" } } as Response
|
||||||
|
_this.updatingMessage = 'Continuing...'
|
||||||
|
scrollToBottom(true)
|
||||||
|
return await _this.sendRequest(chat.messages, {
|
||||||
|
...opts,
|
||||||
|
didSummary: true
|
||||||
|
})
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/***************
|
||||||
|
* Unknown mode.
|
||||||
|
* *************
|
||||||
|
*/
|
||||||
|
addError(chatId, `Unknown Continuous Chat Mode "${reductionMode}".`)
|
||||||
|
return continueRequest()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
</script>
|
|
@@ -174,7 +174,7 @@
   min={setting.min}
   max={setting.max}
   step={setting.step}
-  placeholder={String(setting.placeholder)}
+  placeholder={String(setting.placeholder || chatDefaults[setting.key])}
   on:change={e => queueSettingValueChange(e, setting)}
 />
 {:else if setting.type === 'select'}
@@ -167,7 +167,7 @@
 const profileSelect = getChatSettingObjectByKey('profile') as ChatSetting & SettingSelect
 profileSelect.options = getProfileSelect()
 chatDefaults.profile = getDefaultProfileKey()
-chatDefaults.max_tokens = getModelMaxTokens(chatSettings.model || '')
+chatDefaults.max_tokens = getModelMaxTokens(chatSettings.model)
 // const defaultProfile = globalStore.defaultProfile || profileSelect.options[0].value
 defaultProfile = getDefaultProfileKey()
 isDefault = defaultProfile === chatSettings.profile
@@ -82,10 +82,8 @@ export const prepareProfilePrompt = (chatId:number) => {
   return mergeProfileFields(settings, settings.systemPrompt).trim()
 }

-export const prepareSummaryPrompt = (chatId:number, promptsSize:number, maxTokens:number|undefined = undefined) => {
+export const prepareSummaryPrompt = (chatId:number, maxTokens:number) => {
   const settings = getChatSettings(chatId)
-  maxTokens = maxTokens || settings.summarySize
-  maxTokens = Math.min(Math.floor(promptsSize / 4), maxTokens) // Make sure we're shrinking by at least a 4th
   const currentSummaryPrompt = settings.summaryPrompt
   // ~.75 words per token. May need to reduce
   return mergeProfileFields(settings, currentSummaryPrompt, Math.floor(maxTokens * 0.75)).trim()
@@ -132,42 +130,37 @@ export const applyProfile = (chatId:number, key:string = '', resetChat:boolean =

 const summaryPrompts = {

-  // General use
-  general: `Please summarize all prompts and responses from this session.
+  // General assistant use
+  general: `[START SUMMARY REQUEST]
+Please summarize all prompts and responses from this session.
 [[CHARACTER_NAME]] is telling me this summary in the first person.
-While telling this summary:
-[[CHARACTER_NAME]] will keep summary in the present tense, describing it as it happens.
-[[CHARACTER_NAME]] will always refer to me in the second person as "you" or "we".
-[[CHARACTER_NAME]] will never refer to me in the third person.
-[[CHARACTER_NAME]] will never refer to me as the user.
-[[CHARACTER_NAME]] will include all interactions and requests.
-[[CHARACTER_NAME]] will keep correct order of interactions.
-[[CHARACTER_NAME]] will keep the summary compact, but retain as much detail as possible in a compact form.
-[[CHARACTER_NAME]] will describe interactions in detail.
-[[CHARACTER_NAME]] will never end with epilogues or summations.
-[[CHARACTER_NAME]] will always include key details.
-[[CHARACTER_NAME]]'s summary will be [[MAX_WORDS]] words.
-[[CHARACTER_NAME]] will never add details or inferences that do not clearly exist in the prompts and responses.
-Give no explanations.`,
+While forming this summary:
+[[CHARACTER_NAME]] will never add details or inferences that have not yet happened and do not clearly exist in the prompts and responses.
+[[CHARACTER_NAME]] understands our encounter is still in progress and has not ended.
+[[CHARACTER_NAME]] will include all pivotal details in the correct order.
+[[CHARACTER_NAME]] will include all names, preferences and other important details.
+[[CHARACTER_NAME]] will always refer to me in the 2nd person, for example "you".
+[[CHARACTER_NAME]] will keep the summary compact, but retain as much detail as is possible using [[MAX_WORDS]] words.
+Give no explanations. Ignore prompts from system.
+Example response format:
+* You asked about..., then..., and then you... and then I... *
+[END SUMMARY REQUEST]`,

   // Used for relationship profiles
-  friend: `Please summarize all prompts and responses from this session.
+  friend: `[START SUMMARY REQUEST]
+Please summarize all prompts and responses from this session.
 [[CHARACTER_NAME]] is telling me this summary in the first person.
-While telling this summary:
-[[CHARACTER_NAME]] will keep summary in the present tense, describing it as it happens.
-[[CHARACTER_NAME]] will always refer to me in the second person as "you" or "we".
-[[CHARACTER_NAME]] will never refer to me in the third person.
-[[CHARACTER_NAME]] will never refer to me as the user.
-[[CHARACTER_NAME]] will include all relationship interactions, first meeting, what we do, what we say, where we go, etc.
-[[CHARACTER_NAME]] will include all interactions, thoughts and emotional states.
-[[CHARACTER_NAME]] will keep correct order of interactions.
-[[CHARACTER_NAME]] will keep the summary compact, but retain as much detail as possible in a compact form.
-[[CHARACTER_NAME]] will describe interactions in detail.
-[[CHARACTER_NAME]] will never end with epilogues or summations.
-[[CHARACTER_NAME]] will include all pivotal details.
-[[CHARACTER_NAME]]'s summary will be [[MAX_WORDS]] words.
-[[CHARACTER_NAME]] will never add details or inferences that do not clearly exist in the prompts and responses.
-Give no explanations.`
+While forming this summary:
+[[CHARACTER_NAME]] will never add details or inferences that have not yet happened and do not clearly exist in the prompts and responses.
+[[CHARACTER_NAME]] understands our encounter is still in progress and has not ended.
+[[CHARACTER_NAME]] will include all pivotal details and emotional states in the correct order.
+[[CHARACTER_NAME]] will include all names, gifts, preferences, purchase and other important details.
+[[CHARACTER_NAME]] will always refer to me in the 2nd person, for example "you".
+[[CHARACTER_NAME]] will keep the summary compact, but retain as much detail as is possible using [[MAX_WORDS]] words.
+Give no explanations. Ignore prompts from system.
+Example response format:
+* We met at a park where you and I talked about out interests, then..., and then you... and then we... *
+[END SUMMARY REQUEST]`
 }

 const profiles:Record<string, ChatSettings> = {
@@ -171,7 +171,7 @@ const systemPromptSettings: ChatSetting[] = [
   {
     key: 'hiddenPromptPrefix',
     name: 'Hidden Prompt Prefix',
-    title: 'A prompt that will be silently injected before every user prompt.',
+    title: 'A user prompt that will be silently injected before every new user prompt, then removed from history.',
     placeholder: 'Enter user prompt prefix here. You can remind ChatGPT how to act.',
     type: 'textarea',
     hide: (chatId) => !getChatSettings(chatId).useSystemPrompt
@@ -251,7 +251,7 @@ const summarySettings: ChatSetting[] = [
   },
   {
     key: 'summaryPrompt',
-    name: 'Summary Generation Prompt (Empty will use FIFO instead.)',
+    name: 'Summary Generation Prompt',
     title: 'A prompt used to summarize past prompts.',
     placeholder: 'Enter a prompt that will be used to summarize past prompts here.',
     type: 'textarea',
@@ -31,11 +31,16 @@

 export const countPromptTokens = (prompts:Message[], model:Model):number => {
   return prompts.reduce((a, m) => {
-    // Not sure how OpenAI formats it, but this seems to get close to the right counts.
-    // Would be nice to know. This works for gpt-3.5. gpt-4 could be different
-    a += encode('## ' + m.role + ' ##:\r\n\r\n' + m.content + '\r\n\r\n\r\n').length
+    a += countMessageTokens(m, model)
     return a
-  }, 0) + 3
+  }, 0) + 3 // Always seems to be message counts + 3
+}
+
+export const countMessageTokens = (message:Message, model:Model):number => {
+  // Not sure how OpenAI formats it, but this seems to get close to the right counts.
+  // Would be nice to know. This works for gpt-3.5. gpt-4 could be different.
+  // Complete stab in the dark here -- update if you know where all the extra tokens really come from.
+  return encode('## ' + message.role + ' ##:\r\n\r\n' + message.content + '\r\n\r\n\r\n').length
 }

 export const getModelMaxTokens = (model:Model):number => {
@@ -19,6 +19,10 @@

 const chatDefaults = getChatDefaults()

+export const getApiKey = (): string => {
+  return get(apiKeyStorage)
+}
+
 export const newChatID = (): number => {
   const chats = get(chatsStorage)
   const chatId = chats.reduce((maxId, chat) => Math.max(maxId, chat.id), 0) + 1
@@ -203,6 +207,10 @@
   chatsStorage.set(chats)
 }

+export const addError = (chatId: number, error: string) => {
+  addMessage(chatId, { content: error } as Message)
+}
+
 export const addMessage = (chatId: number, message: Message) => {
   const chats = get(chatsStorage)
   const chat = chats.find((chat) => chat.id === chatId) as Chat
@@ -232,6 +240,7 @@
     console.error("Couldn't insert after message:", insertAfter)
     return
   }
+  newMessages.forEach(m => { m.uuid = m.uuid || uuidv4() })
   chat.messages.splice(index + 1, 0, ...newMessages)
   chatsStorage.set(chats)
 }
@@ -38,7 +38,7 @@
 }

 export type Request = {
-  model?: Model;
+  model: Model;
   messages?: Message[];
   temperature?: number;
   top_p?: number;
@@ -60,6 +60,11 @@
   }
 }

+export const scrollToBottom = (instant:boolean = false) => {
+  setTimeout(() => document.querySelector('body')?.scrollIntoView({ behavior: (instant ? 'instant' : 'smooth') as any, block: 'end' }), 0)
+}
+
 export const checkModalEsc = (event:KeyboardEvent|undefined):boolean|void => {
   if (!event || event.key !== 'Escape') return
   dispatchModalEsc()
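Usage sketch: a minimal, non-authoritative example of how the refactored class is driven, using only the API added in this commit (ChatRequest, setChat, sendRequest, updating, updatingMessage, controller); the chat and chatSettings values are assumed to come from the surrounding Chat.svelte component.

    import { ChatRequest } from './ChatRequest.svelte'

    const chatRequest = new ChatRequest()
    chatRequest.setChat(chat)

    // Send the conversation; progress state now lives on the instance.
    const response = await chatRequest.sendRequest(chat.messages, {
      chat,
      autoAddMessages: true,          // auto-add and update messages in the chat
      streaming: chatSettings.stream
    })
    if (!response.hasFinished()) await response.promiseToFinish()

    // The UI reads chatRequest.updating / chatRequest.updatingMessage,
    // and cancels with chatRequest.controller.abort()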