sendRequest refactor
parent 2660512830
commit 66336a0a13

@@ -2,36 +2,22 @@
// This beast needs to be broken down into multiple components before it gets any worse.
|
||||
import {
|
||||
saveChatStore,
|
||||
apiKeyStorage,
|
||||
chatsStorage,
|
||||
addMessage,
|
||||
insertMessages,
|
||||
getChatSettingValueNullDefault,
|
||||
updateChatSettings,
|
||||
checkStateChange,
|
||||
showSetChatSettings,
|
||||
submitExitingPromptsNow,
|
||||
deleteMessage,
|
||||
continueMessage,
|
||||
getMessage
|
||||
} from './Storage.svelte'
|
||||
import { getRequestSettingList, defaultModel } from './Settings.svelte'
|
||||
import {
|
||||
type Request,
|
||||
type Message,
|
||||
type Chat,
|
||||
type ChatCompletionOpts,
|
||||
|
||||
type Model,
|
||||
|
||||
type ChatSettings
|
||||
|
||||
|
||||
type Chat
|
||||
} from './Types.svelte'
|
||||
import Prompts from './Prompts.svelte'
|
||||
import Messages from './Messages.svelte'
|
||||
import { mergeProfileFields, prepareSummaryPrompt, restartProfile } from './Profiles.svelte'
|
||||
|
||||
import { restartProfile } from './Profiles.svelte'
|
||||
import { afterUpdate, onMount, onDestroy } from 'svelte'
|
||||
import Fa from 'svelte-fa/src/fa.svelte'
|
||||
import {
|
||||
|
@@ -41,27 +27,29 @@
|
|||
faPenToSquare,
|
||||
faMicrophone,
|
||||
faLightbulb,
|
||||
faCommentSlash
|
||||
faCommentSlash,
|
||||
|
||||
faCircleCheck
|
||||
|
||||
} from '@fortawesome/free-solid-svg-icons/index'
|
||||
import { encode } from 'gpt-tokenizer'
|
||||
import { v4 as uuidv4 } from 'uuid'
|
||||
import { countPromptTokens, getModelMaxTokens, getPrice } from './Stats.svelte'
|
||||
import { autoGrowInputOnEvent, scrollToMessage, sizeTextElements } from './Util.svelte'
|
||||
import { getPrice } from './Stats.svelte'
|
||||
import { autoGrowInputOnEvent, scrollToBottom, sizeTextElements } from './Util.svelte'
|
||||
import ChatSettingsModal from './ChatSettingsModal.svelte'
|
||||
import Footer from './Footer.svelte'
|
||||
import { openModal } from 'svelte-modals'
|
||||
import PromptInput from './PromptInput.svelte'
|
||||
import { ChatCompletionResponse } from './ChatCompletionResponse.svelte'
|
||||
import { EventStreamContentType, fetchEventSource } from '@microsoft/fetch-event-source'
|
||||
import { getApiBase, getEndpointCompletions } from './ApiUtil.svelte'
|
||||
import { ChatRequest } from './ChatRequest.svelte'
|
||||
|
||||
export let params = { chatId: '' }
|
||||
const chatId: number = parseInt(params.chatId)
|
||||
|
||||
let controller:AbortController = new AbortController()
|
||||
let chatRequest = new ChatRequest()
|
||||
|
||||
let updating: boolean|number = false
|
||||
let updatingMessage: string = ''
|
||||
// let controller:AbortController
|
||||
|
||||
// let updating: boolean|number = false
|
||||
// let updatingMessage: string = ''
|
||||
let input: HTMLTextAreaElement
|
||||
let recognition: any = null
|
||||
let recording = false
|
||||
|
@@ -111,12 +99,15 @@
|
|||
onDestroy(async () => {
|
||||
// clean up
|
||||
// abort any pending requests.
|
||||
controller.abort()
|
||||
chatRequest.controller.abort()
|
||||
ttsStop()
|
||||
})
|
||||
|
||||
onMount(async () => {
|
||||
if (!chat) return
|
||||
|
||||
chatRequest = new ChatRequest()
|
||||
chatRequest.setChat(chat)
|
||||
// Focus the input on mount
|
||||
focusInput()
|
||||
|
||||
|
@@ -170,349 +161,8 @@
|
|||
scrollToBottom()
|
||||
}
|
||||
|
||||
const scrollToBottom = (instant:boolean = false) => {
|
||||
setTimeout(() => document.querySelector('body')?.scrollIntoView({ behavior: (instant ? 'instant' : 'smooth') as any, block: 'end' }), 0)
|
||||
}
|
||||
|
||||
// Send API request
|
||||
const sendRequest = async (messages: Message[], opts:ChatCompletionOpts, overrides:ChatSettings = {} as ChatSettings): Promise<ChatCompletionResponse> => {
|
||||
// Show updating bar
|
||||
opts.chat = chat
|
||||
const chatResponse = new ChatCompletionResponse(opts)
|
||||
updating = true
|
||||
|
||||
const model = chat.settings.model || defaultModel
|
||||
const maxTokens = getModelMaxTokens(model) // max tokens for model
|
||||
|
||||
const messageFilter = (m:Message) => !m.suppress && m.role !== 'error' && m.content && !m.summarized
|
||||
|
||||
// Submit only the role and content of the messages, provide the previous messages as well for context
|
||||
let filtered = messages.filter(messageFilter)
|
||||
|
||||
// Get an estimate of the total prompt size we're sending
|
||||
let promptTokenCount:number = countPromptTokens(filtered, model)
|
||||
|
||||
let summarySize = chatSettings.summarySize
|
||||
|
||||
const hiddenPromptPrefix = mergeProfileFields(chatSettings, chatSettings.hiddenPromptPrefix).trim()
|
||||
|
||||
if (hiddenPromptPrefix && filtered.length && filtered[filtered.length - 1].role === 'user') {
|
||||
// update estimate with hiddenPromptPrefix token count
|
||||
promptTokenCount += encode(hiddenPromptPrefix + '\n\n').length
|
||||
}
|
||||
|
||||
// console.log('Estimated',promptTokenCount,'prompt token for this request')
|
||||
|
||||
if (chatSettings.continuousChat && !opts.didSummary &&
|
||||
!opts.summaryRequest && !opts.maxTokens &&
|
||||
promptTokenCount > chatSettings.summaryThreshold) {
|
||||
// Too many tokens -- we'll need to summarize some past ones or else we'll run out of space
|
||||
// Get a block of past prompts we'll summarize
|
||||
let pinTop = chatSettings.pinTop
|
||||
const tp = chatSettings.trainingPrompts
|
||||
pinTop = Math.max(pinTop, tp ? 1 : 0)
|
||||
let pinBottom = chatSettings.pinBottom
|
||||
const systemPad = (filtered[0] || {} as Message).role === 'system' ? 1 : 0
|
||||
const mlen = filtered.length - systemPad // always keep system prompt
|
||||
let diff = mlen - (pinTop + pinBottom)
|
||||
const useFIFO = chatSettings.continuousChat === 'fifo' || !prepareSummaryPrompt(chatId, 0)
|
||||
if (!useFIFO) {
|
||||
while (diff <= 3 && (pinTop > 0 || pinBottom > 1)) {
|
||||
// Not enough prompts exposed to summarize
|
||||
// try to open up pinTop and pinBottom to see if we can get more to summarize
|
||||
if (pinTop === 1 && pinBottom > 1) {
|
||||
// If we have a pin top, try to keep some of it as long as we can
|
||||
pinBottom = Math.max(Math.floor(pinBottom / 2), 0)
|
||||
} else {
|
||||
pinBottom = Math.max(Math.floor(pinBottom / 2), 0)
|
||||
pinTop = Math.max(Math.floor(pinTop / 2), 0)
|
||||
}
|
||||
diff = mlen - (pinTop + pinBottom)
|
||||
}
|
||||
}
|
||||
if (!useFIFO && diff > 0) {
|
||||
// We've found at least one prompt we can try to summarize
|
||||
// Reduce to prompts we'll send in for summary
|
||||
// (we may need to update this to not include the pin-top, but the context it provides seems to help in the accuracy of the summary)
|
||||
const summarize = filtered.slice(0, filtered.length - pinBottom)
|
||||
// Estimate token count of what we'll be summarizing
|
||||
let sourceTokenCount = countPromptTokens(summarize, model)
|
||||
// build summary prompt message
|
||||
let summaryPrompt = prepareSummaryPrompt(chatId, sourceTokenCount)
|
||||
|
||||
const summaryMessage = {
|
||||
role: 'user',
|
||||
content: summaryPrompt
|
||||
} as Message
|
||||
// get an estimate of how many tokens this request + max completions could be
|
||||
let summaryPromptSize = countPromptTokens(summarize.concat(summaryMessage), model)
|
||||
// reduce summary size to make sure we're not requesting a summary larger than our prompts
|
||||
summarySize = Math.floor(Math.min(summarySize, sourceTokenCount / 4))
|
||||
// Make sure our prompt + completion request isn't too large
|
||||
while (summarize.length - (pinTop + systemPad) >= 3 && summaryPromptSize + summarySize > maxTokens && summarySize >= 4) {
|
||||
summarize.pop()
|
||||
sourceTokenCount = countPromptTokens(summarize, model)
|
||||
summaryPromptSize = countPromptTokens(summarize.concat(summaryMessage), model)
|
||||
summarySize = Math.floor(Math.min(summarySize, sourceTokenCount / 4))
|
||||
}
|
||||
// See if we have to adjust our max summarySize
|
||||
if (summaryPromptSize + summarySize > maxTokens) {
|
||||
summarySize = maxTokens - summaryPromptSize
|
||||
}
|
||||
// Always try to end the prompts being summarized with a user prompt. Seems to work better.
|
||||
while (summarize.length - (pinTop + systemPad) >= 4 && summarize[summarize.length - 1].role !== 'user') {
|
||||
summarize.pop()
|
||||
}
|
||||
// update with actual
|
||||
sourceTokenCount = countPromptTokens(summarize, model)
|
||||
summaryPrompt = prepareSummaryPrompt(chatId, sourceTokenCount)
|
||||
summarySize = Math.floor(Math.min(summarySize, sourceTokenCount / 4))
|
||||
summaryMessage.content = summaryPrompt
|
||||
if (sourceTokenCount > 20 && summaryPrompt && summarySize > 4) {
|
||||
// get prompt we'll be inserting after
|
||||
const endPrompt = summarize[summarize.length - 1]
|
||||
// Add a prompt to ask to summarize them
|
||||
const summarizeReq = summarize.slice()
|
||||
summarizeReq.push(summaryMessage)
|
||||
summaryPromptSize = countPromptTokens(summarizeReq, model)
|
||||
|
||||
// Create a message the summary will be loaded into
|
||||
const summaryResponse:Message = {
|
||||
role: 'assistant',
|
||||
content: '',
|
||||
uuid: uuidv4(),
|
||||
streaming: opts.streaming,
|
||||
summary: []
|
||||
}
|
||||
summaryResponse.model = model
|
||||
|
||||
// Insert summary completion prompt
|
||||
insertMessages(chatId, endPrompt, [summaryResponse])
|
||||
if (opts.streaming) setTimeout(() => scrollToMessage(summaryResponse.uuid, 150, true, true), 0)
|
||||
|
||||
// Wait for the summary completion
|
||||
updatingMessage = 'Summarizing...'
|
||||
const summary = await sendRequest(summarizeReq, {
|
||||
summaryRequest: true,
|
||||
streaming: opts.streaming,
|
||||
maxTokens: summarySize,
|
||||
fillMessage: summaryResponse,
|
||||
autoAddMessages: true,
|
||||
onMessageChange: (m) => {
|
||||
if (opts.streaming) scrollToMessage(summaryResponse.uuid, 150, true, true)
|
||||
}
|
||||
} as ChatCompletionOpts, {
|
||||
temperature: 0, // make summary more deterministic
|
||||
top_p: 0.2,
|
||||
presence_penalty: -0.5,
|
||||
frequency_penalty: 0,
|
||||
...overrides
|
||||
} as ChatSettings)
|
||||
if (!summary.hasFinished()) await summary.promiseToFinish()
|
||||
if (summary.hasError()) {
|
||||
// Failed due to some API issue. Let the original caller handle it.
|
||||
deleteMessage(chatId, summaryResponse.uuid)
|
||||
return summary
|
||||
} else {
|
||||
// Looks like we got our summarized messages.
|
||||
// get ids of messages we summarized
|
||||
const summarizedIds = summarize.slice(pinTop + systemPad).map(m => m.uuid)
|
||||
// Mark the new summaries as such
|
||||
summaryResponse.summary = summarizedIds
|
||||
|
||||
const summaryIds = [summaryResponse.uuid]
|
||||
// Disable the messages we summarized so they still show in history
|
||||
summarize.forEach((m, i) => {
|
||||
if (i - systemPad >= pinTop) {
|
||||
m.summarized = summaryIds
|
||||
}
|
||||
})
|
||||
saveChatStore()
|
||||
// Re-run request with summarized prompts
|
||||
// return { error: { message: "End for now" } } as Response
|
||||
updatingMessage = 'Continuing...'
|
||||
opts.didSummary = true
|
||||
return await sendRequest(chat.messages, opts)
|
||||
}
|
||||
} else if (!summaryPrompt) {
|
||||
addMessage(chatId, { role: 'error', content: 'Unable to summarize. No summary prompt defined.', uuid: uuidv4() })
|
||||
} else if (sourceTokenCount <= 20) {
|
||||
addMessage(chatId, { role: 'error', content: 'Unable to summarize. Not enough words in past content to summarize.', uuid: uuidv4() })
|
||||
}
|
||||
} else if (!useFIFO && diff < 1) {
|
||||
addMessage(chatId, { role: 'error', content: 'Unable to summarize. Not enough messages in past content to summarize.', uuid: uuidv4() })
|
||||
} else {
|
||||
// roll-off/fifo mode
|
||||
const top = filtered.slice(0, pinTop + systemPad)
|
||||
const rollaway = filtered.slice(pinTop + systemPad)
|
||||
let promptTokenCount = countPromptTokens(top.concat(rollaway), model)
|
||||
// suppress messages we're rolling off
|
||||
while (rollaway.length > (((promptTokenCount + (chatSettings.max_tokens || 1)) > maxTokens) ? pinBottom || 1 : 1) &&
|
||||
promptTokenCount >= chatSettings.summaryThreshold) {
|
||||
const rollOff = rollaway.shift()
|
||||
if (rollOff) rollOff.suppress = true
|
||||
promptTokenCount = countPromptTokens(top.concat(rollaway), model)
|
||||
}
|
||||
saveChatStore()
|
||||
// get a new list now excluding them
|
||||
filtered = messages.filter(messageFilter)
|
||||
}
|
||||
}
|
||||
|
||||
const messagePayload = filtered.map((m, i) => {
|
||||
const r = { role: m.role, content: m.content }
|
||||
if (i === filtered.length - 1 && m.role === 'user' && hiddenPromptPrefix && !opts.summaryRequest) {
|
||||
// If the last prompt is a user prompt, and we have a hiddenPromptPrefix, inject it
|
||||
r.content = hiddenPromptPrefix + '\n\n' + m.content
|
||||
}
|
||||
return r
|
||||
}) as Message[]
|
||||
|
||||
// Update token count with actual
|
||||
promptTokenCount = countPromptTokens(messagePayload, model)
|
||||
const maxAllowed = getModelMaxTokens(chatSettings.model as Model) - (promptTokenCount + 1)
|
||||
|
||||
try {
|
||||
const request: Request = {
|
||||
messages: messagePayload,
|
||||
// Provide the settings by mapping the settingsMap to key/value pairs
|
||||
...getRequestSettingList().reduce((acc, setting) => {
|
||||
const key = setting.key
|
||||
let value = getChatSettingValueNullDefault(chatId, setting)
|
||||
if (key in overrides) value = overrides[key]
|
||||
if (typeof setting.apiTransform === 'function') {
|
||||
value = setting.apiTransform(chatId, setting, value)
|
||||
}
|
||||
if (key === 'max_tokens') {
|
||||
if (opts.maxTokens) value = opts.maxTokens // only as large as requested
|
||||
if (value > maxAllowed || value < 1) value = null // if over max model, do not define max
|
||||
}
|
||||
if (key === 'n') {
|
||||
if (opts.streaming || opts.summaryRequest) {
|
||||
/*
|
||||
Streaming goes insane with more than one completion.
|
||||
Doesn't seem like there's any way to separate the jumbled mess of deltas for the
|
||||
different completions.
|
||||
Summary should only have one completion
|
||||
*/
|
||||
value = 1
|
||||
}
|
||||
}
|
||||
if (value !== null) acc[key] = value
|
||||
return acc
|
||||
}, {})
|
||||
}
|
||||
|
||||
request.stream = opts.streaming
|
||||
|
||||
chatResponse.setPromptTokenCount(promptTokenCount) // streaming needs this
|
||||
|
||||
const signal = controller.signal
|
||||
|
||||
// console.log('apikey', $apiKeyStorage)
|
||||
|
||||
const fetchOptions = {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
Authorization: `Bearer ${$apiKeyStorage}`,
|
||||
'Content-Type': 'application/json'
|
||||
},
|
||||
body: JSON.stringify(request),
|
||||
signal
|
||||
}
|
||||
|
||||
const handleError = async (response) => {
|
||||
let errorResponse
|
||||
try {
|
||||
const errObj = await response.json()
|
||||
errorResponse = errObj?.error?.message || errObj?.error?.code
|
||||
if (!errorResponse && response.choices && response.choices[0]) {
|
||||
errorResponse = response.choices[0]?.message?.content
|
||||
}
|
||||
errorResponse = errorResponse || 'Unexpected Response'
|
||||
} catch (e) {
|
||||
errorResponse = 'Unknown Response'
|
||||
}
|
||||
throw new Error(`${response.status} - ${errorResponse}`)
|
||||
}
|
||||
|
||||
// fetchEventSource doesn't seem to throw on abort, so...
|
||||
const abortListener = (e:Event) => {
|
||||
controller = new AbortController()
|
||||
chatResponse.updateFromError('User aborted request.')
|
||||
signal.removeEventListener('abort', abortListener)
|
||||
}
|
||||
signal.addEventListener('abort', abortListener)
|
||||
|
||||
if (opts.streaming) {
|
||||
chatResponse.onFinish(() => {
|
||||
updating = false
|
||||
updatingMessage = ''
|
||||
scrollToBottom()
|
||||
})
|
||||
fetchEventSource(getApiBase() + getEndpointCompletions(), {
|
||||
...fetchOptions,
|
||||
openWhenHidden: true,
|
||||
onmessage (ev) {
|
||||
// Remove updating indicator
|
||||
updating = 1 // hide indicator, but still signal we're updating
|
||||
updatingMessage = ''
|
||||
// console.log('ev.data', ev.data)
|
||||
if (!chatResponse.hasFinished()) {
|
||||
if (ev.data === '[DONE]') {
|
||||
// ?? anything to do when "[DONE]"?
|
||||
} else {
|
||||
const data = JSON.parse(ev.data)
|
||||
// console.log('data', data)
|
||||
window.requestAnimationFrame(() => { chatResponse.updateFromAsyncResponse(data) })
|
||||
}
|
||||
}
|
||||
},
|
||||
onclose () {
|
||||
chatResponse.updateFromClose()
|
||||
},
|
||||
onerror (err) {
|
||||
console.error(err)
|
||||
throw err
|
||||
},
|
||||
async onopen (response) {
|
||||
if (response.ok && response.headers.get('content-type') === EventStreamContentType) {
|
||||
// everything's good
|
||||
} else {
|
||||
// client-side errors are usually non-retriable:
|
||||
await handleError(response)
|
||||
}
|
||||
}
|
||||
}).catch(err => {
|
||||
chatResponse.updateFromError(err.message)
|
||||
scrollToBottom()
|
||||
})
|
||||
} else {
|
||||
const response = await fetch(getApiBase() + getEndpointCompletions(), fetchOptions)
|
||||
if (!response.ok) {
|
||||
await handleError(response)
|
||||
} else {
|
||||
const json = await response.json()
|
||||
// Remove updating indicator
|
||||
updating = false
|
||||
updatingMessage = ''
|
||||
chatResponse.updateFromSyncResponse(json)
|
||||
scrollToBottom()
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
// console.error(e)
|
||||
updating = false
|
||||
updatingMessage = ''
|
||||
chatResponse.updateFromError(e.message)
|
||||
scrollToBottom()
|
||||
}
|
||||
|
||||
return chatResponse
|
||||
}
|
||||
|
||||
const addNewMessage = () => {
|
||||
if (updating) return
|
||||
if (chatRequest.updating) return
|
||||
let inputMessage: Message
|
||||
const lastMessage = chat.messages[chat.messages.length - 1]
|
||||
const uuid = uuidv4()
|
||||
|
@@ -545,9 +195,21 @@
|
|||
}
|
||||
}
|
||||
|
||||
let waitingForCancel:any = 0
|
||||
|
||||
const cancelRequest = () => {
|
||||
if (!waitingForCancel) {
|
||||
// wait a second for another click to avoid accidental cancel
|
||||
waitingForCancel = setTimeout(() => { waitingForCancel = 0 }, 1000)
|
||||
return
|
||||
}
|
||||
clearTimeout(waitingForCancel); waitingForCancel = 0
|
||||
chatRequest.controller.abort()
|
||||
}
|
||||
|
||||
const submitForm = async (recorded: boolean = false, skipInput: boolean = false, fillMessage: Message|undefined = undefined): Promise<void> => {
|
||||
// Compose the system prompt message if there are no messages yet - disabled for now
|
||||
if (updating) return
|
||||
if (chatRequest.updating) return
|
||||
|
||||
lastSubmitRecorded = recorded
|
||||
|
||||
|
@@ -561,9 +223,7 @@
|
|||
} else if (!fillMessage && chat.messages.length && chat.messages[chat.messages.length - 1].finish_reason === 'length') {
|
||||
fillMessage = chat.messages[chat.messages.length - 1]
|
||||
}
|
||||
|
||||
if (fillMessage && fillMessage.content) fillMessage.content += ' ' // add a space
|
||||
|
||||
|
||||
// Clear the input value
|
||||
input.value = ''
|
||||
input.blur()
|
||||
|
@@ -573,7 +233,7 @@
|
|||
}
|
||||
focusInput()
|
||||
|
||||
const response = await sendRequest(chat.messages, {
|
||||
const response = await chatRequest.sendRequest(chat.messages, {
|
||||
chat,
|
||||
autoAddMessages: true, // Auto-add and update messages in array
|
||||
streaming: chatSettings.stream,
|
||||
|
@@ -600,7 +260,7 @@
|
|||
const suggestMessages = chat.messages.slice(0, 10) // limit to first 10 messages
|
||||
suggestMessages.push(suggestMessage)
|
||||
|
||||
const response = await sendRequest(suggestMessages, {
|
||||
const response = await chatRequest.sendRequest(suggestMessages, {
|
||||
chat,
|
||||
autoAddMessages: false,
|
||||
streaming: false,
|
||||
|
@@ -640,7 +300,7 @@
|
|||
|
||||
const recordToggle = () => {
|
||||
ttsStop()
|
||||
if (updating) return
|
||||
if (chatRequest.updating) return
|
||||
// Check if already recording - if so, stop - else start
|
||||
if (recording) {
|
||||
recognition?.stop()
|
||||
|
@@ -677,11 +337,11 @@
|
|||
|
||||
<Messages messages={chat.messages} chatId={chatId} />
|
||||
|
||||
{#if updating === true}
|
||||
{#if chatRequest.updating === true}
|
||||
<article class="message is-success assistant-message">
|
||||
<div class="message-body content">
|
||||
<span class="is-loading" ></span>
|
||||
<span>{updatingMessage}</span>
|
||||
<span>{chatRequest.updatingMessage}</span>
|
||||
</div>
|
||||
</article>
|
||||
{/if}
|
||||
|
@@ -710,7 +370,7 @@
|
|||
/>
|
||||
</p>
|
||||
<p class="control mic" class:is-hidden={!recognition}>
|
||||
<button class="button" class:is-disabled={updating} class:is-pulse={recording} on:click|preventDefault={recordToggle}
|
||||
<button class="button" class:is-disabled={chatRequest.updating} class:is-pulse={recording} on:click|preventDefault={recordToggle}
|
||||
><span class="icon"><Fa icon={faMicrophone} /></span></button
|
||||
>
|
||||
</p>
|
||||
|
@@ -718,11 +378,17 @@
|
|||
<button title="Chat/Profile Settings" class="button" on:click|preventDefault={showSettingsModal}><span class="icon"><Fa icon={faGear} /></span></button>
|
||||
</p>
|
||||
<p class="control queue">
|
||||
<button title="Queue message, don't send yet" class:is-disabled={updating} class="button is-ghost" on:click|preventDefault={addNewMessage}><span class="icon"><Fa icon={faArrowUpFromBracket} /></span></button>
|
||||
<button title="Queue message, don't send yet" class:is-disabled={chatRequest.updating} class="button is-ghost" on:click|preventDefault={addNewMessage}><span class="icon"><Fa icon={faArrowUpFromBracket} /></span></button>
|
||||
</p>
|
||||
{#if updating}
|
||||
{#if chatRequest.updating}
|
||||
<p class="control send">
|
||||
<button title="Cancel Response" class="button is-danger" type="button" on:click={() => { controller.abort() }}><span class="icon"><Fa icon={faCommentSlash} /></span></button>
|
||||
<button title="Cancel Response" class="button is-danger" type="button" on:click={cancelRequest}><span class="icon">
|
||||
{#if waitingForCancel}
|
||||
<Fa icon={faCircleCheck} />
|
||||
{:else}
|
||||
<Fa icon={faCommentSlash} />
|
||||
{/if}
|
||||
</span></button>
|
||||
</p>
|
||||
{:else}
|
||||
<p class="control send">
|
||||
|
|
|
@@ -34,7 +34,7 @@ export class ChatCompletionResponse {
|
|||
|
||||
private setModel = (model: Model) => {
|
||||
if (!model) return
|
||||
!this.model && setLatestKnownModel(this.chat.settings.model as Model, model)
|
||||
!this.model && setLatestKnownModel(this.chat.settings.model, model)
|
||||
this.lastModel = this.model || model
|
||||
this.model = model
|
||||
}
|
||||
|
@@ -51,6 +51,15 @@ export class ChatCompletionResponse {
|
|||
private messageChangeListeners: ((m: Message[]) => void)[] = []
|
||||
private finishListeners: ((m: Message[]) => void)[] = []
|
||||
|
||||
private initialFillMerge (existingContent:string, newContent:string):string {
|
||||
if (!this.didFill && this.isFill && existingContent && !newContent.match(/^'(t|ll|ve|m|d|re)[^a-z]/i)) {
|
||||
// add a trailing space if our new content isn't a contraction
|
||||
existingContent += ' '
|
||||
}
|
||||
this.didFill = true
|
||||
return existingContent
|
||||
}
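// The merge rule above boils down to the following standalone sketch (illustration only;
// the real method also gates on the didFill/isFill flags):
//   const fillJoin = (existing: string, incoming: string) =>
//     existing && !incoming.match(/^'(t|ll|ve|m|d|re)[^a-z]/i) ? existing + ' ' : existing
//   fillJoin('I', "'ll keep going") + "'ll keep going"   // "I'll keep going"  -- no space before a contraction
//   fillJoin('I', 'will keep going') + 'will keep going' // "I will keep going" -- space appended first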
|
||||
|
||||
setPromptTokenCount (tokens:number) {
|
||||
this.promptTokenCount = tokens
|
||||
}
|
||||
|
@@ -61,11 +70,7 @@ export class ChatCompletionResponse {
|
|||
const exitingMessage = this.messages[i]
|
||||
const message = exitingMessage || choice.message
|
||||
if (exitingMessage) {
|
||||
if (!this.didFill && this.isFill && choice.message.content.match(/^'(t|ll|ve|m|d|re)[^a-z]/i)) {
|
||||
// deal with merging contractions since we've added an extra space to the fill message
|
||||
message.content.replace(/ $/, '')
|
||||
}
|
||||
this.didFill = true
|
||||
message.content = this.initialFillMerge(message.content, choice.message.content)
|
||||
message.content += choice.message.content
|
||||
message.usage = message.usage || {
|
||||
prompt_tokens: 0,
|
||||
|
@@ -100,11 +105,7 @@ export class ChatCompletionResponse {
|
|||
} as Message
|
||||
choice.delta?.role && (message.role = choice.delta.role)
|
||||
if (choice.delta?.content) {
|
||||
if (!this.didFill && this.isFill && choice.delta.content.match(/^'(t|ll|ve|m|d|re)[^a-z]/i)) {
|
||||
// deal with merging contractions since we've added an extra space to the fill message
|
||||
message.content.replace(/([a-z]) $/i, '$1')
|
||||
}
|
||||
this.didFill = true
|
||||
message.content = this.initialFillMerge(message.content, choice.delta?.content)
|
||||
message.content += choice.delta.content
|
||||
}
|
||||
completionTokenCount += encode(message.content).length
|
||||
|
@@ -179,7 +180,7 @@ export class ChatCompletionResponse {
|
|||
this.messages.forEach(m => { m.streaming = false }) // make sure all are marked stopped
|
||||
saveChatStore()
|
||||
const message = this.messages[0]
|
||||
const model = this.model || getLatestKnownModel(this.chat.settings.model as Model)
|
||||
const model = this.model || getLatestKnownModel(this.chat.settings.model)
|
||||
if (message) {
|
||||
if (this.isFill && this.lastModel === this.model && this.offsetTotals && model && message.usage) {
|
||||
// Need to subtract some previous message totals before we add new combined message totals
|
||||
|
|
|
@@ -0,0 +1,388 @@
|
|||
<script context="module" lang="ts">
|
||||
import { ChatCompletionResponse } from './ChatCompletionResponse.svelte'
|
||||
import { mergeProfileFields, prepareSummaryPrompt } from './Profiles.svelte'
|
||||
import { countMessageTokens, countPromptTokens, getModelMaxTokens } from './Stats.svelte'
|
||||
import type { Chat, ChatCompletionOpts, ChatSettings, Message, Model, Request } from './Types.svelte'
|
||||
import { deleteMessage, getChatSettingValueNullDefault, insertMessages, saveChatStore, getApiKey, addError } from './Storage.svelte'
|
||||
import { scrollToBottom, scrollToMessage } from './Util.svelte'
|
||||
import { getRequestSettingList, defaultModel } from './Settings.svelte'
|
||||
import { EventStreamContentType, fetchEventSource } from '@microsoft/fetch-event-source'
|
||||
import { getApiBase, getEndpointCompletions } from './ApiUtil.svelte'
|
||||
|
||||
export class ChatRequest {
|
||||
constructor () {
|
||||
this.controller = new AbortController()
|
||||
this.updating = false
|
||||
this.updatingMessage = ''
|
||||
}
|
||||
|
||||
private chat: Chat
|
||||
updating: boolean|number = false
|
||||
updatingMessage: string = ''
|
||||
controller:AbortController
|
||||
|
||||
setChat (chat: Chat) {
|
||||
this.chat = chat
|
||||
}
|
||||
|
||||
/**
|
||||
* Send API request
|
||||
* @param messages
|
||||
* @param opts
|
||||
* @param overrides
|
||||
*/
|
||||
async sendRequest (messages: Message[], opts: ChatCompletionOpts, overrides: ChatSettings = {} as ChatSettings): Promise<ChatCompletionResponse> {
|
||||
// TODO: Continue to break this method down into smaller chunks
|
||||
const _this = this
|
||||
const chat = _this.chat
|
||||
const chatSettings = _this.chat.settings
|
||||
const chatId = chat.id
|
||||
opts.chat = chat
|
||||
_this.updating = true
|
||||
|
||||
// Submit only the role and content of the messages, provide the previous messages as well for context
|
||||
const messageFilter = (m:Message) => !m.suppress && m.role !== 'error' && m.content && !m.summarized
|
||||
const filtered = messages.filter(messageFilter)
|
||||
|
||||
// If we're doing continuous chat, do it
|
||||
if (!opts.didSummary && !opts.summaryRequest && chatSettings.continuousChat) return await this.doContinuousChat(filtered, opts, overrides)
|
||||
|
||||
const model = this.getModel()
|
||||
const maxTokens = getModelMaxTokens(model)
|
||||
|
||||
const messagePayload = filtered.map((m, i) => { return { role: m.role, content: m.content } }) as Message[]
|
||||
// Inject hidden prompt if requested
|
||||
if (!opts.summaryRequest) this.buildHiddenPromptPrefixMessage(messagePayload, true)
|
||||
|
||||
const chatResponse = new ChatCompletionResponse(opts)
|
||||
const promptTokenCount = countPromptTokens(messagePayload, model)
|
||||
const maxAllowed = maxTokens - (promptTokenCount + 1)
|
||||
|
||||
// Build and make the request
|
||||
try {
|
||||
// Build the API request body
|
||||
const request: Request = {
|
||||
model: chatSettings.model,
|
||||
messages: messagePayload,
|
||||
// Provide the settings by mapping the settingsMap to key/value pairs
|
||||
...getRequestSettingList().reduce((acc, setting) => {
|
||||
const key = setting.key
|
||||
let value = getChatSettingValueNullDefault(chatId, setting)
|
||||
if (key in overrides) value = overrides[key]
|
||||
if (typeof setting.apiTransform === 'function') {
|
||||
value = setting.apiTransform(chatId, setting, value)
|
||||
}
|
||||
if (key === 'max_tokens') {
|
||||
if (opts.maxTokens) value = opts.maxTokens // only as large as requested
|
||||
if (value > maxAllowed || value < 1) value = null // if over max model, do not define max
|
||||
}
|
||||
if (key === 'n') {
|
||||
if (opts.streaming || opts.summaryRequest) {
|
||||
/*
|
||||
Streaming goes insane with more than one completion.
|
||||
Doesn't seem like there's any way to separate the jumbled mess of deltas for the
|
||||
different completions.
|
||||
Summary should only have one completion
|
||||
*/
|
||||
value = 1
|
||||
}
|
||||
}
|
||||
if (value !== null) acc[key] = value
|
||||
return acc
|
||||
}, {}),
|
||||
stream: opts.streaming
|
||||
}
|
||||
|
||||
// Add our token count to the response handler
|
||||
// (streaming doesn't return counts, so we need to do it client side)
|
||||
chatResponse.setPromptTokenCount(promptTokenCount)
|
||||
|
||||
const signal = _this.controller.signal
|
||||
|
||||
// console.log('apikey', $apiKeyStorage)
|
||||
|
||||
const fetchOptions = {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
Authorization: `Bearer ${getApiKey()}`,
|
||||
'Content-Type': 'application/json'
|
||||
},
|
||||
body: JSON.stringify(request),
|
||||
signal
|
||||
}
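// For reference, the serialized body ends up shaped roughly like this (values purely
// illustrative; the actual keys depend on the chat's settings and any overrides):
//   { "model": "gpt-3.5-turbo", "messages": [{ "role": "user", "content": "..." }],
//     "temperature": 1, "max_tokens": 500, "stream": true }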
|
||||
|
||||
// Common error handler
|
||||
const handleError = async (response) => {
|
||||
let errorResponse
|
||||
try {
|
||||
const errObj = await response.json()
|
||||
errorResponse = errObj?.error?.message || errObj?.error?.code
|
||||
if (!errorResponse && response.choices && response.choices[0]) {
|
||||
errorResponse = response.choices[0]?.message?.content
|
||||
}
|
||||
errorResponse = errorResponse || 'Unexpected Response'
|
||||
} catch (e) {
|
||||
errorResponse = 'Unknown Response'
|
||||
}
|
||||
throw new Error(`${response.status} - ${errorResponse}`)
|
||||
}
|
||||
|
||||
// fetchEventSource doesn't seem to throw on abort,
|
||||
// so we deal with it ourselves
|
||||
const abortListener = (e:Event) => {
|
||||
_this.controller = new AbortController()
|
||||
chatResponse.updateFromError('User aborted request.')
|
||||
signal.removeEventListener('abort', abortListener)
|
||||
}
|
||||
signal.addEventListener('abort', abortListener)
|
||||
|
||||
if (opts.streaming) {
|
||||
/**
|
||||
* Streaming request/response
|
||||
* We'll get the response a token at a time, as soon as each one is ready
|
||||
*/
|
||||
chatResponse.onFinish(() => {
|
||||
_this.updating = false
|
||||
_this.updatingMessage = ''
|
||||
})
|
||||
fetchEventSource(getApiBase() + getEndpointCompletions(), {
|
||||
...fetchOptions,
|
||||
openWhenHidden: true,
|
||||
onmessage (ev) {
|
||||
// Remove updating indicator
|
||||
_this.updating = 1 // hide indicator, but still signal we're updating
|
||||
_this.updatingMessage = ''
|
||||
// console.log('ev.data', ev.data)
|
||||
if (!chatResponse.hasFinished()) {
|
||||
if (ev.data === '[DONE]') {
|
||||
// ?? anything to do when "[DONE]"?
|
||||
} else {
|
||||
const data = JSON.parse(ev.data)
|
||||
// console.log('data', data)
|
||||
window.requestAnimationFrame(() => { chatResponse.updateFromAsyncResponse(data) })
|
||||
}
|
||||
}
|
||||
},
|
||||
onclose () {
|
||||
chatResponse.updateFromClose()
|
||||
},
|
||||
onerror (err) {
|
||||
console.error(err)
|
||||
throw err
|
||||
},
|
||||
async onopen (response) {
|
||||
if (response.ok && response.headers.get('content-type') === EventStreamContentType) {
|
||||
// everything's good
|
||||
} else {
|
||||
// client-side errors are usually non-retriable:
|
||||
await handleError(response)
|
||||
}
|
||||
}
|
||||
}).catch(err => {
|
||||
chatResponse.updateFromError(err.message)
|
||||
})
|
||||
} else {
|
||||
/**
|
||||
* Non-streaming request/response
|
||||
* We'll get the response all at once, after a long delay
|
||||
*/
|
||||
const response = await fetch(getApiBase() + getEndpointCompletions(), fetchOptions)
|
||||
if (!response.ok) {
|
||||
await handleError(response)
|
||||
} else {
|
||||
const json = await response.json()
|
||||
// Remove updating indicator
|
||||
_this.updating = false
|
||||
_this.updatingMessage = ''
|
||||
chatResponse.updateFromSyncResponse(json)
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
// console.error(e)
|
||||
_this.updating = false
|
||||
_this.updatingMessage = ''
|
||||
chatResponse.updateFromError(e.message)
|
||||
}
|
||||
|
||||
return chatResponse
|
||||
}
|
||||
|
||||
private getModel (): Model {
|
||||
return this.chat.settings.model || defaultModel
|
||||
}
|
||||
|
||||
private buildHiddenPromptPrefixMessage (messages: Message[], insert:boolean = false): Message|null {
|
||||
const chatSettings = this.chat.settings
|
||||
const hiddenPromptPrefix = mergeProfileFields(chatSettings, chatSettings.hiddenPromptPrefix).trim()
|
||||
if (hiddenPromptPrefix && messages.length && messages[messages.length - 1].role === 'user') {
|
||||
const message = { role: 'user', content: hiddenPromptPrefix } as Message
|
||||
if (insert) {
|
||||
messages.splice(messages.length - 1, 0, message)
|
||||
}
|
||||
return message
|
||||
}
|
||||
return null
|
||||
}
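// In effect, when a hidden prompt prefix is configured and the payload ends with a user
// message, the merged prefix is spliced in as its own user message just before it.
// Rough before/after (contents made up; assumes the profile prefix merges to 'Stay in character.'):
//   before: [{ role: 'system', ... }, { role: 'user', content: 'What next?' }]
//   after:  [{ role: 'system', ... }, { role: 'user', content: 'Stay in character.' },
//            { role: 'user', content: 'What next?' }]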
|
||||
|
||||
private getTokenCountPadding (filtered: Message[]): number {
|
||||
const hiddenPromptMessage = this.buildHiddenPromptPrefixMessage(filtered)
|
||||
let result = 0
|
||||
if (hiddenPromptMessage) {
|
||||
// add cost of hiddenPromptPrefix
|
||||
result += countMessageTokens(hiddenPromptMessage, this.getModel())
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
private async doContinuousChat (filtered: Message[], opts: ChatCompletionOpts, overrides: ChatSettings): Promise<ChatCompletionResponse> {
|
||||
const _this = this
|
||||
const chat = _this.chat
|
||||
const chatSettings = chat.settings
|
||||
const chatId = chat.id
|
||||
const reductionMode = chatSettings.continuousChat
|
||||
const model = _this.getModel()
|
||||
const maxTokens = getModelMaxTokens(model) // max tokens for model
|
||||
|
||||
const continueRequest = async () => {
|
||||
return await _this.sendRequest(chat.messages, {
|
||||
...opts,
|
||||
didSummary: true
|
||||
}, overrides)
|
||||
}
|
||||
|
||||
// Get extra counts for when the prompts are finally sent.
|
||||
const countPadding = this.getTokenCountPadding(filtered)
|
||||
|
||||
// See if we have enough to apply any of the reduction modes
|
||||
const fullPromptSize = countPromptTokens(filtered, model) + countPadding
|
||||
if (fullPromptSize < chatSettings.summaryThreshold) return await continueRequest() // nothing to do yet
|
||||
const overMax = fullPromptSize > maxTokens * 0.95
|
||||
|
||||
// Isolate the pool of messages we're going to reduce
|
||||
const pinTop = chatSettings.pinTop
|
||||
let pinBottom = chatSettings.pinBottom || 2
|
||||
const systemPad = filtered[0]?.role === 'system' ? 1 : 0
|
||||
const top = filtered.slice(0, pinTop + systemPad)
|
||||
let rw = filtered.slice(pinTop + systemPad, filtered.length)
|
||||
if (pinBottom >= rw.length) pinBottom = 1
|
||||
if (pinBottom >= rw.length) {
|
||||
if (overMax) addError(chatId, 'Unable to apply continuous chat. Check threshold, pin top and pin bottom settings.')
|
||||
return await continueRequest()
|
||||
}
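// Worked example of the slicing above (numbers made up): with 12 filtered messages, a
// leading system prompt (systemPad = 1), pinTop = 1 and pinBottom = 2, `top` keeps the
// first 2 messages and `rw` holds the remaining 10 that are eligible for reduction.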
|
||||
|
||||
// Reduce based on mode
|
||||
if (reductionMode === 'fifo') {
|
||||
/***************************************************************
|
||||
* FIFO mode. Roll the top off until we're under our threshold.
|
||||
* *************************************************************
|
||||
*/
|
||||
|
||||
let promptSize = countPromptTokens(top.concat(rw), model) + countPadding
|
||||
while (rw.length && rw.length > pinBottom && promptSize >= chatSettings.summaryThreshold) {
|
||||
const rolled = rw.shift()
|
||||
// Hide messages we're "rolling"
|
||||
if (rolled) rolled.suppress = true
|
||||
promptSize = countPromptTokens(top.concat(rw), model) + countPadding
|
||||
}
|
||||
// Run a new request, now with the rolled messages hidden
|
||||
return await _this.sendRequest(chat.messages, {
|
||||
...opts,
|
||||
didSummary: true // our "summary" was simply dropping some messages
|
||||
}, overrides)
|
||||
} else if (reductionMode === 'summary') {
|
||||
/******************************************************
|
||||
* Summary mode. Reduce it all to a summary, if we can.
|
||||
* ****************************************************
|
||||
*/
|
||||
|
||||
const bottom = rw.slice(0 - pinBottom)
|
||||
rw = rw.slice(0, 0 - pinBottom)
|
||||
let reductionPoolSize = countPromptTokens(rw, model)
|
||||
const ss = chatSettings.summarySize
|
||||
const getSS = ():number => (ss < 1 && ss > 0)
|
||||
? Math.round(reductionPoolSize * ss) // If summarySize between 0 and 1, use percentage of reduced
|
||||
: Math.min(ss, reductionPoolSize * 0.5) // If > 1, use token count
|
||||
let promptSummary = prepareSummaryPrompt(chatId, reductionPoolSize)
|
||||
const summaryRequest = { role: 'user', content: promptSummary } as Message
|
||||
let promptSummarySize = countMessageTokens(summaryRequest, model)
|
||||
// Make sure there is enough room to generate the summary, and try to make sure
|
||||
// the last prompt is a user prompt as that seems to work better for summaries
|
||||
while ((reductionPoolSize + promptSummarySize + getSS()) >= maxTokens ||
|
||||
(reductionPoolSize >= 100 && rw[rw.length - 1]?.role !== 'user')) {
|
||||
bottom.unshift(rw.pop() as Message)
|
||||
reductionPoolSize = countPromptTokens(rw, model)
|
||||
promptSummary = prepareSummaryPrompt(chatId, reductionPoolSize)
|
||||
summaryRequest.content = promptSummary
|
||||
promptSummarySize = countMessageTokens(summaryRequest, model)
|
||||
}
|
||||
if (reductionPoolSize < 50) {
|
||||
if (overMax) addError(chatId, 'Check summary settings. Unable to summarize enough messages.')
|
||||
return continueRequest()
|
||||
}
|
||||
|
||||
// Create a message the summary will be loaded into
|
||||
const summaryResponse = {
|
||||
role: 'assistant',
|
||||
content: '',
|
||||
streaming: opts.streaming,
|
||||
summary: [] as string[],
|
||||
model
|
||||
} as Message
|
||||
|
||||
// Insert summary completion prompt after that last message we're summarizing
|
||||
insertMessages(chatId, rw[rw.length - 1], [summaryResponse])
|
||||
if (opts.streaming) setTimeout(() => scrollToMessage(summaryResponse.uuid, 150, true, true), 0)
|
||||
|
||||
// Request and load the summarization prompt
|
||||
_this.updatingMessage = 'Summarizing...'
|
||||
const summary = await _this.sendRequest(top.concat(rw).concat([summaryRequest]), {
|
||||
summaryRequest: true,
|
||||
streaming: opts.streaming,
|
||||
maxTokens: chatSettings.summarySize,
|
||||
fillMessage: summaryResponse,
|
||||
autoAddMessages: true,
|
||||
onMessageChange: (m) => {
|
||||
if (opts.streaming) scrollToMessage(summaryResponse.uuid, 150, true, true)
|
||||
}
|
||||
} as ChatCompletionOpts, {
|
||||
temperature: 0, // make summary more deterministic
|
||||
top_p: 0.5,
|
||||
presence_penalty: 0,
|
||||
frequency_penalty: 0,
|
||||
...overrides
|
||||
} as ChatSettings)
|
||||
// Wait for the response to complete
|
||||
if (!summary.hasFinished()) await summary.promiseToFinish()
|
||||
if (summary.hasError()) {
|
||||
// Failed due to some API issue. Let the original caller handle it.
|
||||
deleteMessage(chatId, summaryResponse.uuid)
|
||||
return summary
|
||||
} else {
|
||||
// Looks like we got our summarized messages.
|
||||
// Mark the new summaries as such
|
||||
summaryResponse.summary = rw.map(m => m.uuid)
|
||||
const summaryIds = [summaryResponse.uuid]
|
||||
// Disable the messages we summarized so they still show in history
|
||||
rw.forEach((m, i) => { m.summarized = summaryIds })
|
||||
saveChatStore()
|
||||
// Re-run request with summarized prompts
|
||||
// return { error: { message: "End for now" } } as Response
|
||||
_this.updatingMessage = 'Continuing...'
|
||||
scrollToBottom(true)
|
||||
return await _this.sendRequest(chat.messages, {
|
||||
...opts,
|
||||
didSummary: true
|
||||
})
|
||||
}
|
||||
} else {
|
||||
/***************
|
||||
* Unknown mode.
|
||||
* *************
|
||||
*/
|
||||
addError(chatId, `Unknown Continuous Chat Mode "${reductionMode}".`)
|
||||
return continueRequest()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
</script>
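Taken together with the Chat.svelte changes above, using the new class from a component reduces to roughly this sketch (a condensed illustration; error handling is omitted and `chat` is assumed to be an already-loaded Chat):

import { ChatRequest } from './ChatRequest.svelte'
import type { Chat } from './Types.svelte'

const chatRequest = new ChatRequest()

async function ask (chat: Chat) {
  chatRequest.setChat(chat)
  const response = await chatRequest.sendRequest(chat.messages, {
    chat,
    autoAddMessages: true, // auto-add and update messages in the chat
    streaming: chat.settings.stream
  })
  if (!response.hasFinished()) await response.promiseToFinish()
  return response
}

// Abort a pending request through the exposed controller; the UI can bind to
// chatRequest.updating and chatRequest.updatingMessage for progress display.
const cancelRequest = () => chatRequest.controller.abort()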
|
|
@@ -174,7 +174,7 @@
|
|||
min={setting.min}
|
||||
max={setting.max}
|
||||
step={setting.step}
|
||||
placeholder={String(setting.placeholder)}
|
||||
placeholder={String(setting.placeholder || chatDefaults[setting.key])}
|
||||
on:change={e => queueSettingValueChange(e, setting)}
|
||||
/>
|
||||
{:else if setting.type === 'select'}
|
||||
|
|
|
@@ -167,7 +167,7 @@
|
|||
const profileSelect = getChatSettingObjectByKey('profile') as ChatSetting & SettingSelect
|
||||
profileSelect.options = getProfileSelect()
|
||||
chatDefaults.profile = getDefaultProfileKey()
|
||||
chatDefaults.max_tokens = getModelMaxTokens(chatSettings.model || '')
|
||||
chatDefaults.max_tokens = getModelMaxTokens(chatSettings.model)
|
||||
// const defaultProfile = globalStore.defaultProfile || profileSelect.options[0].value
|
||||
defaultProfile = getDefaultProfileKey()
|
||||
isDefault = defaultProfile === chatSettings.profile
|
||||
|
|
|
@@ -82,10 +82,8 @@ export const prepareProfilePrompt = (chatId:number) => {
|
|||
return mergeProfileFields(settings, settings.systemPrompt).trim()
|
||||
}
|
||||
|
||||
export const prepareSummaryPrompt = (chatId:number, promptsSize:number, maxTokens:number|undefined = undefined) => {
|
||||
export const prepareSummaryPrompt = (chatId:number, maxTokens:number) => {
|
||||
const settings = getChatSettings(chatId)
|
||||
maxTokens = maxTokens || settings.summarySize
|
||||
maxTokens = Math.min(Math.floor(promptsSize / 4), maxTokens) // Make sure we're shrinking by at least a 4th
|
||||
const currentSummaryPrompt = settings.summaryPrompt
|
||||
// ~.75 words per token. May need to reduce
|
||||
return mergeProfileFields(settings, currentSummaryPrompt, Math.floor(maxTokens * 0.75)).trim()
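// e.g. prepareSummaryPrompt(chatId, 1000) merges the profile's summary prompt with a word
// budget of Math.floor(1000 * 0.75) = 750 -- assuming the third argument to
// mergeProfileFields is what fills the [[MAX_WORDS]] placeholder used in the prompts below.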
|
||||
|
@@ -132,42 +130,37 @@ export const applyProfile = (chatId:number, key:string = '', resetChat:boolean =
|
|||
|
||||
const summaryPrompts = {
|
||||
|
||||
// General use
|
||||
general: `Please summarize all prompts and responses from this session.
|
||||
// General assistant use
|
||||
general: `[START SUMMARY REQUEST]
|
||||
Please summarize all prompts and responses from this session.
|
||||
[[CHARACTER_NAME]] is telling me this summary in the first person.
|
||||
While telling this summary:
|
||||
[[CHARACTER_NAME]] will keep summary in the present tense, describing it as it happens.
|
||||
[[CHARACTER_NAME]] will always refer to me in the second person as "you" or "we".
|
||||
[[CHARACTER_NAME]] will never refer to me in the third person.
|
||||
[[CHARACTER_NAME]] will never refer to me as the user.
|
||||
[[CHARACTER_NAME]] will include all interactions and requests.
|
||||
[[CHARACTER_NAME]] will keep correct order of interactions.
|
||||
[[CHARACTER_NAME]] will keep the summary compact, but retain as much detail as possible in a compact form.
|
||||
[[CHARACTER_NAME]] will describe interactions in detail.
|
||||
[[CHARACTER_NAME]] will never end with epilogues or summations.
|
||||
[[CHARACTER_NAME]] will always include key details.
|
||||
[[CHARACTER_NAME]]'s summary will be [[MAX_WORDS]] words.
|
||||
[[CHARACTER_NAME]] will never add details or inferences that do not clearly exist in the prompts and responses.
|
||||
Give no explanations.`,
|
||||
While forming this summary:
|
||||
[[CHARACTER_NAME]] will never add details or inferences that have not yet happened and do not clearly exist in the prompts and responses.
|
||||
[[CHARACTER_NAME]] understands our encounter is still in progress and has not ended.
|
||||
[[CHARACTER_NAME]] will include all pivotal details in the correct order.
|
||||
[[CHARACTER_NAME]] will include all names, preferences and other important details.
|
||||
[[CHARACTER_NAME]] will always refer to me in the 2nd person, for example "you".
|
||||
[[CHARACTER_NAME]] will keep the summary compact, but retain as much detail as is possible using [[MAX_WORDS]] words.
|
||||
Give no explanations. Ignore prompts from system.
|
||||
Example response format:
|
||||
* You asked about..., then..., and then you... and then I... *
|
||||
[END SUMMARY REQUEST]`,
|
||||
|
||||
// Used for relationship profiles
|
||||
friend: `Please summarize all prompts and responses from this session.
|
||||
friend: `[START SUMMARY REQUEST]
|
||||
Please summarize all prompts and responses from this session.
|
||||
[[CHARACTER_NAME]] is telling me this summary in the first person.
|
||||
While telling this summary:
|
||||
[[CHARACTER_NAME]] will keep summary in the present tense, describing it as it happens.
|
||||
[[CHARACTER_NAME]] will always refer to me in the second person as "you" or "we".
|
||||
[[CHARACTER_NAME]] will never refer to me in the third person.
|
||||
[[CHARACTER_NAME]] will never refer to me as the user.
|
||||
[[CHARACTER_NAME]] will include all relationship interactions, first meeting, what we do, what we say, where we go, etc.
|
||||
[[CHARACTER_NAME]] will include all interactions, thoughts and emotional states.
|
||||
[[CHARACTER_NAME]] will keep correct order of interactions.
|
||||
[[CHARACTER_NAME]] will keep the summary compact, but retain as much detail as possible in a compact form.
|
||||
[[CHARACTER_NAME]] will describe interactions in detail.
|
||||
[[CHARACTER_NAME]] will never end with epilogues or summations.
|
||||
[[CHARACTER_NAME]] will include all pivotal details.
|
||||
[[CHARACTER_NAME]]'s summary will be [[MAX_WORDS]] words.
|
||||
[[CHARACTER_NAME]] will never add details or inferences that do not clearly exist in the prompts and responses.
|
||||
Give no explanations.`
|
||||
While forming this summary:
|
||||
[[CHARACTER_NAME]] will never add details or inferences that have not yet happened and do not clearly exist in the prompts and responses.
|
||||
[[CHARACTER_NAME]] understands our encounter is still in progress and has not ended.
|
||||
[[CHARACTER_NAME]] will include all pivotal details and emotional states in the correct order.
|
||||
[[CHARACTER_NAME]] will include all names, gifts, preferences, purchase and other important details.
|
||||
[[CHARACTER_NAME]] will always refer to me in the 2nd person, for example "you".
|
||||
[[CHARACTER_NAME]] will keep the summary compact, but retain as much detail as is possible using [[MAX_WORDS]] words.
|
||||
Give no explanations. Ignore prompts from system.
|
||||
Example response format:
|
||||
* We met at a park where you and I talked about our interests, then..., and then you... and then we... *
|
||||
[END SUMMARY REQUEST]`
|
||||
}
|
||||
|
||||
const profiles:Record<string, ChatSettings> = {
|
||||
|
|
|
@@ -171,7 +171,7 @@ const systemPromptSettings: ChatSetting[] = [
|
|||
{
|
||||
key: 'hiddenPromptPrefix',
|
||||
name: 'Hidden Prompt Prefix',
|
||||
title: 'A prompt that will be silently injected before every user prompt.',
|
||||
title: 'A user prompt that will be silently injected before every new user prompt, then removed from history.',
|
||||
placeholder: 'Enter user prompt prefix here. You can remind ChatGPT how to act.',
|
||||
type: 'textarea',
|
||||
hide: (chatId) => !getChatSettings(chatId).useSystemPrompt
|
||||
|
@@ -251,7 +251,7 @@ const summarySettings: ChatSetting[] = [
|
|||
},
|
||||
{
|
||||
key: 'summaryPrompt',
|
||||
name: 'Summary Generation Prompt (Empty will use FIFO instead.)',
|
||||
name: 'Summary Generation Prompt',
|
||||
title: 'A prompt used to summarize past prompts.',
|
||||
placeholder: 'Enter a prompt that will be used to summarize past prompts here.',
|
||||
type: 'textarea',
|
||||
|
|
|
@@ -31,11 +31,16 @@
|
|||
|
||||
export const countPromptTokens = (prompts:Message[], model:Model):number => {
|
||||
return prompts.reduce((a, m) => {
|
||||
// Not sure how OpenAI formats it, but this seems to get close to the right counts.
|
||||
// Would be nice to know. This works for gpt-3.5. gpt-4 could be different
|
||||
a += encode('## ' + m.role + ' ##:\r\n\r\n' + m.content + '\r\n\r\n\r\n').length
|
||||
a += countMessageTokens(m, model)
|
||||
return a
|
||||
}, 0) + 3
|
||||
}, 0) + 3 // Always seems to be message counts + 3
|
||||
}
|
||||
|
||||
export const countMessageTokens = (message:Message, model:Model):number => {
|
||||
// Not sure how OpenAI formats it, but this seems to get close to the right counts.
|
||||
// Would be nice to know. This works for gpt-3.5. gpt-4 could be different.
|
||||
// Complete stab in the dark here -- update if you know where all the extra tokens really come from.
|
||||
return encode('## ' + message.role + ' ##:\r\n\r\n' + message.content + '\r\n\r\n\r\n').length
|
||||
}
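// The two counters relate directly: for any list of messages,
//   countPromptTokens(msgs, model) === msgs.reduce((a, m) => a + countMessageTokens(m, model), 0) + 3
// e.g. two messages costing 9 and 7 tokens respectively yield a prompt estimate of 19.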
|
||||
|
||||
export const getModelMaxTokens = (model:Model):number => {
|
||||
|
|
|
@@ -19,6 +19,10 @@
|
|||
|
||||
const chatDefaults = getChatDefaults()
|
||||
|
||||
export const getApiKey = (): string => {
|
||||
return get(apiKeyStorage)
|
||||
}
|
||||
|
||||
export const newChatID = (): number => {
|
||||
const chats = get(chatsStorage)
|
||||
const chatId = chats.reduce((maxId, chat) => Math.max(maxId, chat.id), 0) + 1
|
||||
|
@@ -203,6 +207,10 @@
|
|||
chatsStorage.set(chats)
|
||||
}
|
||||
|
||||
export const addError = (chatId: number, error: string) => {
|
||||
addMessage(chatId, { content: error } as Message)
|
||||
}
|
||||
|
||||
export const addMessage = (chatId: number, message: Message) => {
|
||||
const chats = get(chatsStorage)
|
||||
const chat = chats.find((chat) => chat.id === chatId) as Chat
|
||||
|
@@ -232,6 +240,7 @@
|
|||
console.error("Couldn't insert after message:", insertAfter)
|
||||
return
|
||||
}
|
||||
newMessages.forEach(m => { m.uuid = m.uuid || uuidv4() })
|
||||
chat.messages.splice(index + 1, 0, ...newMessages)
|
||||
chatsStorage.set(chats)
|
||||
}
|
||||
|
|
|
@@ -38,7 +38,7 @@
|
|||
}
|
||||
|
||||
export type Request = {
|
||||
model?: Model;
|
||||
model: Model;
|
||||
messages?: Message[];
|
||||
temperature?: number;
|
||||
top_p?: number;
|
||||
|
|
|
@@ -60,6 +60,11 @@
|
|||
}
|
||||
}
|
||||
|
||||
export const scrollToBottom = (instant:boolean = false) => {
|
||||
setTimeout(() => document.querySelector('body')?.scrollIntoView({ behavior: (instant ? 'instant' : 'smooth') as any, block: 'end' }), 0)
|
||||
}
|
||||
|
||||
|
||||
export const checkModalEsc = (event:KeyboardEvent|undefined):boolean|void => {
|
||||
if (!event || event.key !== 'Escape') return
|
||||
dispatchModalEsc()
|
||||
|
|