sendRequest refactor

Webifi 2023-06-11 16:49:51 -05:00
parent 2660512830
commit 66336a0a13
11 changed files with 505 additions and 438 deletions
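For orientation: the monolithic sendRequest (and its controller / updating / updatingMessage locals) moves out of Chat.svelte into a new ChatRequest class in src/lib/ChatRequest.svelte, and the component now delegates to it. A condensed sketch of the new call pattern, pieced together from the changes below; it assumes the surrounding Chat.svelte component context (chat, chatSettings) and is illustrative rather than an exact excerpt:

    import { onMount } from 'svelte'
    import { ChatRequest } from './ChatRequest.svelte'

    // One request helper per chat component; it owns the AbortController
    // and the updating / updatingMessage state that used to be component locals.
    let chatRequest = new ChatRequest()

    onMount(async () => {
      if (!chat) return
      chatRequest = new ChatRequest()
      chatRequest.setChat(chat) // give it the chat so it can read id, settings and messages
    })

    // Submitting a prompt: continuous-chat reduction (summary or FIFO)
    // now happens inside sendRequest instead of in the component.
    const response = await chatRequest.sendRequest(chat.messages, {
      chat,
      autoAddMessages: true, // auto-add and update messages in the chat
      streaming: chatSettings.stream
    })

    // Cancelling a response aborts through the class-owned controller.
    chatRequest.controller.abort()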


@ -2,36 +2,22 @@
// This beast needs to be broken down into multiple components before it gets any worse. // This beast needs to be broken down into multiple components before it gets any worse.
import { import {
saveChatStore, saveChatStore,
apiKeyStorage,
chatsStorage, chatsStorage,
addMessage, addMessage,
insertMessages,
getChatSettingValueNullDefault,
updateChatSettings, updateChatSettings,
checkStateChange, checkStateChange,
showSetChatSettings, showSetChatSettings,
submitExitingPromptsNow, submitExitingPromptsNow,
deleteMessage,
continueMessage, continueMessage,
getMessage getMessage
} from './Storage.svelte' } from './Storage.svelte'
import { getRequestSettingList, defaultModel } from './Settings.svelte'
import { import {
type Request,
type Message, type Message,
type Chat, type Chat
type ChatCompletionOpts,
type Model,
type ChatSettings
} from './Types.svelte' } from './Types.svelte'
import Prompts from './Prompts.svelte' import Prompts from './Prompts.svelte'
import Messages from './Messages.svelte' import Messages from './Messages.svelte'
import { mergeProfileFields, prepareSummaryPrompt, restartProfile } from './Profiles.svelte' import { restartProfile } from './Profiles.svelte'
import { afterUpdate, onMount, onDestroy } from 'svelte' import { afterUpdate, onMount, onDestroy } from 'svelte'
import Fa from 'svelte-fa/src/fa.svelte' import Fa from 'svelte-fa/src/fa.svelte'
import { import {
@ -41,27 +27,29 @@
faPenToSquare, faPenToSquare,
faMicrophone, faMicrophone,
faLightbulb, faLightbulb,
faCommentSlash faCommentSlash,
faCircleCheck
} from '@fortawesome/free-solid-svg-icons/index' } from '@fortawesome/free-solid-svg-icons/index'
import { encode } from 'gpt-tokenizer'
import { v4 as uuidv4 } from 'uuid' import { v4 as uuidv4 } from 'uuid'
import { countPromptTokens, getModelMaxTokens, getPrice } from './Stats.svelte' import { getPrice } from './Stats.svelte'
import { autoGrowInputOnEvent, scrollToMessage, sizeTextElements } from './Util.svelte' import { autoGrowInputOnEvent, scrollToBottom, sizeTextElements } from './Util.svelte'
import ChatSettingsModal from './ChatSettingsModal.svelte' import ChatSettingsModal from './ChatSettingsModal.svelte'
import Footer from './Footer.svelte' import Footer from './Footer.svelte'
import { openModal } from 'svelte-modals' import { openModal } from 'svelte-modals'
import PromptInput from './PromptInput.svelte' import PromptInput from './PromptInput.svelte'
import { ChatCompletionResponse } from './ChatCompletionResponse.svelte' import { ChatRequest } from './ChatRequest.svelte'
import { EventStreamContentType, fetchEventSource } from '@microsoft/fetch-event-source'
import { getApiBase, getEndpointCompletions } from './ApiUtil.svelte'
export let params = { chatId: '' } export let params = { chatId: '' }
const chatId: number = parseInt(params.chatId) const chatId: number = parseInt(params.chatId)
let controller:AbortController = new AbortController() let chatRequest = new ChatRequest()
let updating: boolean|number = false // let controller:AbortController
let updatingMessage: string = ''
// let updating: boolean|number = false
// let updatingMessage: string = ''
let input: HTMLTextAreaElement let input: HTMLTextAreaElement
let recognition: any = null let recognition: any = null
let recording = false let recording = false
@ -111,12 +99,15 @@
onDestroy(async () => { onDestroy(async () => {
// clean up // clean up
// abort any pending requests. // abort any pending requests.
controller.abort() chatRequest.controller.abort()
ttsStop() ttsStop()
}) })
onMount(async () => { onMount(async () => {
if (!chat) return if (!chat) return
chatRequest = new ChatRequest()
chatRequest.setChat(chat)
// Focus the input on mount // Focus the input on mount
focusInput() focusInput()
@ -170,349 +161,8 @@
scrollToBottom() scrollToBottom()
} }
const scrollToBottom = (instant:boolean = false) => {
setTimeout(() => document.querySelector('body')?.scrollIntoView({ behavior: (instant ? 'instant' : 'smooth') as any, block: 'end' }), 0)
}
// Send API request
const sendRequest = async (messages: Message[], opts:ChatCompletionOpts, overrides:ChatSettings = {} as ChatSettings): Promise<ChatCompletionResponse> => {
// Show updating bar
opts.chat = chat
const chatResponse = new ChatCompletionResponse(opts)
updating = true
const model = chat.settings.model || defaultModel
const maxTokens = getModelMaxTokens(model) // max tokens for model
const messageFilter = (m:Message) => !m.suppress && m.role !== 'error' && m.content && !m.summarized
// Submit only the role and content of the messages, provide the previous messages as well for context
let filtered = messages.filter(messageFilter)
// Get an estimate of the total prompt size we're sending
let promptTokenCount:number = countPromptTokens(filtered, model)
let summarySize = chatSettings.summarySize
const hiddenPromptPrefix = mergeProfileFields(chatSettings, chatSettings.hiddenPromptPrefix).trim()
if (hiddenPromptPrefix && filtered.length && filtered[filtered.length - 1].role === 'user') {
// update estimate with hiddenPromptPrefix token count
promptTokenCount += encode(hiddenPromptPrefix + '\n\n').length
}
// console.log('Estimated',promptTokenCount,'prompt token for this request')
if (chatSettings.continuousChat && !opts.didSummary &&
!opts.summaryRequest && !opts.maxTokens &&
promptTokenCount > chatSettings.summaryThreshold) {
// Too many tokens -- we'll need to summarize some past ones or else we'll run out of space
// Get a block of past prompts we'll summarize
let pinTop = chatSettings.pinTop
const tp = chatSettings.trainingPrompts
pinTop = Math.max(pinTop, tp ? 1 : 0)
let pinBottom = chatSettings.pinBottom
const systemPad = (filtered[0] || {} as Message).role === 'system' ? 1 : 0
const mlen = filtered.length - systemPad // always keep system prompt
let diff = mlen - (pinTop + pinBottom)
const useFIFO = chatSettings.continuousChat === 'fifo' || !prepareSummaryPrompt(chatId, 0)
if (!useFIFO) {
while (diff <= 3 && (pinTop > 0 || pinBottom > 1)) {
// Not enough prompts exposed to summarize
// try to open up pinTop and pinBottom to see if we can get more to summarize
if (pinTop === 1 && pinBottom > 1) {
// If we have a pin top, try to keep some of it as long as we can
pinBottom = Math.max(Math.floor(pinBottom / 2), 0)
} else {
pinBottom = Math.max(Math.floor(pinBottom / 2), 0)
pinTop = Math.max(Math.floor(pinTop / 2), 0)
}
diff = mlen - (pinTop + pinBottom)
}
}
if (!useFIFO && diff > 0) {
// We've found at least one prompt we can try to summarize
// Reduce to prompts we'll send in for summary
// (we may need to update this to not include the pin-top, but the context it provides seems to help in the accuracy of the summary)
const summarize = filtered.slice(0, filtered.length - pinBottom)
// Estimate token count of what we'll be summarizing
let sourceTokenCount = countPromptTokens(summarize, model)
// build summary prompt message
let summaryPrompt = prepareSummaryPrompt(chatId, sourceTokenCount)
const summaryMessage = {
role: 'user',
content: summaryPrompt
} as Message
// get an estimate of how many tokens this request + max completions could be
let summaryPromptSize = countPromptTokens(summarize.concat(summaryMessage), model)
// reduce summary size to make sure we're not requesting a summary larger than our prompts
summarySize = Math.floor(Math.min(summarySize, sourceTokenCount / 4))
// Make sure our prompt + completion request isn't too large
while (summarize.length - (pinTop + systemPad) >= 3 && summaryPromptSize + summarySize > maxTokens && summarySize >= 4) {
summarize.pop()
sourceTokenCount = countPromptTokens(summarize, model)
summaryPromptSize = countPromptTokens(summarize.concat(summaryMessage), model)
summarySize = Math.floor(Math.min(summarySize, sourceTokenCount / 4))
}
// See if we have to adjust our max summarySize
if (summaryPromptSize + summarySize > maxTokens) {
summarySize = maxTokens - summaryPromptSize
}
// Always try to end the prompts being summarized with a user prompt. Seems to work better.
while (summarize.length - (pinTop + systemPad) >= 4 && summarize[summarize.length - 1].role !== 'user') {
summarize.pop()
}
// update with actual
sourceTokenCount = countPromptTokens(summarize, model)
summaryPrompt = prepareSummaryPrompt(chatId, sourceTokenCount)
summarySize = Math.floor(Math.min(summarySize, sourceTokenCount / 4))
summaryMessage.content = summaryPrompt
if (sourceTokenCount > 20 && summaryPrompt && summarySize > 4) {
// get prompt we'll be inserting after
const endPrompt = summarize[summarize.length - 1]
// Add a prompt to ask to summarize them
const summarizeReq = summarize.slice()
summarizeReq.push(summaryMessage)
summaryPromptSize = countPromptTokens(summarizeReq, model)
// Create a message the summary will be loaded into
const summaryResponse:Message = {
role: 'assistant',
content: '',
uuid: uuidv4(),
streaming: opts.streaming,
summary: []
}
summaryResponse.model = model
// Insert summary completion prompt
insertMessages(chatId, endPrompt, [summaryResponse])
if (opts.streaming) setTimeout(() => scrollToMessage(summaryResponse.uuid, 150, true, true), 0)
// Wait for the summary completion
updatingMessage = 'Summarizing...'
const summary = await sendRequest(summarizeReq, {
summaryRequest: true,
streaming: opts.streaming,
maxTokens: summarySize,
fillMessage: summaryResponse,
autoAddMessages: true,
onMessageChange: (m) => {
if (opts.streaming) scrollToMessage(summaryResponse.uuid, 150, true, true)
}
} as ChatCompletionOpts, {
temperature: 0, // make summary more deterministic
top_p: 0.2,
presence_penalty: -0.5,
frequency_penalty: 0,
...overrides
} as ChatSettings)
if (!summary.hasFinished()) await summary.promiseToFinish()
if (summary.hasError()) {
// Failed due to some API issue. Let the original caller handle it.
deleteMessage(chatId, summaryResponse.uuid)
return summary
} else {
// Looks like we got our summarized messages.
// get ids of messages we summarized
const summarizedIds = summarize.slice(pinTop + systemPad).map(m => m.uuid)
// Mark the new summaries as such
summaryResponse.summary = summarizedIds
const summaryIds = [summaryResponse.uuid]
// Disable the messages we summarized so they still show in history
summarize.forEach((m, i) => {
if (i - systemPad >= pinTop) {
m.summarized = summaryIds
}
})
saveChatStore()
// Re-run request with summarized prompts
// return { error: { message: "End for now" } } as Response
updatingMessage = 'Continuing...'
opts.didSummary = true
return await sendRequest(chat.messages, opts)
}
} else if (!summaryPrompt) {
addMessage(chatId, { role: 'error', content: 'Unable to summarize. No summary prompt defined.', uuid: uuidv4() })
} else if (sourceTokenCount <= 20) {
addMessage(chatId, { role: 'error', content: 'Unable to summarize. Not enough words in past content to summarize.', uuid: uuidv4() })
}
} else if (!useFIFO && diff < 1) {
addMessage(chatId, { role: 'error', content: 'Unable to summarize. Not enough messages in past content to summarize.', uuid: uuidv4() })
} else {
// roll-off/fifo mode
const top = filtered.slice(0, pinTop + systemPad)
const rollaway = filtered.slice(pinTop + systemPad)
let promptTokenCount = countPromptTokens(top.concat(rollaway), model)
// suppress messages we're rolling off
while (rollaway.length > (((promptTokenCount + (chatSettings.max_tokens || 1)) > maxTokens) ? pinBottom || 1 : 1) &&
promptTokenCount >= chatSettings.summaryThreshold) {
const rollOff = rollaway.shift()
if (rollOff) rollOff.suppress = true
promptTokenCount = countPromptTokens(top.concat(rollaway), model)
}
saveChatStore()
// get a new list now excluding them
filtered = messages.filter(messageFilter)
}
}
const messagePayload = filtered.map((m, i) => {
const r = { role: m.role, content: m.content }
if (i === filtered.length - 1 && m.role === 'user' && hiddenPromptPrefix && !opts.summaryRequest) {
// If the last prompt is a user prompt, and we have a hiddenPromptPrefix, inject it
r.content = hiddenPromptPrefix + '\n\n' + m.content
}
return r
}) as Message[]
// Update token count with actual
promptTokenCount = countPromptTokens(messagePayload, model)
const maxAllowed = getModelMaxTokens(chatSettings.model as Model) - (promptTokenCount + 1)
try {
const request: Request = {
messages: messagePayload,
// Provide the settings by mapping the settingsMap to key/value pairs
...getRequestSettingList().reduce((acc, setting) => {
const key = setting.key
let value = getChatSettingValueNullDefault(chatId, setting)
if (key in overrides) value = overrides[key]
if (typeof setting.apiTransform === 'function') {
value = setting.apiTransform(chatId, setting, value)
}
if (key === 'max_tokens') {
if (opts.maxTokens) value = opts.maxTokens // only as large as requested
if (value > maxAllowed || value < 1) value = null // if over max model, do not define max
}
if (key === 'n') {
if (opts.streaming || opts.summaryRequest) {
/*
Streaming goes insane with more than one completion.
Doesn't seem like there's any way to separate the jumbled mess of deltas for the
different completions.
Summary should only have one completion
*/
value = 1
}
}
if (value !== null) acc[key] = value
return acc
}, {})
}
request.stream = opts.streaming
chatResponse.setPromptTokenCount(promptTokenCount) // streaming needs this
const signal = controller.signal
// console.log('apikey', $apiKeyStorage)
const fetchOptions = {
method: 'POST',
headers: {
Authorization: `Bearer ${$apiKeyStorage}`,
'Content-Type': 'application/json'
},
body: JSON.stringify(request),
signal
}
const handleError = async (response) => {
let errorResponse
try {
const errObj = await response.json()
errorResponse = errObj?.error?.message || errObj?.error?.code
if (!errorResponse && response.choices && response.choices[0]) {
errorResponse = response.choices[0]?.message?.content
}
errorResponse = errorResponse || 'Unexpected Response'
} catch (e) {
errorResponse = 'Unknown Response'
}
throw new Error(`${response.status} - ${errorResponse}`)
}
// fetchEventSource doesn't seem to throw on abort, so...
const abortListener = (e:Event) => {
controller = new AbortController()
chatResponse.updateFromError('User aborted request.')
signal.removeEventListener('abort', abortListener)
}
signal.addEventListener('abort', abortListener)
if (opts.streaming) {
chatResponse.onFinish(() => {
updating = false
updatingMessage = ''
scrollToBottom()
})
fetchEventSource(getApiBase() + getEndpointCompletions(), {
...fetchOptions,
openWhenHidden: true,
onmessage (ev) {
// Remove updating indicator
updating = 1 // hide indicator, but still signal we're updating
updatingMessage = ''
// console.log('ev.data', ev.data)
if (!chatResponse.hasFinished()) {
if (ev.data === '[DONE]') {
// ?? anything to do when "[DONE]"?
} else {
const data = JSON.parse(ev.data)
// console.log('data', data)
window.requestAnimationFrame(() => { chatResponse.updateFromAsyncResponse(data) })
}
}
},
onclose () {
chatResponse.updateFromClose()
},
onerror (err) {
console.error(err)
throw err
},
async onopen (response) {
if (response.ok && response.headers.get('content-type') === EventStreamContentType) {
// everything's good
} else {
// client-side errors are usually non-retriable:
await handleError(response)
}
}
}).catch(err => {
chatResponse.updateFromError(err.message)
scrollToBottom()
})
} else {
const response = await fetch(getApiBase() + getEndpointCompletions(), fetchOptions)
if (!response.ok) {
await handleError(response)
} else {
const json = await response.json()
// Remove updating indicator
updating = false
updatingMessage = ''
chatResponse.updateFromSyncResponse(json)
scrollToBottom()
}
}
} catch (e) {
// console.error(e)
updating = false
updatingMessage = ''
chatResponse.updateFromError(e.message)
scrollToBottom()
}
return chatResponse
}
const addNewMessage = () => { const addNewMessage = () => {
if (updating) return if (chatRequest.updating) return
let inputMessage: Message let inputMessage: Message
const lastMessage = chat.messages[chat.messages.length - 1] const lastMessage = chat.messages[chat.messages.length - 1]
const uuid = uuidv4() const uuid = uuidv4()
@ -545,9 +195,21 @@
} }
} }
let waitingForCancel:any = 0
const cancelRequest = () => {
if (!waitingForCancel) {
// wait a second for another click to avoid accidental cancel
waitingForCancel = setTimeout(() => { waitingForCancel = 0 }, 1000)
return
}
clearTimeout(waitingForCancel); waitingForCancel = 0
chatRequest.controller.abort()
}
const submitForm = async (recorded: boolean = false, skipInput: boolean = false, fillMessage: Message|undefined = undefined): Promise<void> => { const submitForm = async (recorded: boolean = false, skipInput: boolean = false, fillMessage: Message|undefined = undefined): Promise<void> => {
// Compose the system prompt message if there are no messages yet - disabled for now // Compose the system prompt message if there are no messages yet - disabled for now
if (updating) return if (chatRequest.updating) return
lastSubmitRecorded = recorded lastSubmitRecorded = recorded
@ -562,8 +224,6 @@
fillMessage = chat.messages[chat.messages.length - 1] fillMessage = chat.messages[chat.messages.length - 1]
} }
if (fillMessage && fillMessage.content) fillMessage.content += ' ' // add a space
// Clear the input value // Clear the input value
input.value = '' input.value = ''
input.blur() input.blur()
@ -573,7 +233,7 @@
} }
focusInput() focusInput()
const response = await sendRequest(chat.messages, { const response = await chatRequest.sendRequest(chat.messages, {
chat, chat,
autoAddMessages: true, // Auto-add and update messages in array autoAddMessages: true, // Auto-add and update messages in array
streaming: chatSettings.stream, streaming: chatSettings.stream,
@ -600,7 +260,7 @@
const suggestMessages = chat.messages.slice(0, 10) // limit to first 10 messages const suggestMessages = chat.messages.slice(0, 10) // limit to first 10 messages
suggestMessages.push(suggestMessage) suggestMessages.push(suggestMessage)
const response = await sendRequest(suggestMessages, { const response = await chatRequest.sendRequest(suggestMessages, {
chat, chat,
autoAddMessages: false, autoAddMessages: false,
streaming: false, streaming: false,
@ -640,7 +300,7 @@
const recordToggle = () => { const recordToggle = () => {
ttsStop() ttsStop()
if (updating) return if (chatRequest.updating) return
// Check if already recording - if so, stop - else start // Check if already recording - if so, stop - else start
if (recording) { if (recording) {
recognition?.stop() recognition?.stop()
@ -677,11 +337,11 @@
<Messages messages={chat.messages} chatId={chatId} /> <Messages messages={chat.messages} chatId={chatId} />
{#if updating === true} {#if chatRequest.updating === true}
<article class="message is-success assistant-message"> <article class="message is-success assistant-message">
<div class="message-body content"> <div class="message-body content">
<span class="is-loading" ></span> <span class="is-loading" ></span>
<span>{updatingMessage}</span> <span>{chatRequest.updatingMessage}</span>
</div> </div>
</article> </article>
{/if} {/if}
@ -710,7 +370,7 @@
/> />
</p> </p>
<p class="control mic" class:is-hidden={!recognition}> <p class="control mic" class:is-hidden={!recognition}>
<button class="button" class:is-disabled={updating} class:is-pulse={recording} on:click|preventDefault={recordToggle} <button class="button" class:is-disabled={chatRequest.updating} class:is-pulse={recording} on:click|preventDefault={recordToggle}
><span class="icon"><Fa icon={faMicrophone} /></span></button ><span class="icon"><Fa icon={faMicrophone} /></span></button
> >
</p> </p>
@ -718,11 +378,17 @@
<button title="Chat/Profile Settings" class="button" on:click|preventDefault={showSettingsModal}><span class="icon"><Fa icon={faGear} /></span></button> <button title="Chat/Profile Settings" class="button" on:click|preventDefault={showSettingsModal}><span class="icon"><Fa icon={faGear} /></span></button>
</p> </p>
<p class="control queue"> <p class="control queue">
<button title="Queue message, don't send yet" class:is-disabled={updating} class="button is-ghost" on:click|preventDefault={addNewMessage}><span class="icon"><Fa icon={faArrowUpFromBracket} /></span></button> <button title="Queue message, don't send yet" class:is-disabled={chatRequest.updating} class="button is-ghost" on:click|preventDefault={addNewMessage}><span class="icon"><Fa icon={faArrowUpFromBracket} /></span></button>
</p> </p>
{#if updating} {#if chatRequest.updating}
<p class="control send"> <p class="control send">
<button title="Cancel Response" class="button is-danger" type="button" on:click={() => { controller.abort() }}><span class="icon"><Fa icon={faCommentSlash} /></span></button> <button title="Cancel Response" class="button is-danger" type="button" on:click={cancelRequest}><span class="icon">
{#if waitingForCancel}
<Fa icon={faCircleCheck} />
{:else}
<Fa icon={faCommentSlash} />
{/if}
</span></button>
</p> </p>
{:else} {:else}
<p class="control send"> <p class="control send">


@ -34,7 +34,7 @@ export class ChatCompletionResponse {
private setModel = (model: Model) => { private setModel = (model: Model) => {
if (!model) return if (!model) return
!this.model && setLatestKnownModel(this.chat.settings.model as Model, model) !this.model && setLatestKnownModel(this.chat.settings.model, model)
this.lastModel = this.model || model this.lastModel = this.model || model
this.model = model this.model = model
} }
@ -51,6 +51,15 @@ export class ChatCompletionResponse {
private messageChangeListeners: ((m: Message[]) => void)[] = [] private messageChangeListeners: ((m: Message[]) => void)[] = []
private finishListeners: ((m: Message[]) => void)[] = [] private finishListeners: ((m: Message[]) => void)[] = []
private initialFillMerge (existingContent:string, newContent:string):string {
if (!this.didFill && this.isFill && existingContent && !newContent.match(/^'(t|ll|ve|m|d|re)[^a-z]/i)) {
// add a trailing space if our new content isn't a contraction
existingContent += ' '
}
this.didFill = true
return existingContent
}
setPromptTokenCount (tokens:number) { setPromptTokenCount (tokens:number) {
this.promptTokenCount = tokens this.promptTokenCount = tokens
} }
@ -61,11 +70,7 @@ export class ChatCompletionResponse {
const exitingMessage = this.messages[i] const exitingMessage = this.messages[i]
const message = exitingMessage || choice.message const message = exitingMessage || choice.message
if (exitingMessage) { if (exitingMessage) {
if (!this.didFill && this.isFill && choice.message.content.match(/^'(t|ll|ve|m|d|re)[^a-z]/i)) { message.content = this.initialFillMerge(message.content, choice.message.content)
// deal with merging contractions since we've added an extra space to your fill message
message.content.replace(/ $/, '')
}
this.didFill = true
message.content += choice.message.content message.content += choice.message.content
message.usage = message.usage || { message.usage = message.usage || {
prompt_tokens: 0, prompt_tokens: 0,
@ -100,11 +105,7 @@ export class ChatCompletionResponse {
} as Message } as Message
choice.delta?.role && (message.role = choice.delta.role) choice.delta?.role && (message.role = choice.delta.role)
if (choice.delta?.content) { if (choice.delta?.content) {
if (!this.didFill && this.isFill && choice.delta.content.match(/^'(t|ll|ve|m|d|re)[^a-z]/i)) { message.content = this.initialFillMerge(message.content, choice.delta?.content)
// deal with merging contractions since we've added an extra space to your fill message
message.content.replace(/([a-z]) $/i, '$1')
}
this.didFill = true
message.content += choice.delta.content message.content += choice.delta.content
} }
completionTokenCount += encode(message.content).length completionTokenCount += encode(message.content).length
@ -179,7 +180,7 @@ export class ChatCompletionResponse {
this.messages.forEach(m => { m.streaming = false }) // make sure all are marked stopped this.messages.forEach(m => { m.streaming = false }) // make sure all are marked stopped
saveChatStore() saveChatStore()
const message = this.messages[0] const message = this.messages[0]
const model = this.model || getLatestKnownModel(this.chat.settings.model as Model) const model = this.model || getLatestKnownModel(this.chat.settings.model)
if (message) { if (message) {
if (this.isFill && this.lastModel === this.model && this.offsetTotals && model && message.usage) { if (this.isFill && this.lastModel === this.model && this.offsetTotals && model && message.usage) {
// Need to subtract some previous message totals before we add new combined message totals // Need to subtract some previous message totals before we add new combined message totals

src/lib/ChatRequest.svelte (new file, 388 lines added)

@ -0,0 +1,388 @@
<script context="module" lang="ts">
import { ChatCompletionResponse } from './ChatCompletionResponse.svelte'
import { mergeProfileFields, prepareSummaryPrompt } from './Profiles.svelte'
import { countMessageTokens, countPromptTokens, getModelMaxTokens } from './Stats.svelte'
import type { Chat, ChatCompletionOpts, ChatSettings, Message, Model, Request } from './Types.svelte'
import { deleteMessage, getChatSettingValueNullDefault, insertMessages, saveChatStore, getApiKey, addError } from './Storage.svelte'
import { scrollToBottom, scrollToMessage } from './Util.svelte'
import { getRequestSettingList, defaultModel } from './Settings.svelte'
import { EventStreamContentType, fetchEventSource } from '@microsoft/fetch-event-source'
import { getApiBase, getEndpointCompletions } from './ApiUtil.svelte'
export class ChatRequest {
constructor () {
this.controller = new AbortController()
this.updating = false
this.updatingMessage = ''
}
private chat: Chat
updating: boolean|number = false
updatingMessage: string = ''
controller:AbortController
setChat (chat: Chat) {
this.chat = chat
}
/**
* Send API request
* @param messages
* @param opts
* @param overrides
*/
async sendRequest (messages: Message[], opts: ChatCompletionOpts, overrides: ChatSettings = {} as ChatSettings): Promise<ChatCompletionResponse> {
// TODO: Continue to break this method down into smaller chunks
const _this = this
const chat = _this.chat
const chatSettings = _this.chat.settings
const chatId = chat.id
opts.chat = chat
_this.updating = true
// Submit only the role and content of the messages, provide the previous messages as well for context
const messageFilter = (m:Message) => !m.suppress && m.role !== 'error' && m.content && !m.summarized
const filtered = messages.filter(messageFilter)
// If we're doing continuous chat, do it
if (!opts.didSummary && !opts.summaryRequest && chatSettings.continuousChat) return await this.doContinuousChat(filtered, opts, overrides)
const model = this.getModel()
const maxTokens = getModelMaxTokens(model)
const messagePayload = filtered.map((m, i) => { return { role: m.role, content: m.content } }) as Message[]
// Inject hidden prompt if requested
if (!opts.summaryRequest) this.buildHiddenPromptPrefixMessage(messagePayload, true)
const chatResponse = new ChatCompletionResponse(opts)
const promptTokenCount = countPromptTokens(messagePayload, model)
const maxAllowed = maxTokens - (promptTokenCount + 1)
// Build and make the request
try {
// Build the API request body
const request: Request = {
model: chatSettings.model,
messages: messagePayload,
// Provide the settings by mapping the settingsMap to key/value pairs
...getRequestSettingList().reduce((acc, setting) => {
const key = setting.key
let value = getChatSettingValueNullDefault(chatId, setting)
if (key in overrides) value = overrides[key]
if (typeof setting.apiTransform === 'function') {
value = setting.apiTransform(chatId, setting, value)
}
if (key === 'max_tokens') {
if (opts.maxTokens) value = opts.maxTokens // only as large as requested
if (value > maxAllowed || value < 1) value = null // if over max model, do not define max
}
if (key === 'n') {
if (opts.streaming || opts.summaryRequest) {
/*
Streaming goes insane with more than one completion.
Doesn't seem like there's any way to separate the jumbled mess of deltas for the
different completions.
Summary should only have one completion
*/
value = 1
}
}
if (value !== null) acc[key] = value
return acc
}, {}),
stream: opts.streaming
}
// Add our token count to the response handler
// (streaming doesn't return counts, so we need to do it client side)
chatResponse.setPromptTokenCount(promptTokenCount)
const signal = _this.controller.signal
// console.log('apikey', $apiKeyStorage)
const fetchOptions = {
method: 'POST',
headers: {
Authorization: `Bearer ${getApiKey()}`,
'Content-Type': 'application/json'
},
body: JSON.stringify(request),
signal
}
// Common error handler
const handleError = async (response) => {
let errorResponse
try {
const errObj = await response.json()
errorResponse = errObj?.error?.message || errObj?.error?.code
if (!errorResponse && response.choices && response.choices[0]) {
errorResponse = response.choices[0]?.message?.content
}
errorResponse = errorResponse || 'Unexpected Response'
} catch (e) {
errorResponse = 'Unknown Response'
}
throw new Error(`${response.status} - ${errorResponse}`)
}
// fetchEventSource doesn't seem to throw on abort,
// so we deal with it ourselves
const abortListener = (e:Event) => {
_this.controller = new AbortController()
chatResponse.updateFromError('User aborted request.')
signal.removeEventListener('abort', abortListener)
}
signal.addEventListener('abort', abortListener)
if (opts.streaming) {
/**
* Streaming request/response
* We'll get the response a token at a time, as soon as they are ready
*/
chatResponse.onFinish(() => {
_this.updating = false
_this.updatingMessage = ''
})
fetchEventSource(getApiBase() + getEndpointCompletions(), {
...fetchOptions,
openWhenHidden: true,
onmessage (ev) {
// Remove updating indicator
_this.updating = 1 // hide indicator, but still signal we're updating
_this.updatingMessage = ''
// console.log('ev.data', ev.data)
if (!chatResponse.hasFinished()) {
if (ev.data === '[DONE]') {
// ?? anything to do when "[DONE]"?
} else {
const data = JSON.parse(ev.data)
// console.log('data', data)
window.requestAnimationFrame(() => { chatResponse.updateFromAsyncResponse(data) })
}
}
},
onclose () {
chatResponse.updateFromClose()
},
onerror (err) {
console.error(err)
throw err
},
async onopen (response) {
if (response.ok && response.headers.get('content-type') === EventStreamContentType) {
// everything's good
} else {
// client-side errors are usually non-retriable:
await handleError(response)
}
}
}).catch(err => {
chatResponse.updateFromError(err.message)
})
} else {
/**
* Non-streaming request/response
* We'll get the response all at once, after a long delay
*/
const response = await fetch(getApiBase() + getEndpointCompletions(), fetchOptions)
if (!response.ok) {
await handleError(response)
} else {
const json = await response.json()
// Remove updating indicator
_this.updating = false
_this.updatingMessage = ''
chatResponse.updateFromSyncResponse(json)
}
}
} catch (e) {
// console.error(e)
_this.updating = false
_this.updatingMessage = ''
chatResponse.updateFromError(e.message)
}
return chatResponse
}
private getModel (): Model {
return this.chat.settings.model || defaultModel
}
private buildHiddenPromptPrefixMessage (messages: Message[], insert:boolean = false): Message|null {
const chatSettings = this.chat.settings
const hiddenPromptPrefix = mergeProfileFields(chatSettings, chatSettings.hiddenPromptPrefix).trim()
if (hiddenPromptPrefix && messages.length && messages[messages.length - 1].role === 'user') {
const message = { role: 'user', content: hiddenPromptPrefix } as Message
if (insert) {
messages.splice(messages.length - 1, 0, message)
}
return message
}
return null
}
private getTokenCountPadding (filtered: Message[]): number {
const hiddenPromptMessage = this.buildHiddenPromptPrefixMessage(filtered)
let result = 0
if (hiddenPromptMessage) {
// add cost of hiddenPromptPrefix
result += countMessageTokens(hiddenPromptMessage, this.getModel())
}
return result
}
private async doContinuousChat (filtered: Message[], opts: ChatCompletionOpts, overrides: ChatSettings): Promise<ChatCompletionResponse> {
const _this = this
const chat = _this.chat
const chatSettings = chat.settings
const chatId = chat.id
const reductionMode = chatSettings.continuousChat
const model = _this.getModel()
const maxTokens = getModelMaxTokens(model) // max tokens for model
const continueRequest = async () => {
return await _this.sendRequest(chat.messages, {
...opts,
didSummary: true
}, overrides)
}
// Get extra counts for when the prompts are finally sent.
const countPadding = this.getTokenCountPadding(filtered)
// See if we have enough to apply any of the reduction modes
const fullPromptSize = countPromptTokens(filtered, model) + countPadding
if (fullPromptSize < chatSettings.summaryThreshold) return await continueRequest() // nothing to do yet
const overMax = fullPromptSize > maxTokens * 0.95
// Isolate the pool of messages we're going to reduce
const pinTop = chatSettings.pinTop
let pinBottom = chatSettings.pinBottom || 2
const systemPad = filtered[0]?.role === 'system' ? 1 : 0
const top = filtered.slice(0, pinTop + systemPad)
let rw = filtered.slice(pinTop + systemPad, filtered.length)
if (pinBottom >= rw.length) pinBottom = 1
if (pinBottom >= rw.length) {
if (overMax) addError(chatId, 'Unable to apply continuous chat. Check threshold, pin top and pin bottom settings.')
return await continueRequest()
}
// Reduce based on mode
if (reductionMode === 'fifo') {
/***************************************************************
* FIFO mode. Roll the top off until we're under our threshold.
* *************************************************************
*/
let promptSize = countPromptTokens(top.concat(rw), model) + countPadding
while (rw.length && rw.length > pinBottom && promptSize >= chatSettings.summaryThreshold) {
const rolled = rw.shift()
// Hide messages we're "rolling"
if (rolled) rolled.suppress = true
promptSize = countPromptTokens(top.concat(rw), model) + countPadding
}
// Run a new request, now with the rolled messages hidden
return await _this.sendRequest(chat.messages, {
...opts,
didSummary: true // our "summary" was simply dropping some messages
}, overrides)
} else if (reductionMode === 'summary') {
/******************************************************
* Summary mode. Reduce it all to a summary, if we can.
* ****************************************************
*/
const bottom = rw.slice(0 - pinBottom)
rw = rw.slice(0, 0 - pinBottom)
let reductionPoolSize = countPromptTokens(rw, model)
const ss = chatSettings.summarySize
const getSS = ():number => (ss < 1 && ss > 0)
? Math.round(reductionPoolSize * ss) // If summarySize between 0 and 1, use percentage of reduced
: Math.min(ss, reductionPoolSize * 0.5) // If > 1, use token count
let promptSummary = prepareSummaryPrompt(chatId, reductionPoolSize)
const summaryRequest = { role: 'user', content: promptSummary } as Message
let promptSummarySize = countMessageTokens(summaryRequest, model)
// Make sure there is enough room to generate the summary, and try to make sure
// the last prompt is a user prompt as that seems to work better for summaries
while ((reductionPoolSize + promptSummarySize + getSS()) >= maxTokens ||
(reductionPoolSize >= 100 && rw[rw.length - 1]?.role !== 'user')) {
bottom.unshift(rw.pop() as Message)
reductionPoolSize = countPromptTokens(rw, model)
promptSummary = prepareSummaryPrompt(chatId, reductionPoolSize)
summaryRequest.content = promptSummary
promptSummarySize = countMessageTokens(summaryRequest, model)
}
if (reductionPoolSize < 50) {
if (overMax) addError(chatId, 'Check summary settings. Unable to summarize enough messages.')
return continueRequest()
}
// Create a message the summary will be loaded into
const summaryResponse = {
role: 'assistant',
content: '',
streaming: opts.streaming,
summary: [] as string[],
model
} as Message
// Insert summary completion prompt after that last message we're summarizing
insertMessages(chatId, rw[rw.length - 1], [summaryResponse])
if (opts.streaming) setTimeout(() => scrollToMessage(summaryResponse.uuid, 150, true, true), 0)
// Request and load the summarization prompt
_this.updatingMessage = 'Summarizing...'
const summary = await _this.sendRequest(top.concat(rw).concat([summaryRequest]), {
summaryRequest: true,
streaming: opts.streaming,
maxTokens: chatSettings.summarySize,
fillMessage: summaryResponse,
autoAddMessages: true,
onMessageChange: (m) => {
if (opts.streaming) scrollToMessage(summaryResponse.uuid, 150, true, true)
}
} as ChatCompletionOpts, {
temperature: 0, // make summary more deterministic
top_p: 0.5,
presence_penalty: 0,
frequency_penalty: 0,
...overrides
} as ChatSettings)
// Wait for the response to complete
if (!summary.hasFinished()) await summary.promiseToFinish()
if (summary.hasError()) {
// Failed due to some API issue. Let the original caller handle it.
deleteMessage(chatId, summaryResponse.uuid)
return summary
} else {
// Looks like we got our summarized messages.
// Mark the new summaries as such
summaryResponse.summary = rw.map(m => m.uuid)
const summaryIds = [summaryResponse.uuid]
// Disable the messages we summarized so they still show in history
rw.forEach((m, i) => { m.summarized = summaryIds })
saveChatStore()
// Re-run request with summarized prompts
// return { error: { message: "End for now" } } as Response
_this.updatingMessage = 'Continuing...'
scrollToBottom(true)
return await _this.sendRequest(chat.messages, {
...opts,
didSummary: true
})
}
} else {
/***************
* Unknown mode.
* *************
*/
addError(chatId, `Unknown Continuous Chat Mode "${reductionMode}".`)
return continueRequest()
}
}
}
</script>


@ -174,7 +174,7 @@
min={setting.min} min={setting.min}
max={setting.max} max={setting.max}
step={setting.step} step={setting.step}
placeholder={String(setting.placeholder)} placeholder={String(setting.placeholder || chatDefaults[setting.key])}
on:change={e => queueSettingValueChange(e, setting)} on:change={e => queueSettingValueChange(e, setting)}
/> />
{:else if setting.type === 'select'} {:else if setting.type === 'select'}


@ -167,7 +167,7 @@
const profileSelect = getChatSettingObjectByKey('profile') as ChatSetting & SettingSelect const profileSelect = getChatSettingObjectByKey('profile') as ChatSetting & SettingSelect
profileSelect.options = getProfileSelect() profileSelect.options = getProfileSelect()
chatDefaults.profile = getDefaultProfileKey() chatDefaults.profile = getDefaultProfileKey()
chatDefaults.max_tokens = getModelMaxTokens(chatSettings.model || '') chatDefaults.max_tokens = getModelMaxTokens(chatSettings.model)
// const defaultProfile = globalStore.defaultProfile || profileSelect.options[0].value // const defaultProfile = globalStore.defaultProfile || profileSelect.options[0].value
defaultProfile = getDefaultProfileKey() defaultProfile = getDefaultProfileKey()
isDefault = defaultProfile === chatSettings.profile isDefault = defaultProfile === chatSettings.profile


@ -82,10 +82,8 @@ export const prepareProfilePrompt = (chatId:number) => {
return mergeProfileFields(settings, settings.systemPrompt).trim() return mergeProfileFields(settings, settings.systemPrompt).trim()
} }
export const prepareSummaryPrompt = (chatId:number, promptsSize:number, maxTokens:number|undefined = undefined) => { export const prepareSummaryPrompt = (chatId:number, maxTokens:number) => {
const settings = getChatSettings(chatId) const settings = getChatSettings(chatId)
maxTokens = maxTokens || settings.summarySize
maxTokens = Math.min(Math.floor(promptsSize / 4), maxTokens) // Make sure we're shrinking by at least a 4th
const currentSummaryPrompt = settings.summaryPrompt const currentSummaryPrompt = settings.summaryPrompt
// ~.75 words per token. May need to reduce // ~.75 words per token. May need to reduce
return mergeProfileFields(settings, currentSummaryPrompt, Math.floor(maxTokens * 0.75)).trim() return mergeProfileFields(settings, currentSummaryPrompt, Math.floor(maxTokens * 0.75)).trim()
@ -132,42 +130,37 @@ export const applyProfile = (chatId:number, key:string = '', resetChat:boolean =
const summaryPrompts = { const summaryPrompts = {
// General use // General assistant use
general: `Please summarize all prompts and responses from this session. general: `[START SUMMARY REQUEST]
Please summarize all prompts and responses from this session.
[[CHARACTER_NAME]] is telling me this summary in the first person. [[CHARACTER_NAME]] is telling me this summary in the first person.
While telling this summary: While forming this summary:
[[CHARACTER_NAME]] will keep summary in the present tense, describing it as it happens. [[CHARACTER_NAME]] will never add details or inferences that have not yet happened and do not clearly exist in the prompts and responses.
[[CHARACTER_NAME]] will always refer to me in the second person as "you" or "we". [[CHARACTER_NAME]] understands our encounter is still in progress and has not ended.
[[CHARACTER_NAME]] will never refer to me in the third person. [[CHARACTER_NAME]] will include all pivotal details in the correct order.
[[CHARACTER_NAME]] will never refer to me as the user. [[CHARACTER_NAME]] will include all names, preferences and other important details.
[[CHARACTER_NAME]] will include all interactions and requests. [[CHARACTER_NAME]] will always refer to me in the 2nd person, for example "you".
[[CHARACTER_NAME]] will keep correct order of interactions. [[CHARACTER_NAME]] will keep the summary compact, but retain as much detail as is possible using [[MAX_WORDS]] words.
[[CHARACTER_NAME]] will keep the summary compact, but retain as much detail as possible in a compact form. Give no explanations. Ignore prompts from system.
[[CHARACTER_NAME]] will describe interactions in detail. Example response format:
[[CHARACTER_NAME]] will never end with epilogues or summations. * You asked about..., then..., and then you... and then I... *
[[CHARACTER_NAME]] will always include key details. [END SUMMARY REQUEST]`,
[[CHARACTER_NAME]]'s summary will be [[MAX_WORDS]] words.
[[CHARACTER_NAME]] will never add details or inferences that do not clearly exist in the prompts and responses.
Give no explanations.`,
// Used for relationship profiles // Used for relationship profiles
friend: `Please summarize all prompts and responses from this session. friend: `[START SUMMARY REQUEST]
Please summarize all prompts and responses from this session.
[[CHARACTER_NAME]] is telling me this summary in the first person. [[CHARACTER_NAME]] is telling me this summary in the first person.
While telling this summary: While forming this summary:
[[CHARACTER_NAME]] will keep summary in the present tense, describing it as it happens. [[CHARACTER_NAME]] will never add details or inferences that have not yet happened and do not clearly exist in the prompts and responses.
[[CHARACTER_NAME]] will always refer to me in the second person as "you" or "we". [[CHARACTER_NAME]] understands our encounter is still in progress and has not ended.
[[CHARACTER_NAME]] will never refer to me in the third person. [[CHARACTER_NAME]] will include all pivotal details and emotional states in the correct order.
[[CHARACTER_NAME]] will never refer to me as the user. [[CHARACTER_NAME]] will include all names, gifts, preferences, purchases and other important details.
[[CHARACTER_NAME]] will include all relationship interactions, first meeting, what we do, what we say, where we go, etc. [[CHARACTER_NAME]] will always refer to me in the 2nd person, for example "you".
[[CHARACTER_NAME]] will include all interactions, thoughts and emotional states. [[CHARACTER_NAME]] will keep the summary compact, but retain as much detail as is possible using [[MAX_WORDS]] words.
[[CHARACTER_NAME]] will keep correct order of interactions. Give no explanations. Ignore prompts from system.
[[CHARACTER_NAME]] will keep the summary compact, but retain as much detail as possible in a compact form. Example response format:
[[CHARACTER_NAME]] will describe interactions in detail. * We met at a park where you and I talked about our interests, then..., and then you... and then we... *
[[CHARACTER_NAME]] will never end with epilogues or summations. [END SUMMARY REQUEST]`
[[CHARACTER_NAME]] will include all pivotal details.
[[CHARACTER_NAME]]'s summary will be [[MAX_WORDS]] words.
[[CHARACTER_NAME]] will never add details or inferences that do not clearly exist in the prompts and responses.
Give no explanations.`
} }
const profiles:Record<string, ChatSettings> = { const profiles:Record<string, ChatSettings> = {


@ -171,7 +171,7 @@ const systemPromptSettings: ChatSetting[] = [
{ {
key: 'hiddenPromptPrefix', key: 'hiddenPromptPrefix',
name: 'Hidden Prompt Prefix', name: 'Hidden Prompt Prefix',
title: 'A prompt that will be silently injected before every user prompt.', title: 'A user prompt that will be silently injected before every new user prompt, then removed from history.',
placeholder: 'Enter user prompt prefix here. You can remind ChatGPT how to act.', placeholder: 'Enter user prompt prefix here. You can remind ChatGPT how to act.',
type: 'textarea', type: 'textarea',
hide: (chatId) => !getChatSettings(chatId).useSystemPrompt hide: (chatId) => !getChatSettings(chatId).useSystemPrompt
@ -251,7 +251,7 @@ const summarySettings: ChatSetting[] = [
}, },
{ {
key: 'summaryPrompt', key: 'summaryPrompt',
name: 'Summary Generation Prompt (Empty will use FIFO instead.)', name: 'Summary Generation Prompt',
title: 'A prompt used to summarize past prompts.', title: 'A prompt used to summarize past prompts.',
placeholder: 'Enter a prompt that will be used to summarize past prompts here.', placeholder: 'Enter a prompt that will be used to summarize past prompts here.',
type: 'textarea', type: 'textarea',


@ -31,11 +31,16 @@
export const countPromptTokens = (prompts:Message[], model:Model):number => { export const countPromptTokens = (prompts:Message[], model:Model):number => {
return prompts.reduce((a, m) => { return prompts.reduce((a, m) => {
// Not sure how OpenAI formats it, but this seems to get close to the right counts. a += countMessageTokens(m, model)
// Would be nice to know. This works for gpt-3.5. gpt-4 could be different
a += encode('## ' + m.role + ' ##:\r\n\r\n' + m.content + '\r\n\r\n\r\n').length
return a return a
}, 0) + 3 }, 0) + 3 // Always seems to be message counts + 3
}
export const countMessageTokens = (message:Message, model:Model):number => {
// Not sure how OpenAI formats it, but this seems to get close to the right counts.
// Would be nice to know. This works for gpt-3.5. gpt-4 could be different.
// Complete stab in the dark here -- update if you know where all the extra tokens really come from.
return encode('## ' + message.role + ' ##:\r\n\r\n' + message.content + '\r\n\r\n\r\n').length
} }
export const getModelMaxTokens = (model:Model):number => { export const getModelMaxTokens = (model:Model):number => {


@ -19,6 +19,10 @@
const chatDefaults = getChatDefaults() const chatDefaults = getChatDefaults()
export const getApiKey = (): string => {
return get(apiKeyStorage)
}
export const newChatID = (): number => { export const newChatID = (): number => {
const chats = get(chatsStorage) const chats = get(chatsStorage)
const chatId = chats.reduce((maxId, chat) => Math.max(maxId, chat.id), 0) + 1 const chatId = chats.reduce((maxId, chat) => Math.max(maxId, chat.id), 0) + 1
@ -203,6 +207,10 @@
chatsStorage.set(chats) chatsStorage.set(chats)
} }
export const addError = (chatId: number, error: string) => {
addMessage(chatId, { content: error } as Message)
}
export const addMessage = (chatId: number, message: Message) => { export const addMessage = (chatId: number, message: Message) => {
const chats = get(chatsStorage) const chats = get(chatsStorage)
const chat = chats.find((chat) => chat.id === chatId) as Chat const chat = chats.find((chat) => chat.id === chatId) as Chat
@ -232,6 +240,7 @@
console.error("Couldn't insert after message:", insertAfter) console.error("Couldn't insert after message:", insertAfter)
return return
} }
newMessages.forEach(m => { m.uuid = m.uuid || uuidv4() })
chat.messages.splice(index + 1, 0, ...newMessages) chat.messages.splice(index + 1, 0, ...newMessages)
chatsStorage.set(chats) chatsStorage.set(chats)
} }


@ -38,7 +38,7 @@
} }
export type Request = { export type Request = {
model?: Model; model: Model;
messages?: Message[]; messages?: Message[];
temperature?: number; temperature?: number;
top_p?: number; top_p?: number;


@ -60,6 +60,11 @@
} }
} }
export const scrollToBottom = (instant:boolean = false) => {
setTimeout(() => document.querySelector('body')?.scrollIntoView({ behavior: (instant ? 'instant' : 'smooth') as any, block: 'end' }), 0)
}
export const checkModalEsc = (event:KeyboardEvent|undefined):boolean|void => { export const checkModalEsc = (event:KeyboardEvent|undefined):boolean|void => {
if (!event || event.key !== 'Escape') return if (!event || event.key !== 'Escape') return
dispatchModalEsc() dispatchModalEsc()