<script context="module" lang="ts">
  import { ChatCompletionResponse } from './ChatCompletionResponse.svelte'
  import { mergeProfileFields, prepareSummaryPrompt } from './Profiles.svelte'
  import { countMessageTokens, countPromptTokens, getModelMaxTokens } from './Stats.svelte'
  import type { Chat, ChatCompletionOpts, ChatSettings, Message, Model, Request, RequestImageGeneration } from './Types.svelte'
  import { deleteMessage, getChatSettingValueNullDefault, insertMessages, saveChatStore, getApiKey, addError } from './Storage.svelte'
  import { scrollToBottom, scrollToMessage } from './Util.svelte'
  import { getRequestSettingList, defaultModel } from './Settings.svelte'
  import { EventStreamContentType, fetchEventSource } from '@microsoft/fetch-event-source'
  import { getApiBase, getEndpointCompletions, getEndpointGenerations } from './ApiUtil.svelte'
  import { v4 as uuidv4 } from 'uuid'

  export class ChatRequest {
    constructor () {
      this.controller = new AbortController()
      this.updating = false
      this.updatingMessage = ''
    }

    private chat: Chat
    updating: boolean|number = false
    updatingMessage: string = ''
    controller: AbortController

    setChat (chat: Chat) {
      this.chat = chat
    }

    // Common error handler
    async handleError (response) {
      let errorResponse
      try {
        const errObj = await response.json()
        errorResponse = errObj?.error?.message || errObj?.error?.code
        if (!errorResponse && response.choices && response.choices[0]) {
          errorResponse = response.choices[0]?.message?.content
        }
        errorResponse = errorResponse || 'Unexpected Response'
      } catch (e) {
        errorResponse = 'Unknown Response'
      }
      throw new Error(`${response.status} - ${errorResponse}`)
    }

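    // Build an image generation request from the prompt, post it to the generations
    // endpoint, and return the result wrapped in a ChatCompletionResponse.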
    async imageRequest (message: Message, prompt: string, count: number, messages: Message[], opts: ChatCompletionOpts, overrides: ChatSettings = {} as ChatSettings): Promise<ChatCompletionResponse> {
      const _this = this
      count = count || 1
      _this.updating = true
      _this.updatingMessage = 'Generating Image...'
      const signal = _this.controller.signal
      const size = this.chat.settings.imageGenerationSize
      const request: RequestImageGeneration = {
        prompt,
        response_format: 'b64_json',
        size,
        n: count
      }
      const fetchOptions = {
        method: 'POST',
        headers: {
          Authorization: `Bearer ${getApiKey()}`,
          'Content-Type': 'application/json'
        },
        body: JSON.stringify(request),
        signal
      }
      const chatResponse = new ChatCompletionResponse(opts)

      try {
        const response = await fetch(getApiBase() + getEndpointGenerations(), fetchOptions)
        if (!response.ok) {
          await _this.handleError(response)
        } else {
          const json = await response.json()
          // Remove updating indicator
          _this.updating = false
          _this.updatingMessage = ''
          // console.log('image json', json, json?.data[0])
          chatResponse.updateImageFromSyncResponse(json, prompt, 'dall-e-' + size)
        }
      } catch (e) {
        chatResponse.updateFromError(e)
        throw e
      }
      message.suppress = true
      return chatResponse
    }

    /**
     * Send API request
     * @param messages
     * @param opts
     * @param overrides
     */
    async sendRequest (messages: Message[], opts: ChatCompletionOpts, overrides: ChatSettings = {} as ChatSettings): Promise<ChatCompletionResponse> {
      // TODO: Continue to break this method down to smaller chunks
      const _this = this
      const chat = _this.chat
      const chatSettings = _this.chat.settings
      const chatId = chat.id
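      // Detect natural-language image requests in the last user message
      // (e.g. "please generate three images of ..."): group 5 captures the count, group 9 the image prompt.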
      const imagePromptDetect = /^\s*(please|can\s+you|will\s+you)*\s*(give|generate|create|show|build|design)\s+(me)*\s*(an|a|set|a\s+set\s+of)*\s*([0-9]+|one|two|three|four)*\s+(image|photo|picture|pic)s*\s*(for\s+me)*\s*(of|[^a-z0-9]+|about|that\s+has|showing|with|having|depicting)\s+[^a-z0-9]*(.*)$/i
      opts.chat = chat
      _this.updating = true

      const lastMessage = messages[messages.length - 1]

      if (chatSettings.imageGenerationSize && !opts.didSummary && !opts.summaryRequest && lastMessage?.role === 'user') {
        const im = lastMessage.content.match(imagePromptDetect)
        if (im) {
          // console.log('image prompt request', im)
          let n = parseInt((im[5] || '').toLowerCase().trim()
            .replace(/one/ig, '1')
            .replace(/two/ig, '2')
            .replace(/three/ig, '3')
            .replace(/four/ig, '4')
          )
          if (isNaN(n)) n = 1
          n = Math.min(Math.max(1, n), 4)
          return await this.imageRequest(lastMessage, im[9], n, messages, opts, overrides)
          // throw new Error('Image prompt:' + im[7])
        }
      }

      // Submit only the role and content of the messages, provide the previous messages as well for context
      const messageFilter = (m: Message) => !m.suppress &&
        ['user', 'assistant', 'system'].includes(m.role) &&
        m.content && !m.summarized
      const filtered = messages.filter(messageFilter)

      // If we're doing continuous chat, do it
      if (!opts.didSummary && !opts.summaryRequest && chatSettings.continuousChat) return await this.doContinuousChat(filtered, opts, overrides)

      const model = this.getModel()
      const maxTokens = getModelMaxTokens(model)

      const messagePayload = filtered.map((m, i) => { return { role: m.role, content: m.content } }) as Message[]
      // Inject hidden prompt if requested
      if (!opts.summaryRequest) this.buildHiddenPromptPrefixMessage(messagePayload, true)

      const chatResponse = new ChatCompletionResponse(opts)
      const promptTokenCount = countPromptTokens(messagePayload, model)
      const maxAllowed = maxTokens - (promptTokenCount + 1)

      // Build the API request body
      const request: Request = {
        model: chatSettings.model,
        messages: messagePayload,
        // Provide the settings by mapping the settingsMap to key/value pairs
        ...getRequestSettingList().reduce((acc, setting) => {
          const key = setting.key
          let value = getChatSettingValueNullDefault(chatId, setting)
          if (key in overrides) value = overrides[key]
          if (typeof setting.apiTransform === 'function') {
            value = setting.apiTransform(chatId, setting, value)
          }
          if (key === 'max_tokens') {
            if (opts.maxTokens) value = opts.maxTokens // only as large as requested
            if (value > maxAllowed || value < 1) value = null // if over what the model allows, or invalid, omit max_tokens
          }
          if (key === 'n') {
            if (opts.streaming || opts.summaryRequest) {
              /*
                Streaming goes insane with more than one completion.
                Doesn't seem like there's any way to separate the jumbled mess of deltas for the
                different completions.
                Summary should only have one completion
              */
              value = 1
            }
          }
          if (value !== null) acc[key] = value
          return acc
        }, {}),
        stream: opts.streaming
      }

      // Set-up and make the request
      try {
        // Add our token count to the response handler
        // (streaming doesn't return counts, so we need to do it client side)
        chatResponse.setPromptTokenCount(promptTokenCount)

        const signal = _this.controller.signal

        const fetchOptions = {
          method: 'POST',
          headers: {
            Authorization: `Bearer ${getApiKey()}`,
            'Content-Type': 'application/json'
          },
          body: JSON.stringify(request),
          signal
        }

        // fetchEventSource doesn't seem to throw on abort,
        // so we deal with it ourselves
        const abortListener = (e: Event) => {
          _this.controller = new AbortController()
          chatResponse.updateFromError('User aborted request.')
          signal.removeEventListener('abort', abortListener)
        }
        signal.addEventListener('abort', abortListener)

        if (opts.streaming) {
          /**
           * Streaming request/response
           * We'll get the response a token at a time, as soon as they are ready
           */
          chatResponse.onFinish(() => {
            _this.updating = false
            _this.updatingMessage = ''
          })
          fetchEventSource(getApiBase() + getEndpointCompletions(), {
            ...fetchOptions,
            openWhenHidden: true,
            onmessage (ev) {
              // Remove updating indicator
              _this.updating = 1 // hide indicator, but still signal we're updating
              _this.updatingMessage = ''
              // console.log('ev.data', ev.data)
              if (!chatResponse.hasFinished()) {
                if (ev.data === '[DONE]') {
                  // ?? anything to do when "[DONE]"?
                } else {
                  const data = JSON.parse(ev.data)
                  // console.log('data', data)
                  window.requestAnimationFrame(() => { chatResponse.updateFromAsyncResponse(data) })
                }
              }
            },
            onclose () {
              chatResponse.updateFromClose()
            },
            onerror (err) {
              console.error(err)
              throw err
            },
            async onopen (response) {
              if (response.ok && response.headers.get('content-type') === EventStreamContentType) {
                // everything's good
              } else {
                // client-side errors are usually non-retriable:
                await _this.handleError(response)
              }
            }
          }).catch(err => {
            chatResponse.updateFromError(err.message)
          })
        } else {
          /**
           * Non-streaming request/response
           * We'll get the response all at once, after a long delay
           */
          const response = await fetch(getApiBase() + getEndpointCompletions(), fetchOptions)
          if (!response.ok) {
            await _this.handleError(response)
          } else {
            const json = await response.json()
            // Remove updating indicator
            _this.updating = false
            _this.updatingMessage = ''
            chatResponse.updateFromSyncResponse(json)
          }
        }
      } catch (e) {
        // console.error(e)
        _this.updating = false
        _this.updatingMessage = ''
        chatResponse.updateFromError(e.message)
      }

      return chatResponse
    }

    private getModel (): Model {
      return this.chat.settings.model || defaultModel
    }

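    // Build the hidden prompt prefix message (if one is configured) and, when insert is true,
    // splice it in just before the last user message.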
    private buildHiddenPromptPrefixMessage (messages: Message[], insert: boolean = false): Message|null {
      const chatSettings = this.chat.settings
      const hiddenPromptPrefix = mergeProfileFields(chatSettings, chatSettings.hiddenPromptPrefix).trim()
      if (hiddenPromptPrefix && messages.length && messages[messages.length - 1].role === 'user') {
        const message = { role: 'user', content: hiddenPromptPrefix } as Message
        if (insert) {
          messages.splice(messages.length - 1, 0, message)
        }
        return message
      }
      return null
    }

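    // Extra token count to reserve for prompts (like the hidden prompt prefix)
    // that will only be added when the request is finally sent.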
    private getTokenCountPadding (filtered: Message[]): number {
      const hiddenPromptMessage = this.buildHiddenPromptPrefixMessage(filtered)
      let result = 0
      if (hiddenPromptMessage) {
        // add cost of hiddenPromptPrefix
        result += countMessageTokens(hiddenPromptMessage, this.getModel())
      }
      return result
    }

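    // Apply the configured continuous chat mode ('fifo' or 'summary') to reduce the prompt
    // once it passes the summary threshold, then re-send the request.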
    private async doContinuousChat (filtered: Message[], opts: ChatCompletionOpts, overrides: ChatSettings): Promise<ChatCompletionResponse> {
      const _this = this
      const chat = _this.chat
      const chatSettings = chat.settings
      const chatId = chat.id
      const reductionMode = chatSettings.continuousChat
      const model = _this.getModel()
      const maxTokens = getModelMaxTokens(model) // max tokens for model

      const continueRequest = async () => {
        return await _this.sendRequest(chat.messages, {
          ...opts,
          didSummary: true
        }, overrides)
      }

      // Get extra counts for when the prompts are finally sent.
      const countPadding = this.getTokenCountPadding(filtered)

      // See if we have enough to apply any of the reduction modes
      const fullPromptSize = countPromptTokens(filtered, model) + countPadding
      if (fullPromptSize < chatSettings.summaryThreshold) return await continueRequest() // nothing to do yet
      const overMax = fullPromptSize > maxTokens * 0.95

      // Isolate the pool of messages we're going to reduce
      const pinTop = chatSettings.pinTop
      let pinBottom = chatSettings.pinBottom || 2
      const systemPad = filtered[0]?.role === 'system' ? 1 : 0
      const top = filtered.slice(0, pinTop + systemPad)
      let rw = filtered.slice(pinTop + systemPad, filtered.length)
      if (pinBottom >= rw.length) pinBottom = 1
      if (pinBottom >= rw.length) {
        if (overMax) addError(chatId, 'Unable to apply continuous chat. Check threshold, pin top and pin bottom settings.')
        return await continueRequest()
      }

      // Reduce based on mode
      if (reductionMode === 'fifo') {
        /***************************************************************
         * FIFO mode. Roll the top off until we're under our threshold.
         ***************************************************************/

        let promptSize = countPromptTokens(top.concat(rw), model) + countPadding
        while (rw.length && rw.length > pinBottom && promptSize >= chatSettings.summaryThreshold) {
          const rolled = rw.shift()
          // Hide messages we're "rolling"
          if (rolled) rolled.suppress = true
          promptSize = countPromptTokens(top.concat(rw), model) + countPadding
        }
        // Run a new request, now with the rolled messages hidden
        return await _this.sendRequest(chat.messages, {
          ...opts,
          didSummary: true // our "summary" was simply dropping some messages
        }, overrides)
      } else if (reductionMode === 'summary') {
        /******************************************************
         * Summary mode. Reduce it all to a summary, if we can.
         ******************************************************/

        const bottom = rw.slice(0 - pinBottom)
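        // How many passes we're allowed: the initial summary plus any 'summaryExtend' continuations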
        let continueCounter = chatSettings.summaryExtend + 1
        rw = rw.slice(0, 0 - pinBottom)
        let reductionPoolSize = countPromptTokens(rw, model)
        const ss = chatSettings.summarySize
        const getSS = (): number => (ss < 1 && ss > 0)
          ? Math.round(reductionPoolSize * ss) // If summarySize between 0 and 1, use percentage of reduced
          : Math.min(ss, reductionPoolSize * 0.5) // If > 1, use token count
        const topSize = countPromptTokens(top, model)
        let maxSummaryTokens = getSS()
        let promptSummary = prepareSummaryPrompt(chatId, maxSummaryTokens)
        const summaryRequest = { role: 'user', content: promptSummary } as Message
        let promptSummarySize = countMessageTokens(summaryRequest, model)
        // Make sure there is enough room to generate the summary, and try to make sure
        // the last prompt is a user prompt as that seems to work better for summaries
        while ((topSize + reductionPoolSize + promptSummarySize + maxSummaryTokens) >= maxTokens ||
          (reductionPoolSize >= 100 && rw[rw.length - 1]?.role !== 'user')) {
          bottom.unshift(rw.pop() as Message)
          reductionPoolSize = countPromptTokens(rw, model)
          maxSummaryTokens = getSS()
          promptSummary = prepareSummaryPrompt(chatId, maxSummaryTokens)
          summaryRequest.content = promptSummary
          promptSummarySize = countMessageTokens(summaryRequest, model)
        }
        if (reductionPoolSize < 50) {
          if (overMax) addError(chatId, 'Check summary settings. Unable to summarize enough messages.')
          return continueRequest()
        }

        // Create a message the summary will be loaded into
        const srid = uuidv4()
        const summaryResponse: Message = {
          role: 'assistant',
          content: '',
          uuid: srid,
          streaming: opts.streaming,
          summary: [] as string[],
          model
        }

        // Insert summary completion prompt after that last message we're summarizing
        insertMessages(chatId, rw[rw.length - 1], [summaryResponse])
        if (opts.streaming) setTimeout(() => scrollToMessage(summaryResponse.uuid, 150, true, true), 0)

        // Request and load the summarization prompt
        _this.updatingMessage = 'Summarizing...'
        const summarizedIds = rw.map(m => m.uuid)
        const summaryIds = [summaryResponse.uuid]
        while (continueCounter-- > 0) {
          try {
            const summary = await _this.sendRequest(top.concat(rw).concat([summaryRequest]), {
              summaryRequest: true,
              streaming: opts.streaming,
              maxTokens: maxSummaryTokens,
              fillMessage: summaryResponse,
              autoAddMessages: true,
              onMessageChange: (m) => {
                if (opts.streaming) scrollToMessage(summaryResponse.uuid, 150, true, true)
              }
            } as ChatCompletionOpts, {
              temperature: 0.1, // make summary more deterministic
              top_p: 1,
              presence_penalty: 0,
              frequency_penalty: 0,
              ...overrides
            } as ChatSettings)
            // Wait for the response to complete
            if (!summary.hasFinished()) await summary.promiseToFinish()
            if (summary.hasError()) {
              // Failed for some API issue. Let the original caller handle it.
              _this.updating = false
              _this.updatingMessage = ''
              deleteMessage(chatId, srid)
              return summary
            }
            // Looks like we got our summarized messages.
            // Mark the new summaries as such
            // Need more?
            if (summaryResponse.finish_reason === 'length' && continueCounter > 0) {
              // Our summary was truncated
              // Try to get more of it
              delete summaryResponse.finish_reason
              _this.updatingMessage = 'Summarizing more...'
              continue
            } else {
              // We're done
              continueCounter = 0
            }
          } catch (e) {
            _this.updating = false
            _this.updatingMessage = ''
            deleteMessage(chatId, srid)
            throw e
          }
        }
        summaryResponse.summary = summarizedIds
        // Mark the summarized messages (rather than deleting them) so they're excluded from future requests but still show in history
        rw.forEach((m, i) => { m.summarized = summaryIds })
        saveChatStore()
        // Re-run request with summarized prompts
        _this.updatingMessage = 'Continuing...'
        scrollToBottom(true)
        return await _this.sendRequest(chat.messages, {
          ...opts,
          didSummary: true
        }, overrides)
      } else {
        /***************
         * Unknown mode.
         ***************/
        addError(chatId, `Unknown Continuous Chat Mode "${reductionMode}".`)
        return continueRequest()
      }
    }
  }

</script>