Merge pull request #170 from Webifi/main

Fixes for summarization, refactor model definitions.
Niek van der Maas 2023-06-13 09:16:09 +02:00 committed by GitHub
commit 5715594973
8 changed files with 200 additions and 120 deletions


@@ -59,8 +59,6 @@ export class ChatRequest {
const promptTokenCount = countPromptTokens(messagePayload, model)
const maxAllowed = maxTokens - (promptTokenCount + 1)
-// Build and make the request
-try {
// Build the API request body
const request: Request = {
model: chatSettings.model,
@@ -94,14 +92,14 @@ export class ChatRequest {
stream: opts.streaming
}
+// Set-up and make the request
+try {
// Add our token count to the response handler
// (streaming doesn't return counts, so we need to do it client side)
chatResponse.setPromptTokenCount(promptTokenCount)
const signal = _this.controller.signal
// console.log('apikey', $apiKeyStorage)
const fetchOptions = {
method: 'POST',
headers: {
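
The change above moves the try so the request body is assembled before anything can throw, and only the network call is wrapped for error handling. A minimal standalone sketch of the pattern (the URL, payload, and apiKey placeholder are illustrative assumptions, not the project's exact code):

const apiKey = 'sk-...' // assumed placeholder; the app reads the key from storage
const request = { model: 'gpt-3.5-turbo', messages: [{ role: 'user', content: 'Hello' }], stream: true }
const controller = new AbortController() // lets the UI cancel an in-flight completion
try {
  const response = await fetch('https://api.openai.com/v1/chat/completions', {
    method: 'POST',
    headers: { Authorization: `Bearer ${apiKey}`, 'Content-Type': 'application/json' },
    body: JSON.stringify(request),
    signal: controller.signal
  })
  if (!response.ok) throw new Error(`HTTP ${response.status}`)
} catch (e) {
  // network failures, aborts, and the non-2xx guard above land here
}
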
@@ -297,19 +295,21 @@ export class ChatRequest {
*/
const bottom = rw.slice(0 - pinBottom)
+let continueCounter = chatSettings.summaryExtend + 1
rw = rw.slice(0, 0 - pinBottom)
let reductionPoolSize = countPromptTokens(rw, model)
const ss = chatSettings.summarySize
const getSS = ():number => (ss < 1 && ss > 0)
? Math.round(reductionPoolSize * ss) // If summarySize between 0 and 1, use percentage of reduced
: Math.min(ss, reductionPoolSize * 0.5) // If > 1, use token count
+const topSize = countPromptTokens(top, model)
let maxSummaryTokens = getSS()
let promptSummary = prepareSummaryPrompt(chatId, maxSummaryTokens)
const summaryRequest = { role: 'user', content: promptSummary } as Message
let promptSummarySize = countMessageTokens(summaryRequest, model)
// Make sure there is enough room to generate the summary, and try to make sure
// the last prompt is a user prompt as that seems to work better for summaries
-while ((reductionPoolSize + promptSummarySize + maxSummaryTokens) >= maxTokens ||
+while ((topSize + reductionPoolSize + promptSummarySize + maxSummaryTokens) >= maxTokens ||
(reductionPoolSize >= 100 && rw[rw.length - 1]?.role !== 'user')) {
bottom.unshift(rw.pop() as Message)
reductionPoolSize = countPromptTokens(rw, model)
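
getSS above makes summarySize dual-mode: a value strictly between 0 and 1 is a fraction of the tokens being reduced, while a value of 1 or more is an absolute token count capped at half the reduction pool. A minimal sketch (summaryTokenBudget is an assumed name for illustration):

const summaryTokenBudget = (summarySize: number, reductionPoolSize: number): number =>
  (summarySize < 1 && summarySize > 0)
    ? Math.round(reductionPoolSize * summarySize) // fractional: percentage of the pool
    : Math.min(summarySize, reductionPoolSize * 0.5) // absolute: never more than half the pool

summaryTokenBudget(0.25, 2000) // -> 500
summaryTokenBudget(1000, 1200) // -> 600
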
@@ -340,6 +340,9 @@ export class ChatRequest {
// Request and load the summarization prompt
_this.updatingMessage = 'Summarizing...'
+const summarizedIds = rw.map(m => m.uuid)
+const summaryIds = [summaryResponse.uuid]
+while (continueCounter-- > 0) {
try {
const summary = await _this.sendRequest(top.concat(rw).concat([summaryRequest]), {
summaryRequest: true,
@@ -351,8 +354,8 @@
if (opts.streaming) scrollToMessage(summaryResponse.uuid, 150, true, true)
}
} as ChatCompletionOpts, {
-temperature: 0, // make summary more deterministic
-top_p: 0.5,
+temperature: 0.1, // make summary more deterministic
+top_p: 1,
presence_penalty: 0,
frequency_penalty: 0,
...overrides
@@ -360,26 +363,24 @@
// Wait for the response to complete
if (!summary.hasFinished()) await summary.promiseToFinish()
if (summary.hasError()) {
-// Failed to some API issue. let the original caller handle it.
-deleteMessage(chatId, summaryResponse.uuid)
+// Failed for some API issue. let the original caller handle it.
+_this.updating = false
+_this.updatingMessage = ''
+deleteMessage(chatId, srid)
return summary
-} else {
+}
// Looks like we got our summarized messages.
// Mark the new summaries as such
-summaryResponse.summary = rw.map(m => m.uuid)
-const summaryIds = [summaryResponse.uuid]
-// Disable the messages we summarized so they still show in history
-rw.forEach((m, i) => { m.summarized = summaryIds })
-saveChatStore()
-// Re-run request with summarized prompts
-// return { error: { message: "End for now" } } as Response
-_this.updatingMessage = 'Continuing...'
-scrollToBottom(true)
-return await _this.sendRequest(chat.messages, {
-...opts,
-didSummary: true
-},
-overrides)
+// Need more?
+if (summaryResponse.finish_reason === 'length' && continueCounter > 0) {
+// Our summary was truncated
+// Try to get more of it
+delete summaryResponse.finish_reason
+_this.updatingMessage = 'Summarizing more...'
+continue
+} else {
+// We're done
+continueCounter = 0
+}
} catch (e) {
_this.updating = false
@@ -387,6 +388,19 @@ export class ChatRequest {
deleteMessage(chatId, srid)
throw e
}
+}
+summaryResponse.summary = summarizedIds
+// Disable the messages we summarized so they still show in history
+rw.forEach((m, i) => { m.summarized = summaryIds })
+saveChatStore()
+// Re-run request with summarized prompts
+_this.updatingMessage = 'Continuing...'
+scrollToBottom(true)
+return await _this.sendRequest(chat.messages, {
+...opts,
+didSummary: true
+},
+overrides)
} else {
/***************
* Unknown mode.
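
The loop introduced above is the heart of the summaryExtend feature: while the model reports finish_reason === 'length', the truncated summary is re-requested, up to summaryExtend extra calls. A condensed sketch of the control flow (chatSettings and summaryResponse are the variables from the diff; requestSummary is an assumed stand-in for the _this.sendRequest call):

let continueCounter = chatSettings.summaryExtend + 1 // one initial call plus N extensions
while (continueCounter-- > 0) {
  const summary = await requestSummary() // assumed helper wrapping _this.sendRequest
  if (summary.hasError()) return summary // API failure: let the original caller handle it
  if (summaryResponse.finish_reason === 'length' && continueCounter > 0) {
    delete summaryResponse.finish_reason // clear the flag so the next pass can finish cleanly
    _this.updatingMessage = 'Summarizing more...'
    continue // ask the model to extend the truncated summary
  }
  continueCounter = 0 // done: the summary fit, or no extensions remain
}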


@@ -177,7 +177,7 @@
placeholder={String(setting.placeholder || chatDefaults[setting.key])}
on:change={e => queueSettingValueChange(e, setting)}
/>
-{:else if setting.type === 'select'}
+{:else if setting.type === 'select' || setting.type === 'select-number'}
<!-- <div class="select"> -->
<div class="select" class:control={fieldControls.length}>
<select id="settings-{setting.key}" title="{setting.title}" on:change={e => queueSettingValueChange(e, setting) } >


@@ -13,7 +13,7 @@
checkStateChange,
addChat
} from './Storage.svelte'
-import { supportedModels, type Chat, type ChatSetting, type ResponseModels, type SettingSelect, type SelectOption, type ChatSettings } from './Types.svelte'
+import type { Chat, ChatSetting, ResponseModels, SettingSelect, SelectOption, ChatSettings } from './Types.svelte'
import { errorNotice, sizeTextElements } from './Util.svelte'
import Fa from 'svelte-fa/src/fa.svelte'
import {
@@ -37,6 +37,7 @@
import { openModal } from 'svelte-modals'
import PromptConfirm from './PromptConfirm.svelte'
import { getApiBase, getEndpointModels } from './ApiUtil.svelte'
+import { supportedModelKeys } from './Models.svelte'
export let chatId:number
export const show = () => { showSettings() }
@@ -194,7 +195,7 @@
}
})
).json()) as ResponseModels
-const filteredModels = supportedModels.filter((model) => allModels.data.find((m) => m.id === model))
+const filteredModels = supportedModelKeys.filter((model) => allModels.data.find((m) => m.id === model))
const modelOptions:SelectOption[] = filteredModels.reduce((a, m) => {
const o:SelectOption = {
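
With this swap the dropdown is driven by Models.svelte instead of a hard-coded list in Types.svelte: the client's supported keys are intersected with whatever the account's models endpoint reports. A self-contained sketch of the filter (the key list and response data are illustrative):

type ResponseModels = { data: { id: string }[] }
const supportedModelKeys = ['gpt-4', 'gpt-4-32k', 'gpt-3.5-turbo'] // subset for illustration
const filterModels = (allModels: ResponseModels): string[] =>
  supportedModelKeys.filter((model) => allModels.data.some((m) => m.id === model))

filterModels({ data: [{ id: 'gpt-3.5-turbo' }, { id: 'whisper-1' }] }) // -> ['gpt-3.5-turbo']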

src/lib/Models.svelte (new file, 68 lines)

@@ -0,0 +1,68 @@
<script context="module" lang="ts">
import type { ModelDetail, Model } from './Types.svelte'
// Reference: https://openai.com/pricing#language-models
// Eventually we'll add API hosts and endpoints to this
const modelDetails : Record<string, ModelDetail> = {
'gpt-4-32k': {
prompt: 0.00006, // $0.06 per 1000 tokens prompt
completion: 0.00012, // $0.12 per 1000 tokens completion
max: 32768 // 32k max token buffer
},
'gpt-4': {
prompt: 0.00003, // $0.03 per 1000 tokens prompt
completion: 0.00006, // $0.06 per 1000 tokens completion
max: 8192 // 8k max token buffer
},
'gpt-3.5': {
prompt: 0.000002, // $0.002 per 1000 tokens prompt
completion: 0.000002, // $0.002 per 1000 tokens completion
max: 4096 // 4k max token buffer
}
}
const unknownDetail = {
prompt: 0,
completion: 0,
max: 4096
}
// See: https://platform.openai.com/docs/models/model-endpoint-compatibility
// Eventually we'll add UI for managing this
export const supportedModels : Record<string, ModelDetail> = {
'gpt-4': modelDetails['gpt-4'],
'gpt-4-0314': modelDetails['gpt-4'],
'gpt-4-32k': modelDetails['gpt-4-32k'],
'gpt-4-32k-0314': modelDetails['gpt-4-32k'],
'gpt-3.5-turbo': modelDetails['gpt-3.5'],
'gpt-3.5-turbo-0301': modelDetails['gpt-3.5']
}
const lookupList = {
...modelDetails,
...supportedModels
}
export const supportedModelKeys = Object.keys(supportedModels)
const tpCache : Record<string, ModelDetail> = {}
export const getModelDetail = (model: Model) => {
// First try to get exact match, then from cache
let r = supportedModels[model] || tpCache[model]
if (r) return r
// If no exact match, find closest match
const k = Object.keys(lookupList)
.sort((a, b) => b.length - a.length) // Longest to shortest for best match
.find((k) => model.startsWith(k))
if (k) {
r = lookupList[k]
} else {
r = unknownDetail
}
// Cache it so we don't need to do that again
tpCache[model] = r
return r
}
</script>
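
Because supportedModels lists only a handful of exact IDs, getModelDetail falls back to the longest matching prefix and caches the result, so unlisted snapshot IDs still resolve to sensible pricing. For example (the second and third IDs are illustrative, not entries in the table):

getModelDetail('gpt-4-32k-0314') // exact key: gpt-4-32k detail (32768 max)
getModelDetail('gpt-4-32k-9999') // assumed unlisted ID: longest prefix 'gpt-4-32k' wins over 'gpt-4'
getModelDetail('my-custom-model') // no prefix match: unknownDetail (zero price, 4096 max)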


@@ -60,7 +60,7 @@ const gptDefaults = {
n: 1,
stream: true,
stop: null,
-max_tokens: 500,
+max_tokens: 512,
presence_penalty: 0,
frequency_penalty: 0,
logit_bias: null,
@@ -77,6 +77,7 @@ const defaults:ChatSettings = {
continuousChat: 'fifo',
summaryThreshold: 3000,
summarySize: 1000,
+summaryExtend: 0,
pinTop: 0,
pinBottom: 6,
summaryPrompt: '',
@@ -222,11 +223,23 @@ const summarySettings: ChatSetting[] = [
name: 'Max Summary Size',
title: 'Maximum number of tokens allowed for summary response.',
min: 128,
-max: 512,
+max: 1024,
step: 1,
type: 'number',
hide: (chatId) => getChatSettings(chatId).continuousChat !== 'summary'
},
+{
+key: 'summaryExtend',
+name: 'Summary Extend',
+title: 'Number of times a truncated summary can be extended.',
+type: 'select-number',
+options: [
+{ value: 0, text: '0 - Summary must fit in first call.' },
+{ value: 1, text: '1 - Allow one extra API call to extend.' },
+{ value: 2, text: '2 - Allow two extra API calls to extend.' }
+],
+hide: (chatId) => getChatSettings(chatId).continuousChat !== 'summary'
+},
{
key: 'pinTop',
name: 'Keep First Prompts',
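
The new summaryExtend value feeds the continueCounter initialization shown in the ChatRequest diff above, so the option is simply the number of extra summary API calls allowed beyond the first:

// summaryExtend = 2 -> at most 3 summary requests (1 initial + 2 extensions)
const maxSummaryCalls = (summaryExtend: number): number => summaryExtend + 1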


@@ -1,32 +1,11 @@
<script context="module" lang="ts">
+import { getModelDetail } from './Models.svelte'
import type { Message, Model, Usage } from './Types.svelte'
import { encode } from 'gpt-tokenizer'
-// Reference: https://openai.com/pricing#language-models
-// TODO: Move to settings of some type
-const modelDetails : Record<string, [number, number, number]> = {
-'gpt-4-32k': [0.00006, 0.00012, 32768], // $0.06 per 1000 tokens prompt, $0.12 per 1000 tokens completion, max 32k
-'gpt-4': [0.00003, 0.00006, 8192], // $0.03 per 1000 tokens prompt, $0.06 per 1000 tokens completion, max 8k
-'gpt-3.5': [0.000002, 0.000002, 4096] // $0.002 per 1000 tokens (both prompt and completion), max 4k
-}
-const tpCache = {}
-const getModelDetail = (model: Model) => {
-let r = tpCache[model]
-if (r) return r
-const k = Object.keys(modelDetails).find((k) => model.startsWith(k))
-if (k) {
-r = modelDetails[k]
-} else {
-r = [0, 0, 4096]
-}
-tpCache[model] = r
-return r
-}
export const getPrice = (tokens: Usage, model: Model): number => {
const t = getModelDetail(model)
-return ((tokens.prompt_tokens * t[0]) + (tokens.completion_tokens * t[1]))
+return ((tokens.prompt_tokens * t.prompt) + (tokens.completion_tokens * t.completion))
}
export const countPromptTokens = (prompts:Message[], model:Model):number => {
@@ -44,7 +23,7 @@
}
export const getModelMaxTokens = (model:Model):number => {
-return getModelDetail(model)[2]
+return getModelDetail(model).max
}
</script>
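
With named ModelDetail fields replacing the positional tuples, getPrice reads as a plain rate calculation. A worked example using the gpt-4 rates from Models.svelte above:

// 1,000 prompt tokens and 500 completion tokens on gpt-4:
// 1000 * 0.00003 + 500 * 0.00006 = 0.03 + 0.03 = $0.06
getPrice({ prompt_tokens: 1000, completion_tokens: 500 } as Usage, 'gpt-4') // -> 0.06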


@@ -333,6 +333,7 @@
export const cleanSettingValue = (type:string, value: any) => {
switch (type) {
case 'number':
+case 'select-number':
value = parseFloat(value)
if (isNaN(value)) { value = null }
return value
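
A <select> element always yields its value as a string, so routing the new select-number type through the same parseFloat branch as number restores the numeric type before the setting is stored. For example:

cleanSettingValue('select-number', '2') // -> 2 (the DOM's string becomes a number)
cleanSettingValue('select-number', 'oops') // -> null (NaN guard)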


@@ -1,15 +1,13 @@
<script context="module" lang="ts">
-// import type internal from "stream";
+import type { supportedModelKeys } from './Models.svelte'
-export const supportedModels = [ // See: https://platform.openai.com/docs/models/model-endpoint-compatibility
-'gpt-4',
-'gpt-4-0314',
-'gpt-4-32k',
-'gpt-4-32k-0314',
-'gpt-3.5-turbo',
-'gpt-3.5-turbo-0301'
-]
-export type Model = typeof supportedModels[number];
+export type Model = typeof supportedModelKeys[number];
+export type ModelDetail = {
+prompt: number;
+completion: number;
+max: number;
+};
export type Usage = {
completion_tokens: number;
@@ -60,6 +58,7 @@
continuousChat: (''|'fifo'|'summary');
summaryThreshold: number;
summarySize: number;
+summaryExtend: number;
pinTop: number;
pinBottom: number;
summaryPrompt: string;
@@ -141,7 +140,7 @@
};
export type SelectOption = {
-value: string;
+value: string|number;
text: string;
};
@@ -154,6 +153,11 @@ type SettingBoolean = {
options: SelectOption[];
};
+export type SettingSelectNumber = {
+type: 'select-number';
+options: SelectOption[];
+};
export type SettingText = {
type: 'text';
};
@@ -199,7 +203,7 @@ type SettingBoolean = {
fieldControls?: FieldControl[];
beforeChange?: (chatId:number, setting:ChatSetting, value:any) => boolean;
afterChange?: (chatId:number, setting:ChatSetting, value:any) => boolean;
-} & (SettingNumber | SettingSelect | SettingBoolean | SettingText | SettingTextArea | SettingOther | SubSetting);
+} & (SettingNumber | SettingSelect | SettingSelectNumber | SettingBoolean | SettingText | SettingTextArea | SettingOther | SubSetting);
export type GlobalSetting = {