Merge pull request #242 from Webifi/main

Add an "Aggressive stop" sequence setting for Petals API calls
2023-07-26 07:23:43 +02:00 · 2023-07-26 07:23:43 +02:00 · 11ada75a9f
parent 8edb5d62f0 58afe8f375
commit 11ada75a9f
4 changed files with 44 additions and 14 deletions
--- a/src/lib/ChatRequestPetals.svelte
+++ b/src/lib/ChatRequestPetals.svelte
@@ -109,7 +109,6 @@ export const runPetalsCompletionRequest = async (
              chatResponse.updateFromError(err.message)
              throw err
            }
-            window.setTimeout(() => {
            chatResponse.updateFromAsyncResponse(
                {
                  model,
@@ -122,7 +121,26 @@ export const runPetalsCompletionRequest = async (
                  }]
                } as any
            )
-            }, 1)
+            if (chat.settings.aggressiveStop && !response.stop) {
+              // check if we should've stopped
+              const message = chatResponse.getMessages()[0]
+              const pad = 10 // look back 10 characters + stop sequence
+              if (message) {
+                const mc = (message.content).trim()
+                for (let i = 0, l = stopSequences.length; i < l; i++) {
+                  const ss = stopSequences[i].trim()
+                  const ind = mc.slice(0 - (ss.length + pad)).indexOf(ss)
+                  if (ind > -1) {
+                    const offset = (ss.length + pad) - ind
+                    message.content = mc.slice(0, mc.length - offset)
+                    response.stop = true
+                    updateMessages(chat.id)
+                    chatResponse.finish()
+                    ws.close()
+                  }
+                }
+              }
+            }
          }
        }
        ws.onclose = () => {
--- a/src/lib/Home.svelte
+++ b/src/lib/Home.svelte
@@ -150,6 +150,9 @@ const setPetalsEnabled = (event: Event) => {
        <p class="mb-4">
          You are encouraged to <a target="_blank" href="https://github.com/bigscience-workshop/petals/wiki/FAQ:-Frequently-asked-questions#running-a-server">set up a Petals server to share your GPU resources</a> with the public swarm. Minimum requirements to contribute Llama 2 completions are a GTX&nbsp;1080&nbsp;8GB, but the larger/faster the better.
        </p>
+        <p class="mb-4">
+          If you're receiving errors while using Petals, <a target="_blank" href="https://health.petals.dev/">check swarm health</a> and consider <a target="_blank" href="https://github.com/bigscience-workshop/petals/wiki/FAQ:-Frequently-asked-questions#running-a-server">adding your GPU to the swarm</a> to help.
+        </p>
        <p class="help is-warning">
          Because Petals uses a public swarm, <b>do not send sensitive information</b> when using Petals.
        </p>
--- a/src/lib/Settings.svelte
+++ b/src/lib/Settings.svelte
@@ -110,6 +110,7 @@ const defaults:ChatSettings = {
  hppWithSummaryPrompt: false,
  imageGenerationSize: '',
  stopSequence: '',
+  aggressiveStop: false,
  userMessageStart: '',
  assistantMessageStart: '',
  systemMessageStart: '',
@@ -524,6 +525,13 @@ const chatSettingsList: ChatSetting[] = [
        },
        hide: isNotPetals
      },
+      {
+        key: 'aggressiveStop',
+        name: 'Use aggressive stop',
+        title: 'Sometimes generation can continue even after a stop sequence. This will stop generation client side if generation continues after stop sequence.',
+        type: 'boolean',
+        hide: isNotPetals
+      },
      {
        key: 'userMessageStart',
        name: 'User Message Start Sequence',
--- a/src/lib/Types.svelte
+++ b/src/lib/Types.svelte
@@ -114,6 +114,7 @@ export type ChatSettings = {
    useResponseAlteration?: boolean;
    responseAlterations?: ResponseAlteration[];
    stopSequence: string;
+    aggressiveStop: boolean;
    userMessageStart: string;
    assistantMessageStart: string;
    systemMessageStart: string;