Merge pull request #242 from Webifi/main

Add an "Aggressive stop" sequence setting for Petals API calls
This commit is contained in:
Author: Niek van der Maas, 2023-07-26 07:23:43 +02:00 (committed by GitHub)
commit 11ada75a9f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 44 additions and 14 deletions

View File

@@ -109,7 +109,6 @@ export const runPetalsCompletionRequest = async (
chatResponse.updateFromError(err.message)
throw err
}
window.setTimeout(() => {
chatResponse.updateFromAsyncResponse(
{
model,
@@ -122,7 +121,26 @@ export const runPetalsCompletionRequest = async (
}]
} as any
)
}, 1)
if (chat.settings.aggressiveStop && !response.stop) {
// check if we should've stopped
const message = chatResponse.getMessages()[0]
const pad = 10 // look back 10 characters + stop sequence
if (message) {
const mc = (message.content).trim()
for (let i = 0, l = stopSequences.length; i < l; i++) {
const ss = stopSequences[i].trim()
const ind = mc.slice(0 - (ss.length + pad)).indexOf(ss)
if (ind > -1) {
const offset = (ss.length + pad) - ind
message.content = mc.slice(0, mc.length - offset)
response.stop = true
updateMessages(chat.id)
chatResponse.finish()
ws.close()
}
}
}
}
}
}
ws.onclose = () => {

View File

@@ -150,6 +150,9 @@ const setPetalsEnabled = (event: Event) => {
<p class="mb-4">
You are encouraged to <a target="_blank" href="https://github.com/bigscience-workshop/petals/wiki/FAQ:-Frequently-asked-questions#running-a-server">set up a Petals server to share your GPU resources</a> with the public swarm. Minimum requirements to contribute Llama 2 completions are a GTX&nbsp;1080&nbsp;8GB, but the larger/faster the better.
</p>
<p class="mb-4">
If you're receiving errors while using Petals, <a target="_blank" href="https://health.petals.dev/">check swarm health</a> and consider <a target="_blank" href="https://github.com/bigscience-workshop/petals/wiki/FAQ:-Frequently-asked-questions#running-a-server">adding your GPU to the swarm</a> to help.
</p>
<p class="help is-warning">
Because Petals uses a public swarm, <b>do not send sensitive information</b> when using Petals.
</p>

View File

@@ -110,6 +110,7 @@ const defaults:ChatSettings = {
hppWithSummaryPrompt: false,
imageGenerationSize: '',
stopSequence: '',
aggressiveStop: false,
userMessageStart: '',
assistantMessageStart: '',
systemMessageStart: '',
@@ -524,6 +525,13 @@ const chatSettingsList: ChatSetting[] = [
},
hide: isNotPetals
},
{
key: 'aggressiveStop',
name: 'Use aggressive stop',
title: 'Sometimes generation can continue even after a stop sequence. This will stop generation client side if generation continues after stop sequence.',
type: 'boolean',
hide: isNotPetals
},
{
key: 'userMessageStart',
name: 'User Message Start Sequence',

View File

@@ -114,6 +114,7 @@ export type ChatSettings = {
useResponseAlteration?: boolean;
responseAlterations?: ResponseAlteration[];
stopSequence: string;
aggressiveStop: boolean;
userMessageStart: string;
assistantMessageStart: string;
systemMessageStart: string;