Merge pull request #242 from Webifi/main
Add an "Aggressive stop" sequence setting for Petals API calls
This commit is contained in:
commit
11ada75a9f
|
@ -109,7 +109,6 @@ export const runPetalsCompletionRequest = async (
|
|||
chatResponse.updateFromError(err.message)
|
||||
throw err
|
||||
}
|
||||
window.setTimeout(() => {
|
||||
chatResponse.updateFromAsyncResponse(
|
||||
{
|
||||
model,
|
||||
|
@ -122,7 +121,26 @@ export const runPetalsCompletionRequest = async (
|
|||
}]
|
||||
} as any
|
||||
)
|
||||
}, 1)
|
||||
if (chat.settings.aggressiveStop && !response.stop) {
  // Client-side stop check: when the response has not been flagged as stopped,
  // look for a stop sequence near the end of the accumulated message and, if
  // one is found, truncate the message there and end the request ourselves.
  const message = chatResponse.getMessages()[0]
  const pad = 10 // look back 10 characters + stop sequence
  if (message) {
    const mc = (message.content).trim()
    // Absolute index in `mc` of the earliest stop sequence found, or -1.
    let cut = -1
    for (const rawStop of stopSequences) {
      const ss = rawStop.trim()
      // An empty sequence would match at every position; skip it.
      // NOTE(review): stopSequences may already be filtered upstream - confirm.
      if (!ss) continue
      // Only search the tail of the message. Clamp at 0 so messages shorter
      // than the look-back window are handled with correct absolute indices.
      const windowStart = Math.max(0, mc.length - (ss.length + pad))
      const ind = mc.indexOf(ss, windowStart)
      if (ind > -1 && (cut < 0 || ind < cut)) cut = ind
    }
    if (cut > -1) {
      // Drop the stop sequence and everything after it, then finish exactly once.
      message.content = mc.slice(0, cut)
      response.stop = true
      updateMessages(chat.id)
      chatResponse.finish()
      ws.close()
    }
  }
}
|
||||
}
|
||||
}
|
||||
ws.onclose = () => {
|
||||
|
|
|
@ -150,6 +150,9 @@ const setPetalsEnabled = (event: Event) => {
|
|||
<p class="mb-4">
|
||||
You are encouraged to <a target="_blank" href="https://github.com/bigscience-workshop/petals/wiki/FAQ:-Frequently-asked-questions#running-a-server">set up a Petals server to share your GPU resources</a> with the public swarm. Minimum requirements to contribute Llama 2 completions are a GTX 1080 8GB, but the larger/faster the better.
|
||||
</p>
|
||||
<p class="mb-4">
|
||||
If you're receiving errors while using Petals, <a target="_blank" href="https://health.petals.dev/">check swarm health</a> and consider <a target="_blank" href="https://github.com/bigscience-workshop/petals/wiki/FAQ:-Frequently-asked-questions#running-a-server">adding your GPU to the swarm</a> to help.
|
||||
</p>
|
||||
<p class="help is-warning">
|
||||
Because Petals uses a public swarm, <b>do not send sensitive information</b> when using Petals.
|
||||
</p>
|
||||
|
|
|
@ -110,6 +110,7 @@ const defaults:ChatSettings = {
|
|||
hppWithSummaryPrompt: false,
|
||||
imageGenerationSize: '',
|
||||
stopSequence: '',
|
||||
aggressiveStop: false,
|
||||
userMessageStart: '',
|
||||
assistantMessageStart: '',
|
||||
systemMessageStart: '',
|
||||
|
@ -524,6 +525,13 @@ const chatSettingsList: ChatSetting[] = [
|
|||
},
|
||||
hide: isNotPetals
|
||||
},
|
||||
{
|
||||
key: 'aggressiveStop',
|
||||
name: 'Use aggressive stop',
|
||||
title: 'Sometimes generation can continue even after a stop sequence. This will stop generation client side if generation continues after stop sequence.',
|
||||
type: 'boolean',
|
||||
hide: isNotPetals
|
||||
},
|
||||
{
|
||||
key: 'userMessageStart',
|
||||
name: 'User Message Start Sequence',
|
||||
|
|
|
@ -114,6 +114,7 @@ export type ChatSettings = {
|
|||
useResponseAlteration?: boolean;
|
||||
responseAlterations?: ResponseAlteration[];
|
||||
stopSequence: string;
|
||||
aggressiveStop: boolean;
|
||||
userMessageStart: string;
|
||||
assistantMessageStart: string;
|
||||
systemMessageStart: string;
|
||||
|
|
Loading…
Reference in New Issue