Merge pull request #242 from Webifi/main
Add an "Aggressive stop" sequence setting for Petals API calls
This commit is contained in:
		
						commit
						11ada75a9f
					
				| 
						 | 
					@ -109,7 +109,6 @@ export const runPetalsCompletionRequest = async (
 | 
				
			||||||
              chatResponse.updateFromError(err.message)
 | 
					              chatResponse.updateFromError(err.message)
 | 
				
			||||||
              throw err
 | 
					              throw err
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
            window.setTimeout(() => {
 | 
					 | 
				
			||||||
            chatResponse.updateFromAsyncResponse(
 | 
					            chatResponse.updateFromAsyncResponse(
 | 
				
			||||||
                {
 | 
					                {
 | 
				
			||||||
                  model,
 | 
					                  model,
 | 
				
			||||||
| 
						 | 
					@ -122,7 +121,26 @@ export const runPetalsCompletionRequest = async (
 | 
				
			||||||
                  }]
 | 
					                  }]
 | 
				
			||||||
                } as any
 | 
					                } as any
 | 
				
			||||||
            )
 | 
					            )
 | 
				
			||||||
            }, 1)
 | 
					            if (chat.settings.aggressiveStop && !response.stop) {
 | 
				
			||||||
 | 
					              // check if we should've stopped
 | 
				
			||||||
 | 
					              const message = chatResponse.getMessages()[0]
 | 
				
			||||||
 | 
					              const pad = 10 // look back 10 characters + stop sequence
 | 
				
			||||||
 | 
					              if (message) {
 | 
				
			||||||
 | 
					                const mc = (message.content).trim()
 | 
				
			||||||
 | 
					                for (let i = 0, l = stopSequences.length; i < l; i++) {
 | 
				
			||||||
 | 
					                  const ss = stopSequences[i].trim()
 | 
				
			||||||
 | 
					                  const ind = mc.slice(0 - (ss.length + pad)).indexOf(ss)
 | 
				
			||||||
 | 
					                  if (ind > -1) {
 | 
				
			||||||
 | 
					                    const offset = (ss.length + pad) - ind
 | 
				
			||||||
 | 
					                    message.content = mc.slice(0, mc.length - offset)
 | 
				
			||||||
 | 
					                    response.stop = true
 | 
				
			||||||
 | 
					                    updateMessages(chat.id)
 | 
				
			||||||
 | 
					                    chatResponse.finish()
 | 
				
			||||||
 | 
					                    ws.close()
 | 
				
			||||||
 | 
					                  }
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					              }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
          }
 | 
					          }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        ws.onclose = () => {
 | 
					        ws.onclose = () => {
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -150,6 +150,9 @@ const setPetalsEnabled = (event: Event) => {
 | 
				
			||||||
        <p class="mb-4">
 | 
					        <p class="mb-4">
 | 
				
			||||||
          You are encouraged to <a target="_blank" href="https://github.com/bigscience-workshop/petals/wiki/FAQ:-Frequently-asked-questions#running-a-server">set up a Petals server to share your GPU resources</a> with the public swarm. Minimum requirements to contribute Llama 2 completions are a GTX 1080 8GB, but the larger/faster the better.
 | 
					          You are encouraged to <a target="_blank" href="https://github.com/bigscience-workshop/petals/wiki/FAQ:-Frequently-asked-questions#running-a-server">set up a Petals server to share your GPU resources</a> with the public swarm. Minimum requirements to contribute Llama 2 completions are a GTX 1080 8GB, but the larger/faster the better.
 | 
				
			||||||
        </p>
 | 
					        </p>
 | 
				
			||||||
 | 
					        <p class="mb-4">
 | 
				
			||||||
 | 
					          If you're receiving errors while using Petals, <a target="_blank" href="https://health.petals.dev/">check swarm health</a> and consider <a target="_blank" href="https://github.com/bigscience-workshop/petals/wiki/FAQ:-Frequently-asked-questions#running-a-server">adding your GPU to the swarm</a> to help.
 | 
				
			||||||
 | 
					        </p>
 | 
				
			||||||
        <p class="help is-warning">
 | 
					        <p class="help is-warning">
 | 
				
			||||||
          Because Petals uses a public swarm, <b>do not send sensitive information</b> when using Petals.
 | 
					          Because Petals uses a public swarm, <b>do not send sensitive information</b> when using Petals.
 | 
				
			||||||
        </p>
 | 
					        </p>
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -110,6 +110,7 @@ const defaults:ChatSettings = {
 | 
				
			||||||
  hppWithSummaryPrompt: false,
 | 
					  hppWithSummaryPrompt: false,
 | 
				
			||||||
  imageGenerationSize: '',
 | 
					  imageGenerationSize: '',
 | 
				
			||||||
  stopSequence: '',
 | 
					  stopSequence: '',
 | 
				
			||||||
 | 
					  aggressiveStop: false,
 | 
				
			||||||
  userMessageStart: '',
 | 
					  userMessageStart: '',
 | 
				
			||||||
  assistantMessageStart: '',
 | 
					  assistantMessageStart: '',
 | 
				
			||||||
  systemMessageStart: '',
 | 
					  systemMessageStart: '',
 | 
				
			||||||
| 
						 | 
					@ -524,6 +525,13 @@ const chatSettingsList: ChatSetting[] = [
 | 
				
			||||||
        },
 | 
					        },
 | 
				
			||||||
        hide: isNotPetals
 | 
					        hide: isNotPetals
 | 
				
			||||||
      },
 | 
					      },
 | 
				
			||||||
 | 
					      {
 | 
				
			||||||
 | 
					        key: 'aggressiveStop',
 | 
				
			||||||
 | 
					        name: 'Use aggressive stop',
 | 
				
			||||||
 | 
					        title: 'Sometimes generation can continue even after a stop sequence. This will stop generation client side if generation continues after stop sequence.',
 | 
				
			||||||
 | 
					        type: 'boolean',
 | 
				
			||||||
 | 
					        hide: isNotPetals
 | 
				
			||||||
 | 
					      },
 | 
				
			||||||
      {
 | 
					      {
 | 
				
			||||||
        key: 'userMessageStart',
 | 
					        key: 'userMessageStart',
 | 
				
			||||||
        name: 'User Message Start Sequence',
 | 
					        name: 'User Message Start Sequence',
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -114,6 +114,7 @@ export type ChatSettings = {
 | 
				
			||||||
    useResponseAlteration?: boolean;
 | 
					    useResponseAlteration?: boolean;
 | 
				
			||||||
    responseAlterations?: ResponseAlteration[];
 | 
					    responseAlterations?: ResponseAlteration[];
 | 
				
			||||||
    stopSequence: string;
 | 
					    stopSequence: string;
 | 
				
			||||||
 | 
					    aggressiveStop: boolean;
 | 
				
			||||||
    userMessageStart: string;
 | 
					    userMessageStart: string;
 | 
				
			||||||
    assistantMessageStart: string;
 | 
					    assistantMessageStart: string;
 | 
				
			||||||
    systemMessageStart: string;
 | 
					    systemMessageStart: string;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue