Merge pull request #242 from Webifi/main
Add an "Aggressive stop" sequence setting for Petals API calls
Commit 11ada75a9f
```diff
@@ -109,20 +109,38 @@ export const runPetalsCompletionRequest = async (
             chatResponse.updateFromError(err.message)
             throw err
           }
-          window.setTimeout(() => {
-            chatResponse.updateFromAsyncResponse(
-                    {
-                      model,
-                      choices: [{
-                        delta: {
-                          content: response.outputs,
-                          role: 'assistant'
-                        },
-                        finish_reason: (response.stop ? 'stop' : null)
-                      }]
-                    } as any
-            )
-          }, 1)
+          chatResponse.updateFromAsyncResponse(
+              {
+                model,
+                choices: [{
+                  delta: {
+                    content: response.outputs,
+                    role: 'assistant'
+                  },
+                  finish_reason: (response.stop ? 'stop' : null)
+                }]
+              } as any
+          )
+          if (chat.settings.aggressiveStop && !response.stop) {
+            // check if we should've stopped
+            const message = chatResponse.getMessages()[0]
+            const pad = 10 // look back 10 characters + stop sequence
+            if (message) {
+              const mc = (message.content).trim()
+              for (let i = 0, l = stopSequences.length; i < l; i++) {
+                const ss = stopSequences[i].trim()
+                const ind = mc.slice(0 - (ss.length + pad)).indexOf(ss)
+                if (ind > -1) {
+                  const offset = (ss.length + pad) - ind
+                  message.content = mc.slice(0, mc.length - offset)
+                  response.stop = true
+                  updateMessages(chat.id)
+                  chatResponse.finish()
+                  ws.close()
+                }
+              }
+            }
+          }
         }
       }
       ws.onclose = () => {
```
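For reference, the client-side trimming added above can be read in isolation. The sketch below is a hypothetical standalone version of the same lookback logic; the function name `trimAtStopSequence` and its return shape are illustrative, not part of this commit. It inspects only the last `stop.length + pad` characters of the accumulated output, so a stop sequence the server overshot is caught shortly after it streams in without rescanning the whole message.

```typescript
// Hypothetical standalone sketch of the lookback logic above; not part of this commit.
// Returns the trimmed content and whether a stop sequence was found near the end.
const trimAtStopSequence = (
  content: string,
  stopSequences: string[],
  pad: number = 10 // same lookback padding the commit uses
): { content: string; stopped: boolean } => {
  const mc = content.trim()
  for (const stop of stopSequences) {
    const ss = stop.trim()
    // Only inspect the tail: the stop sequence plus `pad` trailing characters.
    const ind = mc.slice(0 - (ss.length + pad)).indexOf(ss)
    if (ind > -1) {
      // Cut everything from the start of the matched stop sequence onward.
      const offset = (ss.length + pad) - ind
      return { content: mc.slice(0, mc.length - offset), stopped: true }
    }
  }
  return { content: mc, stopped: false }
}

// Example: the server kept generating past the '###' stop sequence.
console.log(trimAtStopSequence('The answer is 42.###\nUser:', ['###']))
// -> { content: 'The answer is 42.', stopped: true }
```

Bounding the scan to `ss.length + pad` keeps the check cheap on every streamed chunk: only a stop sequence that appeared within the last few characters can trigger a trim, which is exactly the overshoot case the setting targets.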
```diff
@@ -150,6 +150,9 @@ const setPetalsEnabled = (event: Event) => {
         <p class="mb-4">
           You are encouraged to <a target="_blank" href="https://github.com/bigscience-workshop/petals/wiki/FAQ:-Frequently-asked-questions#running-a-server">set up a Petals server to share your GPU resources</a> with the public swarm. Minimum requirements to contribute Llama 2 completions are a GTX 1080 8GB, but the larger/faster the better.
         </p>
+        <p class="mb-4">
+          If you're receiving errors while using Petals, <a target="_blank" href="https://health.petals.dev/">check swarm health</a> and consider <a target="_blank" href="https://github.com/bigscience-workshop/petals/wiki/FAQ:-Frequently-asked-questions#running-a-server">adding your GPU to the swarm</a> to help.
+        </p>
         <p class="help is-warning">
           Because Petals uses a public swarm, <b>do not send sensitive information</b> when using Petals.
         </p>
```
```diff
@@ -110,6 +110,7 @@ const defaults:ChatSettings = {
   hppWithSummaryPrompt: false,
   imageGenerationSize: '',
   stopSequence: '',
+  aggressiveStop: false,
   userMessageStart: '',
   assistantMessageStart: '',
   systemMessageStart: '',
```
```diff
@@ -524,6 +525,13 @@ const chatSettingsList: ChatSetting[] = [
       },
       hide: isNotPetals
     },
+    {
+      key: 'aggressiveStop',
+      name: 'Use aggressive stop',
+      title: 'Sometimes generation can continue even after a stop sequence. This will stop generation client side if generation continues after stop sequence.',
+      type: 'boolean',
+      hide: isNotPetals
+    },
    {
      key: 'userMessageStart',
      name: 'User Message Start Sequence',
```
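For context, `hide: isNotPetals` keeps the toggle out of the settings UI unless the chat is using a Petals model. Below is a minimal sketch of how such a list can be filtered at render time; the trimmed `ChatSetting` shape, the `chatId` parameter on `hide`, and the stubbed `isNotPetals` are assumptions for illustration, not the repo's actual definitions.

```typescript
// Sketch only: the real ChatSetting type and isNotPetals live elsewhere in the repo.
type ChatSetting = {
  key: string
  name: string
  title: string
  type: 'boolean' | 'text' | 'number'
  hide?: (chatId: number) => boolean // assumed signature
}

// Stub standing in for the repo's predicate; pretend this chat uses Petals.
const isNotPetals = (_chatId: number): boolean => false

const chatSettingsList: ChatSetting[] = [{
  key: 'aggressiveStop',
  name: 'Use aggressive stop',
  title: 'Stop generation client side if it continues past a stop sequence.',
  type: 'boolean',
  hide: isNotPetals
}]

// Render only the settings whose hide predicate allows them.
const visible = chatSettingsList.filter(s => !s.hide || !s.hide(1))
console.log(visible.map(s => s.key)) // -> ['aggressiveStop'] for a Petals chat
```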
```diff
@@ -114,6 +114,7 @@ export type ChatSettings = {
   useResponseAlteration?: boolean;
   responseAlterations?: ResponseAlteration[];
   stopSequence: string;
+  aggressiveStop: boolean;
   userMessageStart: string;
   assistantMessageStart: string;
   systemMessageStart: string;
```
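Putting the new field together with its default, the snippet below sketches how a chat might opt in. `PetalsStopSettings` is an illustrative name that trims `ChatSettings` down to the two fields this diff touches; it is not a type in the repo, and the `'###'` stop sequence is invented for the example.

```typescript
// Illustrative subset of ChatSettings; only the fields this commit touches.
type PetalsStopSettings = {
  stopSequence: string
  aggressiveStop: boolean
}

// Mirrors the defaults added above: aggressive stop is opt-in.
const defaults: PetalsStopSettings = {
  stopSequence: '',
  aggressiveStop: false
}

// A chat that opts in: trim the reply client side if '###' slips through.
const chatSettings: PetalsStopSettings = {
  ...defaults,
  stopSequence: '###',
  aggressiveStop: true
}
```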