Fix some issues with stop sequences and role sequences
parent f56e29b829
commit 38d38bf948
@@ -25,10 +25,16 @@ export const runPetalsCompletionRequest = async (
         ws.close()
       }
       signal.addEventListener('abort', abortListener)
-      const stopSequences = modelDetail.stop || ['###']
+      const stopSequences = (modelDetail.stop || ['###', '</s>']).slice()
       const stopSequence = getStopSequence(chat)
-      const stopSequencesC = stopSequences.slice()
-      if (stopSequence === stopSequencesC[0]) stopSequencesC.shift()
+      let stopSequenceC = stopSequence
+      if (stopSequence !== '###') {
+        stopSequences.push(stopSequence)
+        stopSequenceC = '</s>'
+      }
+      const stopSequencesC = stopSequences.filter((ss) => {
+        return ss !== '###' && ss !== stopSequenceC
+      })
       const maxTokens = getModelMaxTokens(model)
       let maxLen = Math.min(opts.maxTokens || chatRequest.chat.max_tokens || maxTokens, maxTokens)
       const promptTokenCount = chatResponse.getPromptTokenCount()
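
This hunk normalizes stop sequences before the request is built: the chat's configured stop sequence stays primary only when it is the default '###'; otherwise it is pushed onto the strip list and '</s>' is promoted to the primary stop. A minimal standalone sketch of that logic (names are illustrative; the real code reads modelDetail.stop and getStopSequence(chat)):

const normalizeStopSequences = (modelStops: string[] | undefined, chatStop: string) => {
  // Copy so the model's default stop list is never mutated in place.
  const stopSequences = (modelStops || ['###', '</s>']).slice()
  let primary = chatStop
  if (chatStop !== '###') {
    stopSequences.push(chatStop) // still stripped from the output later
    primary = '</s>'             // send end-of-sequence as the primary stop
  }
  // Everything except '###' and the primary goes to extra_stop_sequences.
  const extras = stopSequences.filter(ss => ss !== '###' && ss !== primary)
  return { primary, extras, stopSequences }
}

// e.g. a chat configured with a custom stop sequence (illustrative value):
normalizeStopSequences(['###', '</s>'], '### User:')
// -> { primary: '</s>', extras: ['### User:'], stopSequences: ['###', '</s>', '### User:'] }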
@@ -36,6 +42,16 @@ export const runPetalsCompletionRequest = async (
         maxLen = Math.min(maxLen + promptTokenCount, maxTokens)
       }
       chatResponse.onFinish(() => {
+        const message = chatResponse.getMessages()[0]
+        if (message) {
+          for (let i = 0, l = stopSequences.length; i < l; i++) {
+            const ss = stopSequences[i].trim()
+            if (message.content.trim().endsWith(ss)) {
+              message.content = message.content.trim().slice(0, message.content.trim().length - ss.length)
+              updateMessages(chat.id)
+            }
+          }
+        }
         chatRequest.updating = false
         chatRequest.updatingMessage = ''
       })
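
Trailing stop sequences are now stripped once, in onFinish, rather than on every streamed chunk (the per-chunk version is deleted in the last hunk of this file). A reduced sketch of the trimming, assuming a simple message shape:

interface Message { content: string }

const trimStopSequences = (message: Message, stopSequences: string[]) => {
  for (let i = 0, l = stopSequences.length; i < l; i++) {
    const ss = stopSequences[i].trim()
    const trimmed = message.content.trim()
    if (trimmed.endsWith(ss)) {
      // the real code also calls updateMessages(chat.id) after this
      message.content = trimmed.slice(0, trimmed.length - ss.length)
    }
  }
}

const m = { content: 'Hello there.###' }
trimStopSequences(m, ['###', '</s>']) // m.content -> 'Hello there.'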
@@ -55,8 +71,8 @@ export const runPetalsCompletionRequest = async (
           }
           const rMessages = request.messages || [] as Message[]
           // make sure top_p and temperature are set the way we need
-          let temperature = request.temperature || 0
-          if (isNaN(temperature as any)) temperature = 1
+          let temperature = request.temperature
+          if (temperature === undefined || isNaN(temperature as any)) temperature = 1
           if (!temperature || temperature <= 0) temperature = 0.01
           let topP = request.top_p
           if (topP === undefined || isNaN(topP as any)) topP = 1
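
The temperature fix matters because request.temperature || 0 coerced a missing value to 0, which the next line then clamped to 0.01 instead of letting it default to 1. A sketch of the corrected sanitization (the 0.01 clamp presumably exists because the backend cannot sample at 0):

const sanitizeSampling = (temperature?: number, topP?: number) => {
  let t = temperature
  if (t === undefined || isNaN(t as any)) t = 1 // missing now defaults to 1, not 0
  if (!t || t <= 0) t = 0.01 // clamp away values the backend cannot use
  let p = topP
  if (p === undefined || isNaN(p as any)) p = 1
  return { temperature: t, top_p: p }
}

sanitizeSampling(undefined, undefined) // -> { temperature: 1, top_p: 1 }
sanitizeSampling(0, 0.9)               // -> { temperature: 0.01, top_p: 0.9 }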
@@ -64,7 +80,7 @@ export const runPetalsCompletionRequest = async (
           // build the message array
           const inputArray = (rMessages).reduce((a, m) => {
             const c = getRoleTag(m.role, model, chatRequest.chat) + m.content
-            a.push(c)
+            a.push(c.trim())
             return a
           }, [] as string[])
           const lastMessage = rMessages[rMessages.length - 1]
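
Trimming each roleTag + content entry keeps stray whitespace from breaking stop-sequence matching once the entries are joined on the stop sequence (see inputs: inputArray.join(stopSequence) in the next hunk). A sketch of the assembly, with a hypothetical roleTag standing in for the real getRoleTag lookup:

// Hypothetical stand-in: the real getRoleTag resolves userStart /
// assistantStart / systemStart from the model details and chat settings.
const roleTag = (role: string): string =>
  ({ user: '<|user|>', assistant: '<|Assistant|>', system: '' } as Record<string, string>)[role] ?? ''

const buildInputs = (messages: { role: string, content: string }[], stopSequence: string): string =>
  messages.reduce((a, m) => {
    a.push((roleTag(m.role) + m.content).trim()) // trim each entry, as in the fix above
    return a
  }, [] as string[]).join(stopSequence)

buildInputs([{ role: 'user', content: 'Hi ' }, { role: 'assistant', content: '' }], '</s>')
// -> '<|user|>Hi</s><|Assistant|>'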
@@ -75,12 +91,12 @@ export const runPetalsCompletionRequest = async (
             type: 'generate',
             inputs: inputArray.join(stopSequence),
             max_new_tokens: 1, // wait for up to 1 tokens before displaying
-            stop_sequence: stopSequence,
+            stop_sequence: stopSequenceC,
             do_sample: 1, // enable top p and the like
             temperature,
-            top_p: topP,
-            extra_stop_sequences: stopSequencesC
-          }
+            top_p: topP
+          } as any
+          if (stopSequencesC.length) petalsRequest.extra_stop_sequences = stopSequencesC
           ws.send(JSON.stringify(petalsRequest))
           ws.onmessage = event => {
             // Remove updating indicator
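
extra_stop_sequences is now attached only when non-empty, avoiding an empty array the server might reject, and the primary stop_sequence is the normalized stopSequenceC. A sketch of the resulting payload, assuming only the fields visible in this hunk:

// Shape of the generate request as sent above; anything beyond these fields is assumed.
type PetalsGenerateRequest = {
  type: 'generate'
  inputs: string
  max_new_tokens: number
  stop_sequence: string
  do_sample: number
  temperature: number
  top_p: number
  extra_stop_sequences?: string[] // only set when there is something to send
}

const petalsRequest: PetalsGenerateRequest = {
  type: 'generate',
  inputs: '<|user|>Hi</s>',   // illustrative value
  max_new_tokens: 1,          // stream roughly token by token
  stop_sequence: '</s>',
  do_sample: 1,               // enable top_p sampling
  temperature: 1,
  top_p: 1
}
const extras = ['### User:'] // from the earlier filter; illustrative value
if (extras.length) petalsRequest.extra_stop_sequences = extras
// ws.send(JSON.stringify(petalsRequest))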
@@ -106,17 +122,6 @@ export const runPetalsCompletionRequest = async (
                         }]
                       } as any
               )
-              if (response.stop) {
-                const message = chatResponse.getMessages()[0]
-                if (message) {
-                  for (let i = 0, l = stopSequences.length; i < l; i++) {
-                    if (message.content.endsWith(stopSequences[i])) {
-                      message.content = message.content.slice(0, message.content.length - stopSequences[i].length)
-                      updateMessages(chat.id)
-                    }
-                  }
-                }
-              }
             }, 1)
           }
         }
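
The removed block duplicated the trimming that now runs in onFinish, and it ran on every streamed message. What remains in the handler is roughly the following sketch; only response.stop is visible in this diff, everything else here is an assumption:

declare const ws: WebSocket
declare function appendDelta(text: string): void // hypothetical helper
declare function finishResponse(): void          // hypothetical helper

ws.onmessage = (event: MessageEvent) => {
  const response = JSON.parse(event.data) as { outputs?: string, stop?: boolean }
  if (response.outputs) appendDelta(response.outputs) // 'outputs' is assumed, not in this diff
  if (response.stop) {
    finishResponse() // trailing stop sequences are now trimmed once, in onFinish
  }
}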
@@ -42,27 +42,49 @@ const modelDetails : Record<string, ModelDetail> = {
     completion: 0.000004, // $0.004 per 1000 tokens completion
     max: 16384 // 16k max token buffer
   },
+  'enoch/llama-65b-hf': {
+    type: 'Petals',
+    label: 'Petals - Llama-65b',
+    stop: ['###', '</s>'],
+    userStart: '<|user|>',
+    assistantStart: '<|[[CHARACTER_NAME]]|>',
+    systemStart: '',
+    prompt: 0.000000, // $0.000 per 1000 tokens prompt
+    completion: 0.000000, // $0.000 per 1000 tokens completion
+    max: 2048 // 2k max token buffer
+  },
+  'timdettmers/guanaco-65b': {
+    type: 'Petals',
+    label: 'Petals - Guanaco-65b',
+    stop: ['###', '</s>'],
+    userStart: '<|user|>',
+    assistantStart: '<|[[CHARACTER_NAME]]|>',
+    systemStart: '',
+    prompt: 0.000000, // $0.000 per 1000 tokens prompt
+    completion: 0.000000, // $0.000 per 1000 tokens completion
+    max: 2048 // 2k max token buffer
+  },
   'meta-llama/Llama-2-70b-chat-hf': {
     type: 'Petals',
     label: 'Petals - Llama-2-70b-chat',
-    stop: ['</s>'],
-    userStart: '[user]',
-    assistantStart: '[[[CHARACTER_NAME]]]',
+    stop: ['###', '</s>'],
+    userStart: '<|user|>',
+    assistantStart: '<|[[CHARACTER_NAME]]|>',
     systemStart: '',
     prompt: 0.000000, // $0.000 per 1000 tokens prompt
     completion: 0.000000, // $0.000 per 1000 tokens completion
     max: 4096 // 4k max token buffer
   },
-  'timdettmers/guanaco-65b': {
+  'meta-llama/Llama-2-70b-hf': {
     type: 'Petals',
-    label: 'Petals - guanaco-65b',
-    stop: ['</s>'],
-    userStart: '[user]',
-    assistantStart: '[[[CHARACTER_NAME]]]',
+    label: 'Petals - Llama-2-70b',
+    stop: ['###', '</s>'],
+    userStart: '<|user|>',
+    assistantStart: '<|[[CHARACTER_NAME]]|>',
     systemStart: '',
     prompt: 0.000000, // $0.000 per 1000 tokens prompt
     completion: 0.000000, // $0.000 per 1000 tokens completion
-    max: 2048 // 2k max token buffer
+    max: 4096 // 4k max token buffer
   }
 }
 
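
All four Petals entries share one shape, and the [[CHARACTER_NAME]] token in assistantStart is presumably substituted with the profile's character name before use. The fields these entries exercise, as a sketch (the real ModelDetail type may be wider):

// Fields exercised by the Petals entries above; the real ModelDetail may have more members.
interface PetalsModelDetail {
  type: 'Petals'
  label: string
  stop: string[]          // e.g. ['###', '</s>']
  userStart: string       // role tag prefixed to user messages
  assistantStart: string  // '[[CHARACTER_NAME]]' presumably replaced with the profile name
  systemStart: string
  prompt: number          // $ per token, prompt side
  completion: number      // $ per token, completion side
  max: number             // max token buffer
}

const example: PetalsModelDetail = {
  type: 'Petals',
  label: 'Petals - Llama-2-70b-chat',
  stop: ['###', '</s>'],
  userStart: '<|user|>',
  assistantStart: '<|[[CHARACTER_NAME]]|>',
  systemStart: '',
  prompt: 0.0,
  completion: 0.0,
  max: 4096
}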
@@ -107,8 +129,10 @@ export const supportedModels : Record<string, ModelDetail> = {
   'gpt-4-32k': modelDetails['gpt-4-32k'],
   'gpt-4-32k-0314': modelDetails['gpt-4-32k'],
   'gpt-4-32k-0613': modelDetails['gpt-4-32k'],
+  'enoch/llama-65b-hf': modelDetails['enoch/llama-65b-hf'],
+  'timdettmers/guanaco-65b': modelDetails['timdettmers/guanaco-65b'],
+  'meta-llama/Llama-2-70b-hf': modelDetails['meta-llama/Llama-2-70b-hf'],
   'meta-llama/Llama-2-70b-chat-hf': modelDetails['meta-llama/Llama-2-70b-chat-hf']
-  // 'timdettmers/guanaco-65b': modelDetails['timdettmers/guanaco-65b']
 }
 
 const lookupList = {
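
Keying supportedModels by the exact Hugging Face model id means the same string works for both lookup and the request. A quick illustration of the lookup these entries enable:

// Illustrative lookup against the map defined above.
const detail = supportedModels['meta-llama/Llama-2-70b-chat-hf']
console.log(detail.label, detail.max) // 'Petals - Llama-2-70b-chat' 4096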
@@ -154,27 +178,27 @@ export const getEndpoint = (model: Model): string => {
 }
 
 export const getStopSequence = (chat: Chat): string => {
-  return valueOf(chat.id, getChatSettingObjectByKey('stopSequence').placeholder)
+  return chat.settings.stopSequence || valueOf(chat.id, getChatSettingObjectByKey('stopSequence').placeholder)
 }
 
 export const getUserStart = (chat: Chat): string => {
   return mergeProfileFields(
         chat.settings,
-        valueOf(chat.id, getChatSettingObjectByKey('userMessageStart').placeholder)
+        chat.settings.userMessageStart || valueOf(chat.id, getChatSettingObjectByKey('userMessageStart').placeholder)
       )
 }
 
 export const getAssistantStart = (chat: Chat): string => {
   return mergeProfileFields(
         chat.settings,
-        valueOf(chat.id, getChatSettingObjectByKey('assistantMessageStart').placeholder)
+        chat.settings.assistantMessageStart || valueOf(chat.id, getChatSettingObjectByKey('assistantMessageStart').placeholder)
       )
 }
 
 export const getSystemStart = (chat: Chat): string => {
   return mergeProfileFields(
         chat.settings,
-        valueOf(chat.id, getChatSettingObjectByKey('systemMessageStart').placeholder)
+        chat.settings.systemMessageStart || valueOf(chat.id, getChatSettingObjectByKey('systemMessageStart').placeholder)
       )
 }
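
Each getter now prefers the value saved on the chat's own settings and only falls back to the setting's placeholder default, so per-chat overrides of stop and role sequences actually take effect. The pattern, reduced to a standalone sketch with assumed types and a hypothetical stand-in for valueOf / getChatSettingObjectByKey (the '###' placeholder matches the model defaults above):

interface ChatSettings { stopSequence?: string }
interface Chat { id: number, settings: ChatSettings }

// hypothetical stand-in for valueOf(chat.id, getChatSettingObjectByKey(key).placeholder)
const placeholderFor = (key: string): string => (key === 'stopSequence' ? '###' : '')

const getStopSequence = (chat: Chat): string =>
  chat.settings.stopSequence || placeholderFor('stopSequence')

getStopSequence({ id: 1, settings: {} })                       // -> '###'
getStopSequence({ id: 1, settings: { stopSequence: '</s>' } }) // -> '</s>'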