Fix some issues with stop sequences and role sequences
parent f56e29b829
commit 38d38bf948
@@ -25,10 +25,16 @@ export const runPetalsCompletionRequest = async (
       ws.close()
     }
     signal.addEventListener('abort', abortListener)
-    const stopSequences = modelDetail.stop || ['###']
+    const stopSequences = (modelDetail.stop || ['###', '</s>']).slice()
     const stopSequence = getStopSequence(chat)
-    const stopSequencesC = stopSequences.slice()
-    if (stopSequence === stopSequencesC[0]) stopSequencesC.shift()
+    let stopSequenceC = stopSequence
+    if (stopSequence !== '###') {
+      stopSequences.push(stopSequence)
+      stopSequenceC = '</s>'
+    }
+    const stopSequencesC = stopSequences.filter((ss) => {
+      return ss !== '###' && ss !== stopSequenceC
+    })
    const maxTokens = getModelMaxTokens(model)
    let maxLen = Math.min(opts.maxTokens || chatRequest.chat.max_tokens || maxTokens, maxTokens)
    const promptTokenCount = chatResponse.getPromptTokenCount()
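For illustration, the stop-sequence selection above can be read as a small pure helper; the name splitStopSequences and the return shape are assumptions, while the behaviour mirrors the hunk, assuming chatStop is whatever getStopSequence(chat) returns:

// Sketch: pick the primary stop sequence for Petals plus any extra ones.
// splitStopSequences is a hypothetical helper name; logic follows the hunk above.
const splitStopSequences = (modelStop: string[] | undefined, chatStop: string) => {
  const stopSequences = (modelStop || ['###', '</s>']).slice()
  let primary = chatStop
  if (chatStop !== '###') {
    stopSequences.push(chatStop) // custom chat stop becomes an extra sequence
    primary = '</s>'             // and '</s>' is used as the primary stop token
  }
  const extra = stopSequences.filter(ss => ss !== '###' && ss !== primary)
  return { primary, extra }
}
// e.g. splitStopSequences(['###', '</s>'], '### User:')
//   → { primary: '</s>', extra: ['### User:'] }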
@@ -36,6 +42,16 @@ export const runPetalsCompletionRequest = async (
       maxLen = Math.min(maxLen + promptTokenCount, maxTokens)
     }
     chatResponse.onFinish(() => {
+      const message = chatResponse.getMessages()[0]
+      if (message) {
+        for (let i = 0, l = stopSequences.length; i < l; i++) {
+          const ss = stopSequences[i].trim()
+          if (message.content.trim().endsWith(ss)) {
+            message.content = message.content.trim().slice(0, message.content.trim().length - ss.length)
+            updateMessages(chat.id)
+          }
+        }
+      }
       chatRequest.updating = false
       chatRequest.updatingMessage = ''
     })
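The new onFinish handler trims a trailing stop sequence off the finished message. Roughly, as a standalone helper (stripTrailingStop is a hypothetical name, and it stops after the first match, which the loop above does not):

// Sketch: remove one trailing stop sequence from a completed message.
const stripTrailingStop = (content: string, stopSequences: string[]): string => {
  const trimmed = content.trim()
  for (const stop of stopSequences) {
    const ss = stop.trim()
    if (ss && trimmed.endsWith(ss)) {
      return trimmed.slice(0, trimmed.length - ss.length)
    }
  }
  return content
}
// e.g. stripTrailingStop('Sure, I can help.</s>', ['###', '</s>'])
//   → 'Sure, I can help.'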
@@ -55,8 +71,8 @@ export const runPetalsCompletionRequest = async (
     }
     const rMessages = request.messages || [] as Message[]
     // make sure top_p and temperature are set the way we need
-    let temperature = request.temperature || 0
-    if (isNaN(temperature as any)) temperature = 1
+    let temperature = request.temperature
+    if (temperature === undefined || isNaN(temperature as any)) temperature = 1
     if (!temperature || temperature <= 0) temperature = 0.01
     let topP = request.top_p
     if (topP === undefined || isNaN(topP as any)) topP = 1
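The intent of the sampling fix above: an unset or NaN temperature now defaults to 1 instead of 0, and non-positive values are clamped to a small positive number so Petals sampling stays valid. Condensed into one hypothetical helper:

// Sketch: normalize temperature and top_p the same way the hunk does.
const sanitizeSampling = (temperature?: number, topP?: number) => {
  let t = temperature
  if (t === undefined || isNaN(t as any)) t = 1   // unset/NaN → 1 (was: unset → 0)
  if (!t || t <= 0) t = 0.01                      // keep strictly positive
  let p = topP
  if (p === undefined || isNaN(p as any)) p = 1
  return { temperature: t, top_p: p }
}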
@@ -64,7 +80,7 @@ export const runPetalsCompletionRequest = async (
     // build the message array
     const inputArray = (rMessages).reduce((a, m) => {
       const c = getRoleTag(m.role, model, chatRequest.chat) + m.content
-      a.push(c)
+      a.push(c.trim())
       return a
     }, [] as string[])
     const lastMessage = rMessages[rMessages.length - 1]
@@ -75,12 +91,12 @@ export const runPetalsCompletionRequest = async (
       type: 'generate',
       inputs: inputArray.join(stopSequence),
       max_new_tokens: 1, // wait for up to 1 tokens before displaying
-      stop_sequence: stopSequence,
+      stop_sequence: stopSequenceC,
       do_sample: 1, // enable top p and the like
       temperature,
-      top_p: topP,
-      extra_stop_sequences: stopSequencesC
-    }
+      top_p: topP
+    } as any
+    if (stopSequencesC.length) petalsRequest.extra_stop_sequences = stopSequencesC
     ws.send(JSON.stringify(petalsRequest))
     ws.onmessage = event => {
       // Remove updating indicator
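For reference, the shape of the websocket generate request after this change, written as a hypothetical interface (field meanings are taken from the hunk and its comments):

// Sketch of the Petals generate payload; the interface name is illustrative.
interface PetalsGenerateRequest {
  type: 'generate'
  inputs: string                   // role-tagged messages joined with the stop sequence
  max_new_tokens: number           // 1 → stream roughly token by token
  stop_sequence: string            // primary stop sequence (stopSequenceC)
  do_sample: number                // 1 enables temperature / top_p sampling
  temperature: number
  top_p: number
  extra_stop_sequences?: string[]  // only included when stopSequencesC is non-empty
}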
@@ -106,17 +122,6 @@ export const runPetalsCompletionRequest = async (
             }]
           } as any
         )
-        if (response.stop) {
-          const message = chatResponse.getMessages()[0]
-          if (message) {
-            for (let i = 0, l = stopSequences.length; i < l; i++) {
-              if (message.content.endsWith(stopSequences[i])) {
-                message.content = message.content.slice(0, message.content.length - stopSequences[i].length)
-                updateMessages(chat.id)
-              }
-            }
-          }
-        }
       }, 1)
     }
   }
@@ -42,27 +42,49 @@ const modelDetails : Record<string, ModelDetail> = {
     completion: 0.000004, // $0.004 per 1000 tokens completion
     max: 16384 // 16k max token buffer
   },
+  'enoch/llama-65b-hf': {
+    type: 'Petals',
+    label: 'Petals - Llama-65b',
+    stop: ['###', '</s>'],
+    userStart: '<|user|>',
+    assistantStart: '<|[[CHARACTER_NAME]]|>',
+    systemStart: '',
+    prompt: 0.000000, // $0.000 per 1000 tokens prompt
+    completion: 0.000000, // $0.000 per 1000 tokens completion
+    max: 2048 // 2k max token buffer
+  },
+  'timdettmers/guanaco-65b': {
+    type: 'Petals',
+    label: 'Petals - Guanaco-65b',
+    stop: ['###', '</s>'],
+    userStart: '<|user|>',
+    assistantStart: '<|[[CHARACTER_NAME]]|>',
+    systemStart: '',
+    prompt: 0.000000, // $0.000 per 1000 tokens prompt
+    completion: 0.000000, // $0.000 per 1000 tokens completion
+    max: 2048 // 2k max token buffer
+  },
   'meta-llama/Llama-2-70b-chat-hf': {
     type: 'Petals',
     label: 'Petals - Llama-2-70b-chat',
-    stop: ['</s>'],
-    userStart: '[user]',
-    assistantStart: '[[[CHARACTER_NAME]]]',
+    stop: ['###', '</s>'],
+    userStart: '<|user|>',
+    assistantStart: '<|[[CHARACTER_NAME]]|>',
     systemStart: '',
     prompt: 0.000000, // $0.000 per 1000 tokens prompt
     completion: 0.000000, // $0.000 per 1000 tokens completion
     max: 4096 // 4k max token buffer
   },
-  'timdettmers/guanaco-65b': {
+  'meta-llama/Llama-2-70b-hf': {
     type: 'Petals',
-    label: 'Petals - guanaco-65b',
-    stop: ['</s>'],
-    userStart: '[user]',
-    assistantStart: '[[[CHARACTER_NAME]]]',
+    label: 'Petals - Llama-2-70b',
+    stop: ['###', '</s>'],
+    userStart: '<|user|>',
+    assistantStart: '<|[[CHARACTER_NAME]]|>',
     systemStart: '',
     prompt: 0.000000, // $0.000 per 1000 tokens prompt
     completion: 0.000000, // $0.000 per 1000 tokens completion
-    max: 2048 // 2k max token buffer
+    max: 4096 // 4k max token buffer
   }
 }
 
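The userStart/assistantStart markers above are the role tags prepended to each message before the pieces are joined with the stop sequence (see inputs in the request hunk). A rough sketch of the resulting prompt string, assuming '[[CHARACTER_NAME]]' has already been replaced by the profile's character name:

// Sketch: how the role tags turn a short conversation into a Petals prompt.
const userStart = '<|user|>'
const assistantStart = '<|Assistant|>' // '[[CHARACTER_NAME]]' resolved elsewhere
const stopSequence = '###'
const messages = [
  { role: 'user', content: 'Hello' },
  { role: 'assistant', content: 'Hi there!' }
]
const prompt = messages
  .map(m => ((m.role === 'user' ? userStart : assistantStart) + m.content).trim())
  .join(stopSequence)
// prompt === '<|user|>Hello###<|Assistant|>Hi there!'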
@@ -107,8 +129,10 @@ export const supportedModels : Record<string, ModelDetail> = {
   'gpt-4-32k': modelDetails['gpt-4-32k'],
   'gpt-4-32k-0314': modelDetails['gpt-4-32k'],
   'gpt-4-32k-0613': modelDetails['gpt-4-32k'],
+  'enoch/llama-65b-hf': modelDetails['enoch/llama-65b-hf'],
+  'timdettmers/guanaco-65b': modelDetails['timdettmers/guanaco-65b'],
+  'meta-llama/Llama-2-70b-hf': modelDetails['meta-llama/Llama-2-70b-hf'],
   'meta-llama/Llama-2-70b-chat-hf': modelDetails['meta-llama/Llama-2-70b-chat-hf']
-  // 'timdettmers/guanaco-65b': modelDetails['timdettmers/guanaco-65b']
 }
 
 const lookupList = {
@@ -154,27 +178,27 @@ export const getEndpoint = (model: Model): string => {
 }
 
 export const getStopSequence = (chat: Chat): string => {
-  return valueOf(chat.id, getChatSettingObjectByKey('stopSequence').placeholder)
+  return chat.settings.stopSequence || valueOf(chat.id, getChatSettingObjectByKey('stopSequence').placeholder)
 }
 
 export const getUserStart = (chat: Chat): string => {
   return mergeProfileFields(
     chat.settings,
-    valueOf(chat.id, getChatSettingObjectByKey('userMessageStart').placeholder)
+    chat.settings.userMessageStart || valueOf(chat.id, getChatSettingObjectByKey('userMessageStart').placeholder)
   )
 }
 
 export const getAssistantStart = (chat: Chat): string => {
   return mergeProfileFields(
     chat.settings,
-    valueOf(chat.id, getChatSettingObjectByKey('assistantMessageStart').placeholder)
+    chat.settings.assistantMessageStart || valueOf(chat.id, getChatSettingObjectByKey('assistantMessageStart').placeholder)
   )
 }
 
 export const getSystemStart = (chat: Chat): string => {
   return mergeProfileFields(
     chat.settings,
-    valueOf(chat.id, getChatSettingObjectByKey('systemMessageStart').placeholder)
+    chat.settings.systemMessageStart || valueOf(chat.id, getChatSettingObjectByKey('systemMessageStart').placeholder)
   )
 }
 
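The getters now prefer an explicit per-chat setting and only fall back to the chat setting's placeholder when none is set; sketched generically (resolveSetting and the '###' placeholder value are illustrative):

// Sketch: explicit per-chat value wins, placeholder is the fallback.
const resolveSetting = (override: string | undefined, placeholder: string): string =>
  override || placeholder
// resolveSetting(chat.settings.stopSequence, '###') → chat value when set, else '###'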