Merge pull request #282 from Webifi/main
Bug fixes, Llama template changes
commit 02cc45e553
@@ -3,7 +3,7 @@
 import { cleanContent, mergeProfileFields, prepareSummaryPrompt } from './Profiles.svelte'
 import { countMessageTokens, countPromptTokens, getModelMaxTokens } from './Stats.svelte'
 import type { Chat, ChatCompletionOpts, ChatSettings, Message, Model, Request } from './Types.svelte'
-import { deleteMessage, getChatSettingValueNullDefault, insertMessages, addError, currentChatMessages, getMessages, updateMessages, deleteSummaryMessage } from './Storage.svelte'
+import { deleteMessage, getChatSettingValueNullDefault, insertMessages, addError, currentChatMessages, getMessages, updateMessages, deleteSummaryMessage, getChat } from './Storage.svelte'
 import { scrollToBottom, scrollToMessage } from './Util.svelte'
 import { getDefaultModel, getRequestSettingList } from './Settings.svelte'
 import { v4 as uuidv4 } from 'uuid'
@@ -62,7 +62,8 @@ export class ChatRequest {
   async sendRequest (messages: Message[], opts: ChatCompletionOpts, overrides: ChatSettings = {} as ChatSettings): Promise<ChatCompletionResponse> {
     // TODO: Continue to break this method down to smaller chunks
     const _this = this
-    const chat = _this.chat
+    const chat = getChat(_this.chat.id)
+    this.setChat(chat)
     const chatSettings = _this.chat.settings
     const chatId = chat.id
     const imagePromptDetect = /^\s*(please|can\s+you|will\s+you)*\s*(give|generate|create|show|build|design)\s+(me)*\s*(an|a|set|a\s+set\s+of)*\s*([0-9]+|one|two|three|four)*\s+(image|photo|picture|pic)s*\s*(for\s+me)*\s*(of|[^a-z0-9]+|about|that\s+has|showing|with|having|depicting)\s+[^a-z0-9]*(.*)$/i
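The `imagePromptDetect` regular expression above is unchanged context in this hunk. As a standalone illustration only (how `sendRequest` actually consumes the match is outside this diff), the last capture group is what carries the requested image description:

```typescript
// Illustration only: exercise the image-prompt detection regex from the hunk above.
// The capture-group handling is an assumption for demonstration, not code from this PR.
const imagePromptDetect = /^\s*(please|can\s+you|will\s+you)*\s*(give|generate|create|show|build|design)\s+(me)*\s*(an|a|set|a\s+set\s+of)*\s*([0-9]+|one|two|three|four)*\s+(image|photo|picture|pic)s*\s*(for\s+me)*\s*(of|[^a-z0-9]+|about|that\s+has|showing|with|having|depicting)\s+[^a-z0-9]*(.*)$/i

const match = 'Please generate an image of a cat wearing a top hat'.match(imagePromptDetect)
if (match) {
  console.log(match[9]) // "a cat wearing a top hat" — the description to pass along to the image request
}
```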
@@ -22,14 +22,14 @@ const chatModelBase = {
   See <a target="_blank" href="https://platform.openai.com/docs/api-reference/chat/create">this overview</a> to start, though not all settings translate to Petals.
   <i>Note that some models may mot be functional. See <a target="_blank" href="https://health.petals.dev">https://health.petals.dev</a> for current status.</i>`,
   check: checkModel,
-  start: '<s>',
+  start: '###',
   stop: ['###', '</s>'],
-  delimiter: '\n###\n\n',
-  userStart: 'User:\n',
+  delimiter: '\n###\n###',
+  userStart: ' User: ',
   userEnd: '',
-  assistantStart: '[[CHARACTER_NAME]]:\n',
+  assistantStart: ' [[CHARACTER_NAME]]: ',
   assistantEnd: '',
-  leadPrompt: '[[CHARACTER_NAME]]:\n',
+  leadPrompt: ' [[CHARACTER_NAME]]: ',
   systemEnd: '',
   prompt: 0.000000, // $0.000 per 1000 tokens prompt
   completion: 0.000000, // $0.000 per 1000 tokens completion
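The default Petals template now uses `###` separators with inline ` User: ` / ` [[CHARACTER_NAME]]: ` role prefixes instead of newline-terminated ones. A rough sketch of how one exchange could look once the fields are joined; the actual prompt assembly is not part of this diff, and both the assembly order and the resolved character name are assumptions:

```typescript
// Hypothetical rendering with the new default Petals template fields.
const start = '###'
const delimiter = '\n###\n###'
const userStart = ' User: '
const assistantStart = ' Assistant: ' // '[[CHARACTER_NAME]]' resolved to the profile's character name

const rendered = start + userStart + 'Hello there!' + delimiter + assistantStart
// "### User: Hello there!\n###\n### Assistant: "
```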
@@ -52,35 +52,35 @@ const chatModelBase = {
 } as ModelDetail

 export const chatModels : Record<string, ModelDetail> = {
-  // 'enoch/llama-65b-hf': {
-  // ...chatModelBase,
-  // label: 'Petals - Llama-65b'
-  // },
-  // 'codellama/CodeLlama-34b-Instruct-hf ': {
-  // ...chatModelBase,
-  // label: 'Petals - CodeLlama-34b',
-  // max: 2048
-  // },
+  'enoch/llama-65b-hf': {
+    ...chatModelBase,
+    label: 'Petals - Llama-65b',
+    max: 2048
+  },
   'timdettmers/guanaco-65b': {
     ...chatModelBase,
     label: 'Petals - Guanaco-65b',
     max: 2048
   },
-  'meta-llama/Llama-2-70b-hf': {
-    ...chatModelBase,
-    label: 'Petals - Llama-2-70b'
-  },
+  // 'codellama/CodeLlama-34b-Instruct-hf ': {
+  // ...chatModelBase,
+  // label: 'Petals - CodeLlama-34b',
+  // max: 2048
+  // },
+  // 'meta-llama/Llama-2-70b-hf': {
+  // ...chatModelBase,
+  // label: 'Petals - Llama-2-70b'
+  // },
   'meta-llama/Llama-2-70b-chat-hf': {
     ...chatModelBase,
     label: 'Petals - Llama-2-70b-chat',
     start: '<s>',
     stop: ['</s>', '[INST]', '[/INST]', '<<SYS>>', '<</SYS>>'],
-    delimiter: ' </s><s>',
-    userStart: '[INST][[SYSTEM_PROMPT]]',
+    delimiter: '</s><s>',
+    userStart: '[INST] User: ',
     userEnd: ' [/INST]',
-    assistantStart: '[[SYSTEM_PROMPT]][[USER_PROMPT]]',
-    systemStart: '<<SYS>>\n',
-    systemEnd: '\n<</SYS>>\n\n'
+    systemStart: '[INST] <<SYS>>\n',
+    systemEnd: '\n<</SYS>> [/INST]'
     // leadPrompt: ''
   },
   'stabilityai/StableBeluga2': {
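For `meta-llama/Llama-2-70b-chat-hf`, the system prompt is now wrapped in its own `[INST] <<SYS>> … <</SYS>> [/INST]` turn and user turns get an explicit ` User: ` prefix. A hedged sketch of one system turn plus one user turn under the new fields; the assembly order is assumed, not taken from this diff:

```typescript
// Hypothetical rendering with the updated Llama-2 chat template fields.
const systemStart = '[INST] <<SYS>>\n'
const systemEnd = '\n<</SYS>> [/INST]'
const userStart = '[INST] User: '
const userEnd = ' [/INST]'
const delimiter = '</s><s>'

const rendered = '<s>' + // start
  systemStart + 'You are a helpful assistant.' + systemEnd +
  delimiter +
  userStart + 'Summarize this conversation.' + userEnd
// "<s>[INST] <<SYS>>\nYou are a helpful assistant.\n<</SYS>> [/INST]</s><s>[INST] User: Summarize this conversation. [/INST]"
```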
@@ -42,6 +42,8 @@ export const chatRequest = async (
   const signal = chatRequest.controller.signal
   const providerData = chatRequest.providerData.petals || {}
   chatRequest.providerData.petals = providerData
+  const modelChanged = model !== providerData.lastModel
+  providerData.lastModel = model
   let ws: WebSocket = providerData.ws
   const abortListener = (e:Event) => {
     chatRequest.updating = false
@@ -161,7 +163,14 @@ export const chatRequest = async (

   let maxLen = Math.min(opts.maxTokens || chatSettings.max_tokens || maxTokens, maxTokens)

-  let inputPrompt = startSequence
+  let midDel = ''
+  for (let i = 0, l = delimiter.length; i < l; i++) {
+    const chk = delimiter.slice(0, i)
+    if ((providerData.knownBuffer || '').slice(0 - (i + 1)) === chk) midDel = chk
+  }
+  midDel = midDel.length ? delimiter.slice(0, 0 - midDel.length) : delimiter
+
+  let inputPrompt = midDel

   const getNewWs = ():Promise<WebSocket> => new Promise<WebSocket>((resolve, reject) => {
     // console.warn('requesting new ws')
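This hunk replaces the unconditional `startSequence` prefix with a check of how much of the delimiter is already sitting at the end of `providerData.knownBuffer`, so a held-open socket is not fed the separator twice. Pulled out as a standalone helper for illustration only; the wrapper and names are hypothetical, the body mirrors the diff:

```typescript
// What to prepend before the next prompt: compare delimiter prefixes against the
// tail of the known buffer, remember the longest prefix that matched, then drop
// that many trailing characters from the delimiter (or keep the whole delimiter
// if nothing matched).
const delimiterToSend = (delimiter: string, knownBuffer: string): string => {
  let midDel = ''
  for (let i = 0, l = delimiter.length; i < l; i++) {
    const chk = delimiter.slice(0, i)
    if ((knownBuffer || '').slice(0 - (i + 1)) === chk) midDel = chk
  }
  return midDel.length ? delimiter.slice(0, 0 - midDel.length) : delimiter
}
```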
@@ -183,7 +192,7 @@ export const chatRequest = async (
         throw err
       }
       // console.warn('got new ws')
-      inputPrompt = lastPrompt
+      inputPrompt = lastPrompt + delimiter
       providerData.knownBuffer = ''
       providerData.ws = nws
       resolve(nws)
@@ -221,7 +230,8 @@ export const chatRequest = async (
     const kb = providerData.knownBuffer.replace(rgxp, '')
     const lp = lastPrompt.replace(rgxp, '')
     const lm = kb === lp
-    if (!lm || countTokens(model, providerData.knownBuffer + inputPrompt) >= maxTokens) {
+    if (!chatSettings.holdSocket || modelChanged || !lm ||
+      countTokens(model, providerData.knownBuffer + inputPrompt) >= maxTokens) {
       wsOpen && ws.close()
       ws = await getNewWs()
     }
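With `holdSocket` and the model-change flag in play, the reconnect decision now has four triggers. Restated as a standalone predicate for readability; the wrapper and parameter names are illustrative, the checks mirror the diff:

```typescript
// Open a fresh websocket when socket holding is disabled, the model changed,
// the known buffer no longer matches the last prompt, or the next request
// would exceed the token budget.
const needsNewSocket = (
  holdSocket: boolean,    // chatSettings.holdSocket
  modelChanged: boolean,  // model !== providerData.lastModel
  buffersMatch: boolean,  // lm: knownBuffer equals lastPrompt after the rgxp normalization
  wouldOverflow: boolean  // countTokens(model, knownBuffer + inputPrompt) >= maxTokens
): boolean => !holdSocket || modelChanged || !buffersMatch || wouldOverflow
```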
@@ -231,7 +241,7 @@ export const chatRequest = async (
     ws = await getNewWs()
   }

-  inputPrompt += delimiter + nextPrompt
+  inputPrompt += nextPrompt
   providerData.knownBuffer += inputPrompt

   // console.log(