From 8e35b198da22216aa6921c9675fa434449754af6 Mon Sep 17 00:00:00 2001 From: Webifi Date: Thu, 20 Jul 2023 20:32:36 -0500 Subject: [PATCH 01/17] Add [[LAST-PROMPT]] system prompt expansion --- src/lib/ChatRequest.svelte | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/lib/ChatRequest.svelte b/src/lib/ChatRequest.svelte index 60dd80c..d0bc259 100644 --- a/src/lib/ChatRequest.svelte +++ b/src/lib/ChatRequest.svelte @@ -159,6 +159,8 @@ export class ChatRequest { const spl = chatSettings.sendSystemPromptLast const sp = messagePayload[0] if (sp) { + const lastSp = sp.content.split('::END-PROMPT::') + sp.content = lastSp[0].trim() if (messagePayload.length > 1) { sp.content = sp.content.replace(/::STARTUP::[\s\S]*::EOM::/, '::EOM::') sp.content = sp.content.replace(/::STARTUP::[\s\S]*::START-PROMPT::/, '::START-PROMPT::') @@ -170,7 +172,7 @@ export class ChatRequest { if (spl) { messagePayload.shift() if (messagePayload[messagePayload.length - 1]?.role === 'user') { - messagePayload.splice(-2, 0, sp) + messagePayload.splice(-1, 0, sp) } else { messagePayload.push(sp) } @@ -196,6 +198,10 @@ export class ChatRequest { }).filter(m => m.content.length) messagePayload.splice(spl ? 0 : 1, 0, ...ms.concat(splitSystem.map(s => ({ role: 'system', content: s.trim() } as Message)).filter(m => m.content.length))) } + const lastSpC = lastSp[1]?.trim() || '' + if (lastSpC.length) { + messagePayload.push({ role: 'system', content: lastSpC } as Message) + } } } @@ -356,9 +362,9 @@ export class ChatRequest { const results = hiddenPromptPrefix.split(/[\s\r\n]*::EOM::[\s\r\n]*/).reduce((a, m) => { m = m.trim() if (m.length) { - if (m.match(/[[USER_PROMPT]]/)) { + if (m.match(/\[\[USER_PROMPT\]\]/)) { injectedPrompt = true - m.replace(/[[USER_PROMPT]]/g, lastMessage.content) + m.replace(/\[\[USER_PROMPT\]\]/g, lastMessage.content) } a.push({ role: a.length % 2 === 0 ? 
'user' : 'assistant', content: m } as Message) } From 914055f1f990ade466aae68227d2847f3a9bfb80 Mon Sep 17 00:00:00 2001 From: Webifi Date: Sat, 22 Jul 2023 01:42:21 -0500 Subject: [PATCH 02/17] Initial test of Petals as alternative to OpenAI --- package-lock.json | 7 + package.json | 1 + src/lib/ApiUtil.svelte | 2 + src/lib/ChatCompletionResponse.svelte | 12 +- src/lib/ChatRequest.svelte | 269 ++++++++++++++++++-------- src/lib/ChatSettingsModal.svelte | 29 +-- src/lib/Home.svelte | 73 ++++++- src/lib/Models.svelte | 123 +++++++++++- src/lib/Settings.svelte | 24 ++- src/lib/Stats.svelte | 48 +++-- src/lib/Types.svelte | 22 ++- 11 files changed, 469 insertions(+), 141 deletions(-) diff --git a/package-lock.json b/package-lock.json index e645d79..15510ce 100644 --- a/package-lock.json +++ b/package-lock.json @@ -27,6 +27,7 @@ "eslint-plugin-svelte3": "^4.0.0", "flourite": "^1.2.4", "gpt-tokenizer": "^2.0.0", + "llama-tokenizer-js": "^1.1.1", "postcss": "^8.4.26", "sass": "^1.63.6", "stacking-order": "^2.0.0", @@ -3182,6 +3183,12 @@ "node": ">= 0.8.0" } }, + "node_modules/llama-tokenizer-js": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/llama-tokenizer-js/-/llama-tokenizer-js-1.1.1.tgz", + "integrity": "sha512-5H2oSJnSufWGhOw6hcCGAqJeB3POmeIBzRklH3cXs0L4MSAYdwoYTodni4j5YVo6jApdhaqaNVU66gNRgXeBRg==", + "dev": true + }, "node_modules/locate-path": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-6.0.0.tgz", diff --git a/package.json b/package.json index a4dfe5d..ec0de52 100644 --- a/package.json +++ b/package.json @@ -33,6 +33,7 @@ "eslint-plugin-svelte3": "^4.0.0", "flourite": "^1.2.4", "gpt-tokenizer": "^2.0.0", + "llama-tokenizer-js": "^1.1.1", "postcss": "^8.4.26", "sass": "^1.63.6", "stacking-order": "^2.0.0", diff --git a/src/lib/ApiUtil.svelte b/src/lib/ApiUtil.svelte index 77edbc7..ceded8b 100644 --- a/src/lib/ApiUtil.svelte +++ b/src/lib/ApiUtil.svelte @@ -5,10 +5,12 @@ const endpointGenerations = import.meta.env.VITE_ENDPOINT_GENERATIONS || '/v1/images/generations' const endpointModels = import.meta.env.VITE_ENDPOINT_MODELS || '/v1/models' const endpointEmbeddings = import.meta.env.VITE_ENDPOINT_EMBEDDINGS || '/v1/embeddings' + const endpointPetalsV2Websocket = import.meta.env.VITE_PEDALS_WEBSOCKET || 'wss://chat.petals.dev/api/v2/generate' export const getApiBase = ():string => apiBase export const getEndpointCompletions = ():string => endpointCompletions export const getEndpointGenerations = ():string => endpointGenerations export const getEndpointModels = ():string => endpointModels export const getEndpointEmbeddings = ():string => endpointEmbeddings + export const getPetalsV2Websocket = ():string => endpointPetalsV2Websocket \ No newline at end of file diff --git a/src/lib/ChatCompletionResponse.svelte b/src/lib/ChatCompletionResponse.svelte index 03c1c31..a6743f6 100644 --- a/src/lib/ChatCompletionResponse.svelte +++ b/src/lib/ChatCompletionResponse.svelte @@ -1,9 +1,9 @@
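Before the Petals work begins, a condensed sketch of what PATCH 01 above does to the system prompt, assuming messagePayload[0] carries the combined prompt text (the Message type is simplified here and expandLastPrompt is an illustrative name, not a helper from the repo):

    type Message = { role: 'system' | 'user' | 'assistant', content: string }

    // Text after ::END-PROMPT:: is split off the system prompt and re-sent
    // as a trailing system message, mirroring the hunks above.
    const expandLastPrompt = (messagePayload: Message[]): Message[] => {
      const sp = messagePayload[0]
      if (!sp || sp.role !== 'system') return messagePayload
      const lastSp = sp.content.split('::END-PROMPT::')
      sp.content = lastSp[0].trim()
      const lastSpC = lastSp[1]?.trim() || ''
      if (lastSpC.length) {
        messagePayload.push({ role: 'system', content: lastSpC })
      }
      return messagePayload
    }

Two smaller fixes ride along in the same patch: splice(-1, 0, sp) now inserts the system prompt directly before the final user message rather than one message earlier, and the [[USER_PROMPT]] pattern is escaped because the unescaped /[[USER_PROMPT]]/ parses its outer brackets as a character class followed by a literal ], matching the wrong text entirely. Note the escaped m.replace(...) call still appears to discard its return value, so the substitution only takes effect once the result is assigned back to m.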
@@ -60,6 +69,8 @@ onMount(() => {

+ + {#if !apiKey} @@ -70,6 +81,66 @@ onMount(() => { {/if} + + +
+
+ + {#if showPetalsSettings} +

Set Petals API Endpoint:

+
{ + if (event.target && event.target[0].value) { + setGlobalSettingValueByKey('pedalsEndpoint', (event.target[0].value).trim()) + } else { + setGlobalSettingValueByKey('pedalsEndpoint', '') + } + }} + > +

+ +

+

+ +

+ + +
+

+ Only use {getPetalsV2Websocket()} for testing. You must set up your own Petals server for actual use. +

+

+ Do not send sensitive information when using Petals. +

+

+ For more information on Petals, see + https://github.com/petals-infra/chat.petals.dev +
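A minimal sketch of how this endpoint is resolved, assuming the value saved from the form above (stored under the upstream-spelled 'pedalsEndpoint' global setting) takes precedence over the VITE_PEDALS_WEBSOCKET build-time variable and the public default:

    // Illustrative only; the repo reads $globalStorage and import.meta.env directly.
    const resolvePetalsEndpoint = (userOverride?: string): string =>
      (userOverride || '').trim() ||
      import.meta.env.VITE_PEDALS_WEBSOCKET ||
      'wss://chat.petals.dev/api/v2/generate'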

+ {/if} + {#if !apiKey} +

Please enter your OpenAI API key above. It is required to use ChatGPT-web.

+ {/if} +
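This patch also adds llama-tokenizer-js alongside gpt-tokenizer (see the package.json hunk above): Petals serves Llama-family models, whose prompts must be counted with a Llama tokenizer rather than OpenAI's BPE. A sketch of the dispatch, reduced to a bare type switch — the repo routes this through getModelDetail in Models.svelte, and the type tag is only named 'Petals' after a rename in a later patch:

    import { encode } from 'gpt-tokenizer'
    import llamaTokenizer from 'llama-tokenizer-js'

    type ModelType = 'OpenAIChat' | 'Petals'

    // Token counts drive context-window trimming and the usage stats,
    // so they must come from the tokenizer matching the model family.
    const countTokens = (modelType: ModelType, text: string): number =>
      modelType === 'Petals'
        ? llamaTokenizer.encode(text).length
        : encode(text).length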
+
{#if apiKey}
diff --git a/src/lib/Models.svelte b/src/lib/Models.svelte index ed8861d..1289939 100644 --- a/src/lib/Models.svelte +++ b/src/lib/Models.svelte @@ -1,43 +1,63 @@ \ No newline at end of file diff --git a/src/lib/Settings.svelte b/src/lib/Settings.svelte index f4540c6..fb525af 100644 --- a/src/lib/Settings.svelte +++ b/src/lib/Settings.svelte @@ -1,7 +1,6 @@ {#if show} - {#if setting.header} -

- {@html setting.header} + {#if header} +

+ {@html header}

{/if}
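The switch from setting.header to a computed header pairs with the later Types.svelte change making header?: string | ValueFn: a header can now be a static string or a per-chat function. A sketch of the resolution step, assuming ValueFn is (chatId: number) => string as the neighboring placeholder field suggests (resolveValue is an illustrative name, not the component's actual helper):

    type ValueFn = (chatId: number) => string

    const resolveValue = (value: string | ValueFn | undefined, chatId: number): string =>
      typeof value === 'function' ? value(chatId) : (value || '')

    // e.g. in the setting-field component: $: header = resolveValue(setting.header, chatId)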
diff --git a/src/lib/Settings.svelte b/src/lib/Settings.svelte
index fb525af..09a3011 100644
--- a/src/lib/Settings.svelte
+++ b/src/lib/Settings.svelte
@@ -17,7 +17,7 @@ import { type ChatSortOption } from './Types.svelte'
- import { getTokens } from './Models.svelte'
+ import { getModelDetail, getTokens } from './Models.svelte'
 export const defaultModel:Model = 'gpt-3.5-turbo'
@@ -410,6 +410,10 @@ const modelSetting: ChatSetting & SettingSelect = {
   afterChange: (chatId, setting) => true // refresh settings
 }
+const isNotOpenAI = (chatId) => {
+  return getModelDetail(getChatSettings(chatId).model).type !== 'OpenAIChat'
+}
+
 const chatSettingsList: ChatSetting[] = [
   profileSetting,
   ...systemPromptSettings,
@@ -420,7 +424,8 @@ const chatSettingsList: ChatSetting[] = [
     key: 'stream',
     name: 'Stream Response',
     title: 'Stream responses as they are generated.',
-    type: 'boolean'
+    type: 'boolean',
+    hide: isNotOpenAI
   },
   {
     key: 'temperature',
@@ -451,7 +456,8 @@ const chatSettingsList: ChatSetting[] = [
     min: 1,
     max: 10,
     step: 1,
-    type: 'number'
+    type: 'number',
+    hide: isNotOpenAI
   },
   {
     key: 'max_tokens',
@@ -463,6 +469,7 @@ const chatSettingsList: ChatSetting[] = [
     max: 32768,
     step: 1,
     type: 'number',
+    hide: isNotOpenAI,
     forceApi: true // Since default here is different than gpt default, will make sure we always send it
   },
   {
@@ -472,7 +479,8 @@ const chatSettingsList: ChatSetting[] = [
     min: -2,
     max: 2,
     step: 0.2,
-    type: 'number'
+    type: 'number',
+    hide: isNotOpenAI
   },
   {
     key: 'frequency_penalty',
@@ -481,7 +489,8 @@ const chatSettingsList: ChatSetting[] = [
     min: -2,
     max: 2,
     step: 0.2,
-    type: 'number'
+    type: 'number',
+    hide: isNotOpenAI
   },
   {
     // logit bias editor not implemented yet

From df222e7028ee17db227cf8f7e0ed520fa7f0c65d Mon Sep 17 00:00:00 2001
From: Webifi
Date: Sat, 22 Jul 2023 13:23:24 -0500
Subject: [PATCH 04/17] Try to improve chat name suggestion

---
 src/lib/Chat.svelte | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/lib/Chat.svelte b/src/lib/Chat.svelte
index 6826d9c..ebe6874 100644
--- a/src/lib/Chat.svelte
+++ b/src/lib/Chat.svelte
@@ -273,13 +273,16 @@ const suggestName = async (): Promise<void> => {
   const suggestMessage: Message = {
     role: 'user',
-    content: "Using appropriate language, please give a 5 word summary of this conversation's topic.",
+    content: "Using appropriate language, please tell me a short 6 word summary of this conversation's topic for use as a book title. Only respond with the summary.",
     uuid: uuidv4()
   }
   const suggestMessages = $currentChatMessages.slice(0, 10) // limit to first 10 messages
   suggestMessages.push(suggestMessage)
+  chatRequest.updating = true
+  chatRequest.updatingMessage = 'Getting suggestion for chat name...'
+ const response = await chatRequest.sendRequest(suggestMessages, { chat, autoAddMessages: false, @@ -297,7 +300,7 @@ }) } else { response.getMessages().forEach(m => { - const name = m.content.split(/\s+/).slice(0, 8).join(' ').trim() + const name = m.content.split(/\s+/).slice(0, 8).join(' ').replace(/^[^a-z0-9!?]+|[^a-z0-9!?]+$/gi, '').trim() if (name) chat.name = name }) saveChatStore() From 9a6004c55d36bac64b393fe45ee832cc60b75910 Mon Sep 17 00:00:00 2001 From: Webifi Date: Sat, 22 Jul 2023 13:24:18 -0500 Subject: [PATCH 05/17] More changes for Petals integration --- src/lib/ApiUtil.svelte | 4 +- src/lib/ChatCompletionResponse.svelte | 4 + src/lib/ChatRequest.svelte | 204 ++------------------------ src/lib/ChatRequestOpenAi.svelte | 100 +++++++++++++ src/lib/ChatRequestPetals.svelte | 126 ++++++++++++++++ src/lib/Home.svelte | 6 +- src/lib/Models.svelte | 23 +-- src/lib/Stats.svelte | 19 +-- src/lib/Types.svelte | 3 +- 9 files changed, 271 insertions(+), 218 deletions(-) create mode 100644 src/lib/ChatRequestOpenAi.svelte create mode 100644 src/lib/ChatRequestPetals.svelte diff --git a/src/lib/ApiUtil.svelte b/src/lib/ApiUtil.svelte index ceded8b..afd2f7f 100644 --- a/src/lib/ApiUtil.svelte +++ b/src/lib/ApiUtil.svelte @@ -5,12 +5,12 @@ const endpointGenerations = import.meta.env.VITE_ENDPOINT_GENERATIONS || '/v1/images/generations' const endpointModels = import.meta.env.VITE_ENDPOINT_MODELS || '/v1/models' const endpointEmbeddings = import.meta.env.VITE_ENDPOINT_EMBEDDINGS || '/v1/embeddings' - const endpointPetalsV2Websocket = import.meta.env.VITE_PEDALS_WEBSOCKET || 'wss://chat.petals.dev/api/v2/generate' + const endpointPetals = import.meta.env.VITE_PEDALS_WEBSOCKET || 'wss://chat.petals.dev/api/v2/generate' export const getApiBase = ():string => apiBase export const getEndpointCompletions = ():string => endpointCompletions export const getEndpointGenerations = ():string => endpointGenerations export const getEndpointModels = ():string => endpointModels export const getEndpointEmbeddings = ():string => endpointEmbeddings - export const getPetalsV2Websocket = ():string => endpointPetalsV2Websocket + export const getPetals = ():string => endpointPetals \ No newline at end of file diff --git a/src/lib/ChatCompletionResponse.svelte b/src/lib/ChatCompletionResponse.svelte index a6743f6..ab5fcff 100644 --- a/src/lib/ChatCompletionResponse.svelte +++ b/src/lib/ChatCompletionResponse.svelte @@ -65,6 +65,10 @@ export class ChatCompletionResponse { this.promptTokenCount = tokens } + getPromptTokenCount (): number { + return this.promptTokenCount + } + async updateImageFromSyncResponse (response: ResponseImage, prompt: string, model: Model) { this.setModel(model) for (let i = 0; i < response.data.length; i++) { diff --git a/src/lib/ChatRequest.svelte b/src/lib/ChatRequest.svelte index 20b5626..40c966e 100644 --- a/src/lib/ChatRequest.svelte +++ b/src/lib/ChatRequest.svelte @@ -6,10 +6,11 @@ import { deleteMessage, getChatSettingValueNullDefault, insertMessages, getApiKey, addError, currentChatMessages, getMessages, updateMessages, deleteSummaryMessage } from './Storage.svelte' import { scrollToBottom, scrollToMessage } from './Util.svelte' import { getRequestSettingList, defaultModel } from './Settings.svelte' - import { EventStreamContentType, fetchEventSource } from '@microsoft/fetch-event-source' import { v4 as uuidv4 } from 'uuid' import { get } from 'svelte/store' - import { getEndpoint, getModelDetail, getRoleTag } from './Models.svelte' + import { getEndpoint, getModelDetail } from 
'./Models.svelte' + import { runOpenAiCompletionRequest } from './ChatRequestOpenAi.svelte' + import { runPetalsCompletionRequest } from './ChatRequestPetals.svelte' export class ChatRequest { constructor () { @@ -27,6 +28,14 @@ export class ChatRequest { this.chat = chat } + getChat (): Chat { + return this.chat + } + + getChatSettings (): ChatSettings { + return this.chat.settings + } + // Common error handler async handleError (response) { let errorResponse @@ -258,193 +267,10 @@ export class ChatRequest { _this.controller = new AbortController() const signal = _this.controller.signal - if (modelDetail.type === 'PetalsV2Websocket') { - // Petals - const ws = new WebSocket(getEndpoint(model)) - const abortListener = (e:Event) => { - _this.updating = false - _this.updatingMessage = '' - chatResponse.updateFromError('User aborted request.') - signal.removeEventListener('abort', abortListener) - ws.close() - } - signal.addEventListener('abort', abortListener) - const stopSequences = modelDetail.stop || ['###'] - const stopSequencesC = stopSequences.slice() - const stopSequence = stopSequencesC.shift() - chatResponse.onFinish(() => { - _this.updating = false - _this.updatingMessage = '' - }) - ws.onopen = () => { - ws.send(JSON.stringify({ - type: 'open_inference_session', - model, - max_length: maxTokens || opts.maxTokens - })) - ws.onmessage = event => { - const response = JSON.parse(event.data) - if (!response.ok) { - const err = new Error('Error opening socket: ' + response.traceback) - console.error(err) - throw err - } - const rMessages = request.messages || [] as Message[] - const inputArray = (rMessages).reduce((a, m) => { - const c = getRoleTag(m.role, model, chatSettings) + m.content - a.push(c) - return a - }, [] as string[]) - const lastMessage = rMessages[rMessages.length - 1] - if (lastMessage && lastMessage.role !== 'assistant') { - inputArray.push(getRoleTag('assistant', model, chatSettings)) - } - const petalsRequest = { - type: 'generate', - inputs: (request.messages || [] as Message[]).reduce((a, m) => { - const c = getRoleTag(m.role, model, chatSettings) + m.content - a.push(c) - return a - }, [] as string[]).join(stopSequence), - max_new_tokens: 3, // wait for up to 3 tokens before displaying - stop_sequence: stopSequence, - doSample: 1, - temperature: request.temperature || 0, - top_p: request.top_p || 0, - extra_stop_sequences: stopSequencesC - } - ws.send(JSON.stringify(petalsRequest)) - ws.onmessage = event => { - // Remove updating indicator - _this.updating = 1 // hide indicator, but still signal we're updating - _this.updatingMessage = '' - const response = JSON.parse(event.data) - if (!response.ok) { - const err = new Error('Error in response: ' + response.traceback) - console.error(err) - throw err - } - window.setTimeout(() => { - chatResponse.updateFromAsyncResponse( - { - model, - choices: [{ - delta: { - content: response.outputs, - role: 'assistant' - }, - finish_reason: (response.stop ? 
'stop' : null) - }] - } as any - ) - if (response.stop) { - const message = chatResponse.getMessages()[0] - if (message) { - for (let i = 0, l = stopSequences.length; i < l; i++) { - if (message.content.endsWith(stopSequences[i])) { - message.content = message.content.slice(0, message.content.length - stopSequences[i].length) - updateMessages(chatId) - } - } - } - } - }, 1) - } - } - ws.onclose = () => { - _this.updating = false - _this.updatingMessage = '' - chatResponse.updateFromClose() - } - ws.onerror = err => { - console.error(err) - throw err - } - } + if (modelDetail.type === 'Petals') { + await runPetalsCompletionRequest(request, _this as any, chatResponse as any, signal, opts) } else { - // OpenAI - const abortListener = (e:Event) => { - _this.updating = false - _this.updatingMessage = '' - chatResponse.updateFromError('User aborted request.') - signal.removeEventListener('abort', abortListener) - } - signal.addEventListener('abort', abortListener) - const fetchOptions = { - method: 'POST', - headers: { - Authorization: `Bearer ${getApiKey()}`, - 'Content-Type': 'application/json' - }, - body: JSON.stringify(request), - signal - } - - if (opts.streaming) { - /** - * Streaming request/response - * We'll get the response a token at a time, as soon as they are ready - */ - chatResponse.onFinish(() => { - _this.updating = false - _this.updatingMessage = '' - }) - fetchEventSource(getEndpoint(model), { - ...fetchOptions, - openWhenHidden: true, - onmessage (ev) { - // Remove updating indicator - _this.updating = 1 // hide indicator, but still signal we're updating - _this.updatingMessage = '' - // console.log('ev.data', ev.data) - if (!chatResponse.hasFinished()) { - if (ev.data === '[DONE]') { - // ?? anything to do when "[DONE]"? - } else { - const data = JSON.parse(ev.data) - // console.log('data', data) - window.setTimeout(() => { chatResponse.updateFromAsyncResponse(data) }, 1) - } - } - }, - onclose () { - _this.updating = false - _this.updatingMessage = '' - chatResponse.updateFromClose() - }, - onerror (err) { - console.error(err) - throw err - }, - async onopen (response) { - if (response.ok && response.headers.get('content-type') === EventStreamContentType) { - // everything's good - } else { - // client-side errors are usually non-retriable: - await _this.handleError(response) - } - } - }).catch(err => { - _this.updating = false - _this.updatingMessage = '' - chatResponse.updateFromError(err.message) - }) - } else { - /** - * Non-streaming request/response - * We'll get the response all at once, after a long delay - */ - const response = await fetch(getEndpoint(model), fetchOptions) - if (!response.ok) { - await _this.handleError(response) - } else { - const json = await response.json() - // Remove updating indicator - _this.updating = false - _this.updatingMessage = '' - chatResponse.updateFromSyncResponse(json) - } - } + await runOpenAiCompletionRequest(request, _this as any, chatResponse as any, signal, opts) } } catch (e) { // console.error(e) @@ -456,7 +282,7 @@ export class ChatRequest { return chatResponse } - private getModel (): Model { + getModel (): Model { return this.chat.settings.model || defaultModel } diff --git a/src/lib/ChatRequestOpenAi.svelte b/src/lib/ChatRequestOpenAi.svelte new file mode 100644 index 0000000..37495ef --- /dev/null +++ b/src/lib/ChatRequestOpenAi.svelte @@ -0,0 +1,100 @@ + \ No newline at end of file diff --git a/src/lib/ChatRequestPetals.svelte b/src/lib/ChatRequestPetals.svelte new file mode 100644 index 0000000..b0c1bac --- 
/dev/null +++ b/src/lib/ChatRequestPetals.svelte @@ -0,0 +1,126 @@ + \ No newline at end of file diff --git a/src/lib/Home.svelte b/src/lib/Home.svelte index c86a17a..a69b1c2 100644 --- a/src/lib/Home.svelte +++ b/src/lib/Home.svelte @@ -3,7 +3,7 @@ import Footer from './Footer.svelte' import { replace } from 'svelte-spa-router' import { onMount } from 'svelte' - import { getPetalsV2Websocket } from './ApiUtil.svelte' + import { getPetals } from './ApiUtil.svelte' $: apiKey = $apiKeyStorage @@ -112,7 +112,7 @@ const setPetalsEnabled = (event: Event) => { aria-label="PetalsAPI Endpoint" type="text" class="input" - placeholder={getPetalsV2Websocket()} + placeholder={getPetals()} value={$globalStorage.pedalsEndpoint || ''} />
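The extraction above splits the transport code into runOpenAiCompletionRequest and runPetalsCompletionRequest, selected by getModelDetail(model).type. The Petals side speaks the chat.petals.dev v2 websocket protocol visible in the deleted inline code: one session-open message, one generate request, then streamed chunks until stop. A trimmed sketch of that exchange, with error recovery and stop-sequence cleanup omitted and onToken standing in for the repo's response plumbing:

    const streamPetals = (
      endpoint: string,
      model: string,
      prompt: string, // role-tagged transcript joined on the stop sequence
      stopSequence: string, // e.g. '###'
      temperature: number,
      topP: number,
      maxTokens: number,
      onToken: (text: string, done: boolean) => void
    ) => {
      const ws = new WebSocket(endpoint)
      ws.onopen = () => {
        // 1) open an inference session for the requested model
        ws.send(JSON.stringify({ type: 'open_inference_session', model, max_length: maxTokens }))
        ws.onmessage = event => {
          const opened = JSON.parse(event.data)
          if (!opened.ok) throw new Error('Error opening socket: ' + opened.traceback)
          // 2) request generation; max_new_tokens: 1 streams each token as soon as it is ready
          ws.send(JSON.stringify({
            type: 'generate',
            inputs: prompt,
            max_new_tokens: 1,
            stop_sequence: stopSequence,
            do_sample: 1,
            temperature,
            top_p: topP
          }))
          // 3) consume streamed chunks until the server signals stop
          ws.onmessage = e => {
            const chunk = JSON.parse(e.data)
            if (!chunk.ok) throw new Error('Error in response: ' + chunk.traceback)
            onToken(chunk.outputs, !!chunk.stop)
            if (chunk.stop) ws.close()
          }
        }
      }
    }

Patches 07 and 09 below then make sampling behave: do_sample: 1 is what lets temperature and top_p apply at all, and both values are floored at 0.01, presumably because the remote sampler rejects zero.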

@@ -123,7 +123,7 @@ const setPetalsEnabled = (event: Event) => {

- Only use {getPetalsV2Websocket()} for testing. You must set up your own Petals server for actual use. + Only use {getPetals()} for testing. You must set up your own Petals server for actual use.

Do not send sensitive information when using Petals. diff --git a/src/lib/Models.svelte b/src/lib/Models.svelte index 1289939..8f03e24 100644 --- a/src/lib/Models.svelte +++ b/src/lib/Models.svelte @@ -1,5 +1,5 @@ \ No newline at end of file From 15dcd27e8f825778af3a274be627635f1038a1a4 Mon Sep 17 00:00:00 2001 From: Webifi Date: Sat, 22 Jul 2023 16:48:26 -0500 Subject: [PATCH 07/17] Get temp and top_p working for Petals --- src/lib/ChatRequestPetals.svelte | 16 ++++++++++++---- src/lib/Settings.svelte | 26 ++++++++++++++++---------- src/lib/Types.svelte | 4 ++-- 3 files changed, 30 insertions(+), 16 deletions(-) diff --git a/src/lib/ChatRequestPetals.svelte b/src/lib/ChatRequestPetals.svelte index 15f0278..eefd31d 100644 --- a/src/lib/ChatRequestPetals.svelte +++ b/src/lib/ChatRequestPetals.svelte @@ -53,6 +53,14 @@ export const runPetalsCompletionRequest = async ( throw err } const rMessages = request.messages || [] as Message[] + // make sure top_p and temperature are set the way we need + let temperature = request.temperature || 0 + if (isNaN(temperature as any) || temperature === 1) temperature = 1 + if (temperature === 0) temperature = 0.0001 + let topP = request.top_p + if (isNaN(topP as any) || topP === 1) topP = 1 + if (topP === 0) topP = 0.0001 + // build the message array const inputArray = (rMessages).reduce((a, m) => { const c = getRoleTag(m.role, model, chatRequest.chat) + m.content a.push(c) @@ -65,11 +73,11 @@ export const runPetalsCompletionRequest = async ( const petalsRequest = { type: 'generate', inputs: inputArray.join(stopSequence), - max_new_tokens: 3, // wait for up to 3 tokens before displaying + max_new_tokens: 1, // wait for up to 1 tokens before displaying stop_sequence: stopSequence, - doSample: 1, - temperature: request.temperature || 0, - top_p: request.top_p || 0, + do_sample: 1, // enable top p and the like + temperature, + top_p: topP, extra_stop_sequences: stopSequencesC } ws.send(JSON.stringify(petalsRequest)) diff --git a/src/lib/Settings.svelte b/src/lib/Settings.svelte index 29f5be2..5f01837 100644 --- a/src/lib/Settings.svelte +++ b/src/lib/Settings.svelte @@ -55,6 +55,14 @@ export const getExcludeFromProfile = () => { return excludeFromProfile } +const isNotOpenAI = (chatId) => { + return getModelDetail(getChatSettings(chatId).model).type !== 'OpenAIChat' +} + +const isNotPetals = (chatId) => { + return getModelDetail(getChatSettings(chatId).model).type !== 'Petals' +} + const gptDefaults = { model: defaultModel, messages: [], @@ -406,7 +414,13 @@ const modelSetting: ChatSetting & SettingSelect = { key: 'model', name: 'Model', title: 'The model to use - GPT-3.5 is cheaper, but GPT-4 is more powerful.', - header: 'Below are the settings that OpenAI allows to be changed for the API calls. See the OpenAI API docs for more details.', + header: (chatId) => { + if (isNotOpenAI(chatId)) { + return 'Below are the settings that can be changed for the API calls. See this overview to start, though not all settings translate to Petals.' + } else { + return 'Below are the settings that OpenAI allows to be changed for the API calls. See the OpenAI API docs for more details.' 
+ } + }, headerClass: 'is-warning', options: [], type: 'select', @@ -414,14 +428,6 @@ const modelSetting: ChatSetting & SettingSelect = { afterChange: (chatId, setting) => true // refresh settings } -const isNotOpenAI = (chatId) => { - return getModelDetail(getChatSettings(chatId).model).type !== 'OpenAIChat' -} - -const isNotPetals = (chatId) => { - return getModelDetail(getChatSettings(chatId).model).type !== 'Petals' -} - const chatSettingsList: ChatSetting[] = [ profileSetting, ...systemPromptSettings, @@ -448,7 +454,7 @@ const chatSettingsList: ChatSetting[] = [ }, { key: 'top_p', - name: 'Nucleus Sampling', + name: 'Nucleus Sampling (Top-p)', title: 'An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.\n' + '\n' + 'We generally recommend altering this or temperature but not both', diff --git a/src/lib/Types.svelte b/src/lib/Types.svelte index 30e0319..3757c9f 100644 --- a/src/lib/Types.svelte +++ b/src/lib/Types.svelte @@ -259,8 +259,8 @@ export type ChatSetting = { title: string; forceApi?: boolean; // force in api requests, even if set to default hidden?: boolean; // Hide from setting menus - header?: string; - headerClass?: string; + header?: string | ValueFn; + headerClass?: string | ValueFn; placeholder?: string | ValueFn; hide?: (chatId:number) => boolean; apiTransform?: (chatId:number, setting:ChatSetting, value:any) => any; From 7aadca3c5c52ccfce2e6c8eb28bb6bf367c66027 Mon Sep 17 00:00:00 2001 From: Webifi Date: Sat, 22 Jul 2023 17:08:40 -0500 Subject: [PATCH 08/17] Better error handling for Petals --- src/lib/ChatCompletionResponse.svelte | 8 ++++---- src/lib/ChatRequestPetals.svelte | 2 ++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/lib/ChatCompletionResponse.svelte b/src/lib/ChatCompletionResponse.svelte index ab5fcff..72fd4e0 100644 --- a/src/lib/ChatCompletionResponse.svelte +++ b/src/lib/ChatCompletionResponse.svelte @@ -175,15 +175,15 @@ export class ChatCompletionResponse { } as Message) } this.notifyMessageChange() - setTimeout(() => this.finish(), 250) // give others a chance to signal the finish first + setTimeout(() => this.finish(), 200) // give others a chance to signal the finish first } updateFromClose (force: boolean = false): void { if (!this.finished && !this.error && !this.messages?.find(m => m.content)) { - if (!force) return setTimeout(() => this.updateFromClose(true), 250) as any - return this.updateFromError('Unexpected connection termination') + if (!force) return setTimeout(() => this.updateFromClose(true), 300) as any + if (!this.finished) return this.updateFromError('Unexpected connection termination') } - setTimeout(() => this.finish(), 250) // give others a chance to signal the finish first + setTimeout(() => this.finish(), 260) // give others a chance to signal the finish first } onMessageChange = (listener: (m: Message[]) => void): number => diff --git a/src/lib/ChatRequestPetals.svelte b/src/lib/ChatRequestPetals.svelte index eefd31d..356ae12 100644 --- a/src/lib/ChatRequestPetals.svelte +++ b/src/lib/ChatRequestPetals.svelte @@ -49,6 +49,7 @@ export const runPetalsCompletionRequest = async ( const response = JSON.parse(event.data) if (!response.ok) { const err = new Error('Error opening socket: ' + response.traceback) + chatResponse.updateFromError(err.message) console.error(err) throw err } @@ -89,6 +90,7 @@ export const 
runPetalsCompletionRequest = async ( if (!response.ok) { const err = new Error('Error in response: ' + response.traceback) console.error(err) + chatResponse.updateFromError(err.message) throw err } window.setTimeout(() => { From ca19bab19dae52f7966a7993fcc9d2130b766ec4 Mon Sep 17 00:00:00 2001 From: Webifi Date: Sat, 22 Jul 2023 17:21:01 -0500 Subject: [PATCH 09/17] Don't allow too low of temp or top_p --- src/lib/ChatRequestPetals.svelte | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/lib/ChatRequestPetals.svelte b/src/lib/ChatRequestPetals.svelte index 356ae12..9d72ebe 100644 --- a/src/lib/ChatRequestPetals.svelte +++ b/src/lib/ChatRequestPetals.svelte @@ -56,11 +56,11 @@ export const runPetalsCompletionRequest = async ( const rMessages = request.messages || [] as Message[] // make sure top_p and temperature are set the way we need let temperature = request.temperature || 0 - if (isNaN(temperature as any) || temperature === 1) temperature = 1 - if (temperature === 0) temperature = 0.0001 + if (isNaN(temperature as any)) temperature = 1 + if (!temperature || temperature <= 0) temperature = 0.01 let topP = request.top_p - if (isNaN(topP as any) || topP === 1) topP = 1 - if (topP === 0) topP = 0.0001 + if (topP === undefined || isNaN(topP as any)) topP = 1 + if (!topP || topP <= 0) topP = 0.01 // build the message array const inputArray = (rMessages).reduce((a, m) => { const c = getRoleTag(m.role, model, chatRequest.chat) + m.content From f6380e1cc2ffe56e59820013dc438f65d1ba687b Mon Sep 17 00:00:00 2001 From: Webifi Date: Mon, 24 Jul 2023 15:26:17 -0500 Subject: [PATCH 10/17] Allow Petals and/or OpenAI --- src/lib/ChatMenuItem.svelte | 4 +- src/lib/ChatOptionMenu.svelte | 8 +-- src/lib/ChatRequest.svelte | 5 +- src/lib/ChatSettingField.svelte | 2 +- src/lib/Home.svelte | 61 +++++++++++++--------- src/lib/Models.svelte | 90 ++++++++++++++++++++++++--------- src/lib/Profiles.svelte | 4 +- src/lib/Settings.svelte | 13 +++-- src/lib/Sidebar.svelte | 8 +-- src/lib/Storage.svelte | 7 ++- src/lib/Types.svelte | 1 + 11 files changed, 139 insertions(+), 64 deletions(-) diff --git a/src/lib/ChatMenuItem.svelte b/src/lib/ChatMenuItem.svelte index 4503042..628fe7e 100644 --- a/src/lib/ChatMenuItem.svelte +++ b/src/lib/ChatMenuItem.svelte @@ -1,7 +1,7 @@