Merge pull request #235 from Webifi/main

Experimental support for Petals/Llama 2
Niek van der Maas 2023-07-25 07:32:18 +02:00 committed by GitHub
commit f877ac09ab
22 changed files with 854 additions and 218 deletions

package-lock.json (generated)

@@ -27,6 +27,7 @@
  "eslint-plugin-svelte3": "^4.0.0",
  "flourite": "^1.2.4",
  "gpt-tokenizer": "^2.0.0",
+ "llama-tokenizer-js": "^1.1.1",
  "postcss": "^8.4.26",
  "sass": "^1.63.6",
  "stacking-order": "^2.0.0",
@@ -3182,6 +3183,12 @@
  "node": ">= 0.8.0"
  }
  },
+ "node_modules/llama-tokenizer-js": {
+   "version": "1.1.1",
+   "resolved": "https://registry.npmjs.org/llama-tokenizer-js/-/llama-tokenizer-js-1.1.1.tgz",
+   "integrity": "sha512-5H2oSJnSufWGhOw6hcCGAqJeB3POmeIBzRklH3cXs0L4MSAYdwoYTodni4j5YVo6jApdhaqaNVU66gNRgXeBRg==",
+   "dev": true
+ },
  "node_modules/locate-path": {
  "version": "6.0.0",
  "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-6.0.0.tgz",


@@ -33,6 +33,7 @@
  "eslint-plugin-svelte3": "^4.0.0",
  "flourite": "^1.2.4",
  "gpt-tokenizer": "^2.0.0",
+ "llama-tokenizer-js": "^1.1.1",
  "postcss": "^8.4.26",
  "sass": "^1.63.6",
  "stacking-order": "^2.0.0",


@@ -5,10 +5,12 @@
const endpointGenerations = import.meta.env.VITE_ENDPOINT_GENERATIONS || '/v1/images/generations'
const endpointModels = import.meta.env.VITE_ENDPOINT_MODELS || '/v1/models'
const endpointEmbeddings = import.meta.env.VITE_ENDPOINT_EMBEDDINGS || '/v1/embeddings'
+ const endpointPetals = import.meta.env.VITE_PEDALS_WEBSOCKET || 'wss://chat.petals.dev/api/v2/generate'
export const getApiBase = ():string => apiBase
export const getEndpointCompletions = ():string => endpointCompletions
export const getEndpointGenerations = ():string => endpointGenerations
export const getEndpointModels = ():string => endpointModels
export const getEndpointEmbeddings = ():string => endpointEmbeddings
+ export const getPetals = ():string => endpointPetals
</script>


@@ -40,6 +40,7 @@
import { openModal } from 'svelte-modals'
import PromptInput from './PromptInput.svelte'
import { ChatRequest } from './ChatRequest.svelte'
+ import { getModelDetail } from './Models.svelte'
export let params = { chatId: '' }
const chatId: number = parseInt(params.chatId)
@@ -245,6 +246,19 @@
chatRequest.updating = true
chatRequest.updatingMessage = ''
+ let doScroll = true
+ let didScroll = false
+ const checkUserScroll = (e: Event) => {
+   const el = e.target as HTMLElement
+   if (el && e.isTrusted && didScroll) {
+     // from user
+     doScroll = (window.innerHeight + window.scrollY + 10) >= document.body.offsetHeight
+   }
+ }
+ window.addEventListener('scroll', checkUserScroll)
try {
  const response = await chatRequest.sendRequest($currentChatMessages, {
    chat,
@@ -252,7 +266,8 @@
    streaming: chatSettings.stream,
    fillMessage,
    onMessageChange: (messages) => {
-     scrollToBottom(true)
+     if (doScroll) scrollToBottom(true)
+     didScroll = !!messages[0]?.content
    }
  })
  await response.promiseToFinish()
@@ -264,6 +279,8 @@
  console.error(e)
}
+ window.removeEventListener('scroll', checkUserScroll)
chatRequest.updating = false
chatRequest.updatingMessage = ''
@@ -273,13 +290,16 @@
const suggestName = async (): Promise<void> => {
  const suggestMessage: Message = {
    role: 'user',
-   content: "Using appropriate language, please give a 5 word summary of this conversation's topic.",
+   content: "Using appropriate language, please tell me a short 6 word summary of this conversation's topic for use as a book title. Only respond with the summary.",
    uuid: uuidv4()
  }
  const suggestMessages = $currentChatMessages.slice(0, 10) // limit to first 10 messages
  suggestMessages.push(suggestMessage)
+ chatRequest.updating = true
+ chatRequest.updatingMessage = 'Getting suggestion for chat name...'
  const response = await chatRequest.sendRequest(suggestMessages, {
    chat,
    autoAddMessages: false,
@@ -297,7 +317,7 @@
  })
} else {
  response.getMessages().forEach(m => {
-   const name = m.content.split(/\s+/).slice(0, 8).join(' ').trim()
+   const name = m.content.split(/\s+/).slice(0, 8).join(' ').replace(/^[^a-z0-9!?]+|[^a-z0-9!?]+$/gi, '').trim()
    if (name) chat.name = name
  })
  saveChatStore()
@@ -420,7 +440,7 @@
<div class="content has-text-centered running-total-container">
  {#each Object.entries(chat.usage || {}) as [model, usage]}
    <p class="is-size-7 running-totals">
-     <em>{model}</em> total <span class="has-text-weight-bold">{usage.total_tokens}</span>
+     <em>{getModelDetail(model || '').label || model}</em> total <span class="has-text-weight-bold">{usage.total_tokens}</span>
      tokens ~= <span class="has-text-weight-bold">${getPrice(usage, model).toFixed(6)}</span>
    </p>
  {/each}


@@ -1,9 +1,9 @@
<script context="module" lang="ts">
import { setImage } from './ImageStore.svelte'
+ import { countTokens } from './Models.svelte'
// TODO: Integrate API calls
- import { addMessage, getLatestKnownModel, saveChatStore, setLatestKnownModel, subtractRunningTotal, updateRunningTotal } from './Storage.svelte'
+ import { addMessage, getLatestKnownModel, setLatestKnownModel, subtractRunningTotal, updateMessages, updateRunningTotal } from './Storage.svelte'
import type { Chat, ChatCompletionOpts, ChatImage, Message, Model, Response, ResponseImage, Usage } from './Types.svelte'
- import { encode } from 'gpt-tokenizer'
import { v4 as uuidv4 } from 'uuid'
export class ChatCompletionResponse {
@@ -65,6 +65,10 @@ export class ChatCompletionResponse {
  this.promptTokenCount = tokens
}
+ getPromptTokenCount (): number {
+   return this.promptTokenCount
+ }
async updateImageFromSyncResponse (response: ResponseImage, prompt: string, model: Model) {
  this.setModel(model)
  for (let i = 0; i < response.data.length; i++) {
@@ -138,10 +142,10 @@
  message.content = this.initialFillMerge(message.content, choice.delta?.content)
  message.content += choice.delta.content
}
- completionTokenCount += encode(message.content).length
+ completionTokenCount += countTokens(this.model, message.content)
message.model = response.model
message.finish_reason = choice.finish_reason
- message.streaming = choice.finish_reason === null && !this.finished
+ message.streaming = !choice.finish_reason && !this.finished
this.messages[i] = message
})
// total up the tokens
@@ -171,15 +175,15 @@
  } as Message)
}
this.notifyMessageChange()
- setTimeout(() => this.finish(), 250) // give others a chance to signal the finish first
+ setTimeout(() => this.finish(), 200) // give others a chance to signal the finish first
}
updateFromClose (force: boolean = false): void {
  if (!this.finished && !this.error && !this.messages?.find(m => m.content)) {
-   if (!force) return setTimeout(() => this.updateFromClose(true), 250) as any
+   if (!force) return setTimeout(() => this.updateFromClose(true), 300) as any
-   return this.updateFromError('Unexpected connection termination')
+   if (!this.finished) return this.updateFromError('Unexpected connection termination')
  }
- setTimeout(() => this.finish(), 250) // give others a chance to signal the finish first
+ setTimeout(() => this.finish(), 260) // give others a chance to signal the finish first
}
onMessageChange = (listener: (m: Message[]) => void): number =>
@@ -209,10 +213,10 @@
}
private finish = (): void => {
+ this.messages.forEach(m => { m.streaming = false }) // make sure all are marked stopped
+ updateMessages(this.chat.id)
  if (this.finished) return
  this.finished = true
- this.messages.forEach(m => { m.streaming = false }) // make sure all are marked stopped
- saveChatStore()
  const message = this.messages[0]
  const model = this.model || getLatestKnownModel(this.chat.settings.model)
  if (message) {


@@ -1,7 +1,7 @@
<script lang="ts">
import { replace } from 'svelte-spa-router'
import type { Chat } from './Types.svelte'
- import { apiKeyStorage, deleteChat, pinMainMenu, saveChatStore } from './Storage.svelte'
+ import { deleteChat, hasActiveModels, pinMainMenu, saveChatStore } from './Storage.svelte'
import Fa from 'svelte-fa/src/fa.svelte'
import { faTrash, faCircleCheck, faPencil } from '@fortawesome/free-solid-svg-icons/index'
import { faMessage } from '@fortawesome/free-regular-svg-icons/index'
@@ -86,7 +86,7 @@
<a
  href={`#/chat/${chat.id}`}
  class="chat-menu-item"
- class:is-waiting={waitingForConfirm} class:is-disabled={!$apiKeyStorage} class:is-active={activeChatId === chat.id}
+ class:is-waiting={waitingForConfirm} class:is-disabled={!hasActiveModels()} class:is-active={activeChatId === chat.id}
  on:click={() => { $pinMainMenu = false }} >
  {#if waitingForConfirm}
    <a class="is-pulled-right is-hidden px-1 py-0 has-text-weight-bold delete-button" href={'$'} on:click|preventDefault={() => delChat()}><Fa icon={faCircleCheck} /></a>


@@ -18,7 +18,7 @@
  faEyeSlash
} from '@fortawesome/free-solid-svg-icons/index'
import { faSquareMinus, faSquarePlus as faSquarePlusOutline } from '@fortawesome/free-regular-svg-icons/index'
- import { apiKeyStorage, addChatFromJSON, chatsStorage, checkStateChange, clearChats, clearMessages, copyChat, globalStorage, setGlobalSettingValueByKey, showSetChatSettings, pinMainMenu, getChat, deleteChat, saveChatStore, saveCustomProfile } from './Storage.svelte'
+ import { addChatFromJSON, chatsStorage, checkStateChange, clearChats, clearMessages, copyChat, globalStorage, setGlobalSettingValueByKey, showSetChatSettings, pinMainMenu, getChat, deleteChat, saveChatStore, saveCustomProfile, hasActiveModels } from './Storage.svelte'
import { exportAsMarkdown, exportChatAsJSON } from './Export.svelte'
import { newNameForProfile, restartProfile } from './Profiles.svelte'
import { replace } from 'svelte-spa-router'
@@ -173,7 +173,7 @@
  <span class="menu-icon"><Fa icon={faGear}/></span> Chat Profile Settings
</a>
<hr class="dropdown-divider">
- <a href={'#'} class:is-disabled={!$apiKeyStorage} on:click|preventDefault={() => { $apiKeyStorage && close(); $apiKeyStorage && startNewChatWithWarning(chatId) }} class="dropdown-item">
+ <a href={'#'} class:is-disabled={!hasActiveModels()} on:click|preventDefault={() => { hasActiveModels() && close(); hasActiveModels() && startNewChatWithWarning(chatId) }} class="dropdown-item">
  <span class="menu-icon"><Fa icon={faSquarePlus}/></span> New Chat from Default
</a>
<a href={'#'} class:is-disabled={!chatId} on:click|preventDefault={() => { chatId && close(); chatId && startNewChatFromChatId(chatId) }} class="dropdown-item">
@@ -196,14 +196,14 @@
<a href={'#'} class="dropdown-item" class:is-disabled={!chatId} on:click|preventDefault={() => { close(); exportChatAsJSON(chatId) }}>
  <span class="menu-icon"><Fa icon={faDownload}/></span> Backup Chat JSON
</a>
- <a href={'#'} class="dropdown-item" class:is-disabled={!$apiKeyStorage} on:click|preventDefault={() => { if (chatId) close(); chatFileInput.click() }}>
+ <a href={'#'} class="dropdown-item" class:is-disabled={!hasActiveModels()} on:click|preventDefault={() => { if (chatId) close(); chatFileInput.click() }}>
  <span class="menu-icon"><Fa icon={faUpload}/></span> Restore Chat JSON
</a>
<a href={'#'} class="dropdown-item" class:is-disabled={!chatId} on:click|preventDefault={() => { if (chatId) close(); exportAsMarkdown(chatId) }}>
  <span class="menu-icon"><Fa icon={faFileExport}/></span> Export Chat Markdown
</a>
<hr class="dropdown-divider">
- <a href={'#'} class="dropdown-item" class:is-disabled={!$apiKeyStorage} on:click|preventDefault={() => { if (chatId) close(); profileFileInput.click() }}>
+ <a href={'#'} class="dropdown-item" class:is-disabled={!hasActiveModels()} on:click|preventDefault={() => { if (chatId) close(); profileFileInput.click() }}>
  <span class="menu-icon"><Fa icon={faUpload}/></span> Restore Profile JSON
</a>
<hr class="dropdown-divider">


@@ -5,11 +5,12 @@
import type { Chat, ChatCompletionOpts, ChatSettings, Message, Model, Request, RequestImageGeneration } from './Types.svelte'
import { deleteMessage, getChatSettingValueNullDefault, insertMessages, getApiKey, addError, currentChatMessages, getMessages, updateMessages, deleteSummaryMessage } from './Storage.svelte'
import { scrollToBottom, scrollToMessage } from './Util.svelte'
- import { getRequestSettingList, defaultModel } from './Settings.svelte'
+ import { getDefaultModel, getRequestSettingList } from './Settings.svelte'
- import { EventStreamContentType, fetchEventSource } from '@microsoft/fetch-event-source'
- import { getApiBase, getEndpointCompletions, getEndpointGenerations } from './ApiUtil.svelte'
import { v4 as uuidv4 } from 'uuid'
import { get } from 'svelte/store'
+ import { getEndpoint, getModelDetail } from './Models.svelte'
+ import { runOpenAiCompletionRequest } from './ChatRequestOpenAi.svelte'
+ import { runPetalsCompletionRequest } from './ChatRequestPetals.svelte'
export class ChatRequest {
  constructor () {
@@ -25,6 +26,15 @@ export class ChatRequest {
  setChat (chat: Chat) {
    this.chat = chat
+   this.chat.settings.model = this.getModel()
+ }
+ getChat (): Chat {
+   return this.chat
+ }
+ getChatSettings (): ChatSettings {
+   return this.chat.settings
  }
  // Common error handler
@@ -77,7 +87,7 @@
  const chatResponse = new ChatCompletionResponse(opts)
  try {
-   const response = await fetch(getApiBase() + getEndpointGenerations(), fetchOptions)
+   const response = await fetch(getEndpoint('dall-e-' + size), fetchOptions)
    if (!response.ok) {
      await _this.handleError(response)
    } else {
@@ -159,6 +169,8 @@
  const spl = chatSettings.sendSystemPromptLast
  const sp = messagePayload[0]
  if (sp) {
+   const lastSp = sp.content.split('::END-PROMPT::')
+   sp.content = lastSp[0].trim()
    if (messagePayload.length > 1) {
      sp.content = sp.content.replace(/::STARTUP::[\s\S]*::EOM::/, '::EOM::')
      sp.content = sp.content.replace(/::STARTUP::[\s\S]*::START-PROMPT::/, '::START-PROMPT::')
@@ -170,7 +182,7 @@
    if (spl) {
      messagePayload.shift()
      if (messagePayload[messagePayload.length - 1]?.role === 'user') {
-       messagePayload.splice(-2, 0, sp)
+       messagePayload.splice(-1, 0, sp)
      } else {
        messagePayload.push(sp)
      }
@@ -196,11 +208,15 @@
      }).filter(m => m.content.length)
      messagePayload.splice(spl ? 0 : 1, 0, ...ms.concat(splitSystem.map(s => ({ role: 'system', content: s.trim() } as Message)).filter(m => m.content.length)))
    }
+   const lastSpC = lastSp[1]?.trim() || ''
+   if (lastSpC.length) {
+     messagePayload.push({ role: 'system', content: lastSpC } as Message)
+   }
  }
}
// Get token counts
- const promptTokenCount = countPromptTokens(messagePayload, model)
+ const promptTokenCount = countPromptTokens(messagePayload, model, chat)
const maxAllowed = maxTokens - (promptTokenCount + 1)
// Build the API request body
@@ -239,6 +255,9 @@
// Set-up and make the request
const chatResponse = new ChatCompletionResponse(opts)
+ const modelDetail = getModelDetail(model)
try {
  // Add out token count to the response handler
  // (streaming doesn't return counts, so we need to do it client side)
@@ -248,88 +267,11 @@
  // so we deal with it ourselves
  _this.controller = new AbortController()
  const signal = _this.controller.signal
- const abortListener = (e:Event) => {
-   _this.updating = false
-   _this.updatingMessage = ''
-   chatResponse.updateFromError('User aborted request.')
-   signal.removeEventListener('abort', abortListener)
- }
- signal.addEventListener('abort', abortListener)
- const fetchOptions = {
+ if (modelDetail.type === 'Petals') {
-   method: 'POST',
+   await runPetalsCompletionRequest(request, _this as any, chatResponse as any, signal, opts)
-   headers: {
-     Authorization: `Bearer ${getApiKey()}`,
-     'Content-Type': 'application/json'
-   },
-   body: JSON.stringify(request),
-   signal
- }
- if (opts.streaming) {
-   /**
-    * Streaming request/response
-    * We'll get the response a token at a time, as soon as they are ready
-    */
-   chatResponse.onFinish(() => {
-     _this.updating = false
-     _this.updatingMessage = ''
-   })
-   fetchEventSource(getApiBase() + getEndpointCompletions(), {
-     ...fetchOptions,
-     openWhenHidden: true,
-     onmessage (ev) {
-       // Remove updating indicator
-       _this.updating = 1 // hide indicator, but still signal we're updating
-       _this.updatingMessage = ''
-       // console.log('ev.data', ev.data)
-       if (!chatResponse.hasFinished()) {
-         if (ev.data === '[DONE]') {
-           // ?? anything to do when "[DONE]"?
-         } else {
-           const data = JSON.parse(ev.data)
-           // console.log('data', data)
-           window.setTimeout(() => { chatResponse.updateFromAsyncResponse(data) }, 1)
-         }
-       }
-     },
-     onclose () {
-       _this.updating = false
-       _this.updatingMessage = ''
-       chatResponse.updateFromClose()
-     },
-     onerror (err) {
-       console.error(err)
-       throw err
-     },
-     async onopen (response) {
-       if (response.ok && response.headers.get('content-type') === EventStreamContentType) {
-         // everything's good
-       } else {
-         // client-side errors are usually non-retriable:
-         await _this.handleError(response)
-       }
-     }
-   }).catch(err => {
-     _this.updating = false
-     _this.updatingMessage = ''
-     chatResponse.updateFromError(err.message)
-   })
  } else {
-   /**
+   await runOpenAiCompletionRequest(request, _this as any, chatResponse as any, signal, opts)
-    * Non-streaming request/response
-    * We'll get the response all at once, after a long delay
-    */
-   const response = await fetch(getApiBase() + getEndpointCompletions(), fetchOptions)
-   if (!response.ok) {
-     await _this.handleError(response)
-   } else {
-     const json = await response.json()
-     // Remove updating indicator
-     _this.updating = false
-     _this.updatingMessage = ''
-     chatResponse.updateFromSyncResponse(json)
-   }
- }
  }
} catch (e) {
  // console.error(e)
@@ -341,12 +283,13 @@
  return chatResponse
}
- private getModel (): Model {
+ getModel (): Model {
-   return this.chat.settings.model || defaultModel
+   return this.chat.settings.model || getDefaultModel()
}
private buildHiddenPromptPrefixMessages (messages: Message[], insert:boolean = false): Message[] {
- const chatSettings = this.chat.settings
+ const chat = this.chat
+ const chatSettings = chat.settings
  const hiddenPromptPrefix = mergeProfileFields(chatSettings, chatSettings.hiddenPromptPrefix).trim()
  const lastMessage = messages[messages.length - 1]
  const isContinue = lastMessage?.role === 'assistant' && lastMessage.finish_reason === 'length'
@@ -356,9 +299,9 @@
  const results = hiddenPromptPrefix.split(/[\s\r\n]*::EOM::[\s\r\n]*/).reduce((a, m) => {
    m = m.trim()
    if (m.length) {
-     if (m.match(/[[USER_PROMPT]]/)) {
+     if (m.match(/\[\[USER_PROMPT\]\]/)) {
        injectedPrompt = true
-       m.replace(/[[USER_PROMPT]]/g, lastMessage.content)
+       m = m.replace(/\[\[USER_PROMPT\]\]/g, lastMessage.content)
      }
      a.push({ role: a.length % 2 === 0 ? 'user' : 'assistant', content: m } as Message)
    }
@@ -377,7 +320,7 @@
      lastMessage.skipOnce = true
    }
  }
- if (injectedPrompt) results.pop()
+ if (injectedPrompt) messages.pop()
  return results
}
return []
@@ -387,11 +330,11 @@
 * Gets an estimate of how many extra tokens will be added that won't be part of the visible messages
 * @param filtered
 */
- private getTokenCountPadding (filtered: Message[]): number {
+ private getTokenCountPadding (filtered: Message[], chat: Chat): number {
  let result = 0
  // add cost of hiddenPromptPrefix
  result += this.buildHiddenPromptPrefixMessages(filtered)
-   .reduce((a, m) => a + countMessageTokens(m, this.getModel()), 0)
+   .reduce((a, m) => a + countMessageTokens(m, this.getModel(), chat), 0)
  // more here eventually?
  return result
}
@@ -413,10 +356,10 @@
}
// Get extra counts for when the prompts are finally sent.
- const countPadding = this.getTokenCountPadding(filtered)
+ const countPadding = this.getTokenCountPadding(filtered, chat)
// See if we have enough to apply any of the reduction modes
- const fullPromptSize = countPromptTokens(filtered, model) + countPadding
+ const fullPromptSize = countPromptTokens(filtered, model, chat) + countPadding
if (fullPromptSize < chatSettings.summaryThreshold) return await continueRequest() // nothing to do yet
const overMax = fullPromptSize > maxTokens * 0.95
@@ -439,12 +382,12 @@
 * *************************************************************
 */
- let promptSize = countPromptTokens(top.concat(rw), model) + countPadding
+ let promptSize = countPromptTokens(top.concat(rw), model, chat) + countPadding
while (rw.length && rw.length > pinBottom && promptSize >= chatSettings.summaryThreshold) {
  const rolled = rw.shift()
  // Hide messages we're "rolling"
  if (rolled) rolled.suppress = true
- promptSize = countPromptTokens(top.concat(rw), model) + countPadding
+ promptSize = countPromptTokens(top.concat(rw), model, chat) + countPadding
}
// Run a new request, now with the rolled messages hidden
return await _this.sendRequest(get(currentChatMessages), {
@@ -460,26 +403,26 @@
const bottom = rw.slice(0 - pinBottom)
let continueCounter = chatSettings.summaryExtend + 1
rw = rw.slice(0, 0 - pinBottom)
- let reductionPoolSize = countPromptTokens(rw, model)
+ let reductionPoolSize = countPromptTokens(rw, model, chat)
const ss = Math.abs(chatSettings.summarySize)
const getSS = ():number => (ss < 1 && ss > 0)
  ? Math.round(reductionPoolSize * ss) // If summarySize between 0 and 1, use percentage of reduced
  : Math.min(ss, reductionPoolSize * 0.5) // If > 1, use token count
- const topSize = countPromptTokens(top, model)
+ const topSize = countPromptTokens(top, model, chat)
let maxSummaryTokens = getSS()
let promptSummary = prepareSummaryPrompt(chatId, maxSummaryTokens)
const summaryRequest = { role: 'user', content: promptSummary } as Message
- let promptSummarySize = countMessageTokens(summaryRequest, model)
+ let promptSummarySize = countMessageTokens(summaryRequest, model, chat)
// Make sure there is enough room to generate the summary, and try to make sure
// the last prompt is a user prompt as that seems to work better for summaries
while ((topSize + reductionPoolSize + promptSummarySize + maxSummaryTokens) >= maxTokens ||
  (reductionPoolSize >= 100 && rw[rw.length - 1]?.role !== 'user')) {
  bottom.unshift(rw.pop() as Message)
- reductionPoolSize = countPromptTokens(rw, model)
+ reductionPoolSize = countPromptTokens(rw, model, chat)
  maxSummaryTokens = getSS()
  promptSummary = prepareSummaryPrompt(chatId, maxSummaryTokens)
  summaryRequest.content = promptSummary
- promptSummarySize = countMessageTokens(summaryRequest, model)
+ promptSummarySize = countMessageTokens(summaryRequest, model, chat)
}
if (reductionPoolSize < 50) {
  if (overMax) addError(chatId, 'Check summary settings. Unable to summarize enough messages.')
@@ -565,10 +508,10 @@
// Try to get more of it
delete summaryResponse.finish_reason
_this.updatingMessage = 'Summarizing more...'
- let _recount = countPromptTokens(top.concat(rw).concat([summaryRequest]).concat([summaryResponse]), model)
+ let _recount = countPromptTokens(top.concat(rw).concat([summaryRequest]).concat([summaryResponse]), model, chat)
while (rw.length && (_recount + maxSummaryTokens >= maxTokens)) {
  rw.shift()
- _recount = countPromptTokens(top.concat(rw).concat([summaryRequest]).concat([summaryResponse]), model)
+ _recount = countPromptTokens(top.concat(rw).concat([summaryRequest]).concat([summaryResponse]), model, chat)
}
loopCount++
continue


@@ -0,0 +1,100 @@
<script context="module" lang="ts">
import { EventStreamContentType, fetchEventSource } from '@microsoft/fetch-event-source'
import ChatCompletionResponse from './ChatCompletionResponse.svelte'
import ChatRequest from './ChatRequest.svelte'
import { getEndpoint } from './Models.svelte'
import { getApiKey } from './Storage.svelte'
import type { ChatCompletionOpts, Request } from './Types.svelte'
export const runOpenAiCompletionRequest = async (
request: Request,
chatRequest: ChatRequest,
chatResponse: ChatCompletionResponse,
signal: AbortSignal,
opts: ChatCompletionOpts) => {
// OpenAI Request
const model = chatRequest.getModel()
const abortListener = (e:Event) => {
chatRequest.updating = false
chatRequest.updatingMessage = ''
chatResponse.updateFromError('User aborted request.')
signal.removeEventListener('abort', abortListener)
}
signal.addEventListener('abort', abortListener)
const fetchOptions = {
method: 'POST',
headers: {
Authorization: `Bearer ${getApiKey()}`,
'Content-Type': 'application/json'
},
body: JSON.stringify(request),
signal
}
if (opts.streaming) {
/**
* Streaming request/response
* We'll get the response a token at a time, as soon as they are ready
*/
chatResponse.onFinish(() => {
chatRequest.updating = false
chatRequest.updatingMessage = ''
})
fetchEventSource(getEndpoint(model), {
...fetchOptions,
openWhenHidden: true,
onmessage (ev) {
// Remove updating indicator
chatRequest.updating = 1 // hide indicator, but still signal we're updating
chatRequest.updatingMessage = ''
// console.log('ev.data', ev.data)
if (!chatResponse.hasFinished()) {
if (ev.data === '[DONE]') {
// ?? anything to do when "[DONE]"?
} else {
const data = JSON.parse(ev.data)
// console.log('data', data)
window.setTimeout(() => { chatResponse.updateFromAsyncResponse(data) }, 1)
}
}
},
onclose () {
chatRequest.updating = false
chatRequest.updatingMessage = ''
chatResponse.updateFromClose()
},
onerror (err) {
console.error(err)
throw err
},
async onopen (response) {
if (response.ok && response.headers.get('content-type') === EventStreamContentType) {
// everything's good
} else {
// client-side errors are usually non-retriable:
await chatRequest.handleError(response)
}
}
}).catch(err => {
chatRequest.updating = false
chatRequest.updatingMessage = ''
chatResponse.updateFromError(err.message)
})
} else {
/**
* Non-streaming request/response
* We'll get the response all at once, after a long delay
*/
const response = await fetch(getEndpoint(model), fetchOptions)
if (!response.ok) {
await chatRequest.handleError(response)
} else {
const json = await response.json()
// Remove updating indicator
chatRequest.updating = false
chatRequest.updatingMessage = ''
chatResponse.updateFromSyncResponse(json)
}
}
}
</script>


@@ -0,0 +1,139 @@
<script context="module" lang="ts">
import ChatCompletionResponse from './ChatCompletionResponse.svelte'
import ChatRequest from './ChatRequest.svelte'
import { getEndpoint, getModelDetail, getRoleTag, getStopSequence } from './Models.svelte'
import type { ChatCompletionOpts, Message, Request } from './Types.svelte'
import { getModelMaxTokens } from './Stats.svelte'
import { updateMessages } from './Storage.svelte'
export const runPetalsCompletionRequest = async (
request: Request,
chatRequest: ChatRequest,
chatResponse: ChatCompletionResponse,
signal: AbortSignal,
opts: ChatCompletionOpts) => {
// Petals
const chat = chatRequest.getChat()
const model = chatRequest.getModel()
const modelDetail = getModelDetail(model)
const ws = new WebSocket(getEndpoint(model))
const abortListener = (e:Event) => {
chatRequest.updating = false
chatRequest.updatingMessage = ''
chatResponse.updateFromError('User aborted request.')
signal.removeEventListener('abort', abortListener)
ws.close()
}
signal.addEventListener('abort', abortListener)
const stopSequences = (modelDetail.stop || ['###', '</s>']).slice()
const stopSequence = getStopSequence(chat)
let stopSequenceC = stopSequence
if (stopSequence !== '###') {
stopSequences.push(stopSequence)
stopSequenceC = '</s>'
}
const stopSequencesC = stopSequences.filter((ss) => {
return ss !== '###' && ss !== stopSequenceC
})
const maxTokens = getModelMaxTokens(model)
let maxLen = Math.min(opts.maxTokens || chatRequest.chat.max_tokens || maxTokens, maxTokens)
const promptTokenCount = chatResponse.getPromptTokenCount()
if (promptTokenCount > maxLen) {
maxLen = Math.min(maxLen + promptTokenCount, maxTokens)
}
chatResponse.onFinish(() => {
const message = chatResponse.getMessages()[0]
if (message) {
for (let i = 0, l = stopSequences.length; i < l; i++) {
const ss = stopSequences[i].trim()
if (message.content.trim().endsWith(ss)) {
message.content = message.content.trim().slice(0, message.content.trim().length - ss.length)
updateMessages(chat.id)
}
}
}
chatRequest.updating = false
chatRequest.updatingMessage = ''
})
ws.onopen = () => {
ws.send(JSON.stringify({
type: 'open_inference_session',
model,
max_length: maxLen
}))
ws.onmessage = event => {
const response = JSON.parse(event.data)
if (!response.ok) {
const err = new Error('Error opening socket: ' + response.traceback)
chatResponse.updateFromError(err.message)
console.error(err)
throw err
}
const rMessages = request.messages || [] as Message[]
// make sure top_p and temperature are set the way we need
let temperature = request.temperature
if (temperature === undefined || isNaN(temperature as any)) temperature = 1
if (!temperature || temperature <= 0) temperature = 0.01
let topP = request.top_p
if (topP === undefined || isNaN(topP as any)) topP = 1
if (!topP || topP <= 0) topP = 0.01
// build the message array
const inputArray = (rMessages).reduce((a, m) => {
const c = getRoleTag(m.role, model, chatRequest.chat) + m.content
a.push(c.trim())
return a
}, [] as string[])
const lastMessage = rMessages[rMessages.length - 1]
if (lastMessage && lastMessage.role !== 'assistant') {
inputArray.push(getRoleTag('assistant', model, chatRequest.chat))
}
const petalsRequest = {
type: 'generate',
inputs: inputArray.join(stopSequence),
max_new_tokens: 1, // wait for up to 1 tokens before displaying
stop_sequence: stopSequenceC,
do_sample: 1, // enable top p and the like
temperature,
top_p: topP
} as any
if (stopSequencesC.length) petalsRequest.extra_stop_sequences = stopSequencesC
ws.send(JSON.stringify(petalsRequest))
ws.onmessage = event => {
// Remove updating indicator
chatRequest.updating = 1 // hide indicator, but still signal we're updating
chatRequest.updatingMessage = ''
const response = JSON.parse(event.data)
if (!response.ok) {
const err = new Error('Error in response: ' + response.traceback)
console.error(err)
chatResponse.updateFromError(err.message)
throw err
}
window.setTimeout(() => {
chatResponse.updateFromAsyncResponse(
{
model,
choices: [{
delta: {
content: response.outputs,
role: 'assistant'
},
finish_reason: (response.stop ? 'stop' : null)
}]
} as any
)
}, 1)
}
}
ws.onclose = () => {
chatRequest.updating = false
chatRequest.updatingMessage = ''
chatResponse.updateFromClose()
}
ws.onerror = err => {
console.error(err)
throw err
}
}
}
</script>
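For orientation, the WebSocket exchange above reduces to two message types: an 'open_inference_session' handshake followed by 'generate' requests, with the server streaming back { ok, outputs, stop, traceback } chunks. A stripped-down sketch of the same protocol outside the Svelte wiring; the endpoint, model name, role tag, and '###' separator are the defaults defined elsewhere in this PR, and the prompt text is purely illustrative:

// Minimal protocol sketch distilled from the handler above; not production code.
const ws = new WebSocket('wss://chat.petals.dev/api/v2/generate')
let reply = ''
ws.onopen = () => {
  // 1) open an inference session for the chosen model
  ws.send(JSON.stringify({ type: 'open_inference_session', model: 'meta-llama/Llama-2-70b-chat-hf', max_length: 1024 }))
  ws.onmessage = ev => {
    const opened = JSON.parse(ev.data)
    if (!opened.ok) throw new Error('Error opening session: ' + opened.traceback)
    // 2) send a generate request; the server streams back partial outputs
    ws.send(JSON.stringify({
      type: 'generate',
      inputs: '<|user|>Please say hello.###', // role tag and separator mirror the modelDetails defaults
      max_new_tokens: 1, // stream roughly token-by-token, as in the handler above
      stop_sequence: '###',
      do_sample: 1,
      temperature: 0.7,
      top_p: 0.9
    }))
    ws.onmessage = e => {
      const chunk = JSON.parse(e.data)
      if (!chunk.ok) throw new Error(chunk.traceback)
      reply += chunk.outputs // partial completion text
      if (chunk.stop) ws.close() // server signals the stop sequence was hit
    }
  }
}
ws.onclose = () => console.log(reply.trim())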


@@ -3,7 +3,7 @@
// import { getProfile } from './Profiles.svelte'
import { cleanSettingValue, setChatSettingValue } from './Storage.svelte'
import type { Chat, ChatSetting, ChatSettings, ControlAction, FieldControl, SettingPrompt } from './Types.svelte'
- import { autoGrowInputOnEvent, errorNotice } from './Util.svelte'
+ import { autoGrowInputOnEvent, errorNotice, valueOf } from './Util.svelte'
// import { replace } from 'svelte-spa-router'
import Fa from 'svelte-fa/src/fa.svelte'
import { openModal } from 'svelte-modals'
@@ -23,6 +23,10 @@
const chatId = chat.id
let show = false
+ let header = valueOf(chatId, setting.header)
+ let headerClass = valueOf(chatId, setting.headerClass)
+ let placeholder = valueOf(chatId, setting.placeholder)
const buildFieldControls = () => {
  fieldControls = (setting.fieldControls || [] as FieldControl[]).map(fc => {
    return fc.getAction(chatId, setting, chatSettings[setting.key])
@@ -38,6 +42,9 @@
afterUpdate(() => {
  show = (typeof setting.hide !== 'function') || !setting.hide(chatId)
+ header = valueOf(chatId, setting.header)
+ headerClass = valueOf(chatId, setting.headerClass)
+ placeholder = valueOf(chatId, setting.placeholder)
  buildFieldControls()
})
@@ -146,9 +153,9 @@
</script>
{#if show}
- {#if setting.header}
+ {#if header}
-   <p class="notification {setting.headerClass}">
+   <p class="notification {headerClass}">
-     {@html setting.header}
+     {@html header}
    </p>
  {/if}
  <div class="field is-horizontal">
@@ -171,7 +178,7 @@
<label class="label" for="settings-{setting.key}" title="{setting.title}">{setting.name}</label>
<textarea
  class="input is-info is-focused chat-input auto-size"
- placeholder={setting.placeholder || ''}
+ placeholder={placeholder || ''}
  rows="1"
  on:input={e => autoGrowInputOnEvent(e)}
  on:change={e => { queueSettingValueChange(e, setting); autoGrowInputOnEvent(e) }}
@@ -195,7 +202,7 @@
  min={setting.min}
  max={setting.max}
  step={setting.step}
- placeholder={String(setting.placeholder || chatDefaults[setting.key])}
+ placeholder={String(placeholder || chatDefaults[setting.key])}
  on:change={e => queueSettingValueChange(e, setting)}
/>
{:else if setting.type === 'select' || setting.type === 'select-number'}
@@ -204,7 +211,7 @@
{#key rkey}
<select id="settings-{setting.key}" title="{setting.title}" on:change={e => queueSettingValueChange(e, setting) } >
  {#each setting.options as option}
-   <option class:is-default={option.value === chatDefaults[setting.key]} value={option.value} selected={option.value === chatSettings[setting.key]}>{option.text}</option>
+   <option class:is-default={option.value === chatDefaults[setting.key]} value={option.value} selected={option.value === chatSettings[setting.key]} disabled={option.disabled}>{option.text}</option>
  {/each}
</select>
{/key}
@@ -233,6 +240,7 @@
  title="{setting.title}"
  class="input"
  value={chatSettings[setting.key]}
+ placeholder={String(placeholder || chatDefaults[setting.key])}
  on:change={e => { queueSettingValueChange(e, setting) }}
>
</div>


@@ -3,7 +3,6 @@
import { getChatDefaults, getChatSettingList, getChatSettingObjectByKey, getExcludeFromProfile } from './Settings.svelte'
import {
  saveChatStore,
- apiKeyStorage,
  chatsStorage,
  globalStorage,
  saveCustomProfile,
@@ -13,7 +12,7 @@
  checkStateChange,
  addChat
} from './Storage.svelte'
- import type { Chat, ChatSetting, ResponseModels, SettingSelect, SelectOption, ChatSettings } from './Types.svelte'
+ import type { Chat, ChatSetting, SettingSelect, ChatSettings } from './Types.svelte'
import { errorNotice, sizeTextElements } from './Util.svelte'
import Fa from 'svelte-fa/src/fa.svelte'
import {
@@ -35,8 +34,7 @@
import { replace } from 'svelte-spa-router'
import { openModal } from 'svelte-modals'
import PromptConfirm from './PromptConfirm.svelte'
- import { getApiBase, getEndpointModels } from './ApiUtil.svelte'
+ import { getModelOptions } from './Models.svelte'
- import { supportedModelKeys } from './Models.svelte'
export let chatId:number
export const show = () => { showSettings() }
@@ -185,30 +183,9 @@
// Refresh settings modal
showSettingsModal++
- // Load available models from OpenAI
- const allModels = (await (
-   await fetch(getApiBase() + getEndpointModels(), {
-     method: 'GET',
-     headers: {
-       Authorization: `Bearer ${$apiKeyStorage}`,
-       'Content-Type': 'application/json'
-     }
-   })
- ).json()) as ResponseModels
- const filteredModels = supportedModelKeys.filter((model) => allModels.data.find((m) => m.id === model))
- const modelOptions:SelectOption[] = filteredModels.reduce((a, m) => {
-   const o:SelectOption = {
-     value: m,
-     text: m
-   }
-   a.push(o)
-   return a
- }, [] as SelectOption[])
// Update the models in the settings
if (modelSetting) {
- modelSetting.options = modelOptions
+ modelSetting.options = await getModelOptions()
}
// Refresh settings modal
showSettingsModal++


@@ -11,6 +11,7 @@
import { openModal } from 'svelte-modals'
import PromptConfirm from './PromptConfirm.svelte'
import { getImage } from './ImageStore.svelte'
+ import { getModelDetail } from './Models.svelte'
export let message:Message
export let chatId:number
@@ -245,7 +246,7 @@
  <p class="is-size-7 message-note">System Prompt</p>
{:else if message.usage}
  <p class="is-size-7 message-note">
-   <em>{message.model || defaultModel}</em> using <span class="has-text-weight-bold">{message.usage.total_tokens}</span>
+   <em>{getModelDetail(message.model || '').label || message.model || defaultModel}</em> using <span class="has-text-weight-bold">{message.usage.total_tokens}</span>
    tokens ~= <span class="has-text-weight-bold">${getPrice(message.usage, message.model || defaultModel).toFixed(6)}</span>
  </p>
{/if}


@@ -1,16 +1,22 @@
<script lang="ts">
- import { apiKeyStorage, lastChatId, getChat, started } from './Storage.svelte'
+ import { apiKeyStorage, globalStorage, lastChatId, getChat, started, setGlobalSettingValueByKey, hasActiveModels, checkStateChange } from './Storage.svelte'
import Footer from './Footer.svelte'
import { replace } from 'svelte-spa-router'
- import { onMount } from 'svelte'
+ import { afterUpdate, onMount } from 'svelte'
+ import { getPetals } from './ApiUtil.svelte'
+ import { clearModelOptionCache } from './Models.svelte'
$: apiKey = $apiKeyStorage
+ let showPetalsSettings = $globalStorage.enablePetals
+ let pedalsEndpoint = $globalStorage.pedalsEndpoint
+ let hasModels = hasActiveModels()
onMount(() => {
  if (!$started) {
    $started = true
    // console.log('started', apiKey, $lastChatId, getChat($lastChatId))
-   if (apiKey && getChat($lastChatId)) {
+   if (hasActiveModels() && getChat($lastChatId)) {
      const chatId = $lastChatId
      $lastChatId = 0
      replace(`/chat/${chatId}`)
@@ -19,21 +25,39 @@ onMount(() => {
  $lastChatId = 0
})
+ afterUpdate(() => {
+   clearModelOptionCache()
+   hasModels = hasActiveModels()
+   pedalsEndpoint = $globalStorage.pedalsEndpoint
+   $checkStateChange++
+ })
+ const setPetalsEnabled = (event: Event) => {
+   const el = (event.target as HTMLInputElement)
+   setGlobalSettingValueByKey('enablePetals', !!el.checked)
+   showPetalsSettings = $globalStorage.enablePetals
+ }
</script>
<section class="section">
<article class="message">
  <div class="message-body">
-   <strong><a href="https://github.com/Niek/chatgpt-web">ChatGPT-web</a></strong>
+   <p class="mb-4">
+   <strong><a href="https://github.com/Niek/chatgpt-web" target="_blank">ChatGPT-web</a></strong>
    is a simple one-page web interface to the OpenAI ChatGPT API. To use it, you need to register for
    <a href="https://platform.openai.com/account/api-keys" target="_blank" rel="noreferrer">an OpenAI API key</a>
    first. OpenAI bills per token (usage-based), which means it is a lot cheaper than
    <a href="https://openai.com/blog/chatgpt-plus" target="_blank" rel="noreferrer">ChatGPT Plus</a>, unless you use
    more than 10 million tokens per month. All messages are stored in your browser's local storage, so everything is
    <strong>private</strong>. You can also close the browser tab and come back later to continue the conversation.
+   </p>
+   <p>
+   As an alternative to OpenAI, you can also use Petals swarm as a free API option for open chat models like Llama 2.
+   </p>
  </div>
</article>
- <article class="message" class:is-danger={!apiKey} class:is-warning={apiKey}>
+ <article class="message" class:is-danger={!hasModels} class:is-warning={!apiKey} class:is-info={apiKey}>
  <div class="message-body">
    Set your OpenAI API key below:
@@ -53,19 +77,81 @@ onMount(() => {
    type="password"
    autocomplete="off"
    class="input"
-   class:is-danger={!apiKey}
+   class:is-danger={!hasModels}
+   class:is-warning={!apiKey} class:is-info={apiKey}
    value={apiKey}
  />
</p>
<p class="control">
  <button class="button is-info" type="submit">Save</button>
</p>
</form>
{#if !apiKey}
- <p class="help is-danger">
+ <p class:is-danger={!hasModels} class:is-warning={!apiKey}>
-   Please enter your <a href="https://platform.openai.com/account/api-keys">OpenAI API key</a> above to use ChatGPT-web.
+   Please enter your <a target="_blank" href="https://platform.openai.com/account/api-keys">OpenAI API key</a> above to use Open AI's ChatGPT API.
-   It is required to use ChatGPT-web.
+   At least one API must be enabled to use ChatGPT-web.
</p>
{/if}
</div>
</article>
<article class="message" class:is-danger={!hasModels} class:is-warning={!showPetalsSettings} class:is-info={showPetalsSettings}>
<div class="message-body">
<label class="label" for="enablePetals">
<input
type="checkbox"
class="checkbox"
id="enablePetals"
checked={!!$globalStorage.enablePetals}
on:click={setPetalsEnabled}
>
Use Petals API and Models (Llama 2)
</label>
{#if showPetalsSettings}
<p>Set Petals API Endpoint:</p>
<form
class="field has-addons has-addons-right"
on:submit|preventDefault={(event) => {
if (event.target && event.target[0].value) {
setGlobalSettingValueByKey('pedalsEndpoint', (event.target[0].value).trim())
} else {
setGlobalSettingValueByKey('pedalsEndpoint', '')
}
}}
>
<p class="control is-expanded">
<input
aria-label="PetalsAPI Endpoint"
type="text"
class="input"
placeholder={getPetals()}
value={$globalStorage.pedalsEndpoint || ''}
/>
</p>
<p class="control">
<button class="button is-info" type="submit">Save</button>
</p>
</form>
{#if !pedalsEndpoint}
<p class="help is-warning">
Please only use the default public API for testing. It's best to <a target="_blank" href="https://github.com/petals-infra/chat.petals.dev">configure a private endpoint</a> and enter it above for connection to the Petals swarm.
</p>
{/if}
<p class="my-4">
<a target="_blank" href="https://petals.dev/">Petals</a> lets you run large language models at home by connecting to a public swarm, BitTorrent-style, without hefty GPU requirements.
</p>
<p class="mb-4">
You are encouraged to <a target="_blank" href="https://github.com/bigscience-workshop/petals/wiki/FAQ:-Frequently-asked-questions#running-a-server">set up a Petals server to share your GPU resources</a> with the public swarm. Minimum requirements to contribute Llama 2 completions are a GTX&nbsp;1080&nbsp;8GB, but the larger/faster the better.
</p>
<p class="help is-warning">
Because Petals uses a public swarm, <b>do not send sensitive information</b> when using Petals.
  </p>
  {/if}
</div>


@@ -1,43 +1,108 @@
<script context="module" lang="ts">
- import type { ModelDetail, Model } from './Types.svelte'
+ import { getApiBase, getEndpointCompletions, getEndpointGenerations, getEndpointModels, getPetals } from './ApiUtil.svelte'
+ import { apiKeyStorage, globalStorage } from './Storage.svelte'
+ import { get, writable } from 'svelte/store'
+ import type { ModelDetail, Model, ResponseModels, SelectOption, Chat } from './Types.svelte'
+ import { encode } from 'gpt-tokenizer'
+ import llamaTokenizer from 'llama-tokenizer-js'
+ import { mergeProfileFields } from './Profiles.svelte'
+ import { getChatSettingObjectByKey } from './Settings.svelte'
+ import { valueOf } from './Util.svelte'
+ /**
+  * TODO: All of this + what's scattered about need to be refactored to interfaces and classes
+  * to make it all more modular
+  */
+ const modelOptionCache = writable([] as SelectOption[])
// Reference: https://openai.com/pricing#language-models
// Eventually we'll add API hosts and endpoints to this
const modelDetails : Record<string, ModelDetail> = {
  'gpt-4-32k': {
+   type: 'OpenAIChat',
    prompt: 0.00006, // $0.06 per 1000 tokens prompt
    completion: 0.00012, // $0.12 per 1000 tokens completion
    max: 32768 // 32k max token buffer
  },
  'gpt-4': {
+   type: 'OpenAIChat',
    prompt: 0.00003, // $0.03 per 1000 tokens prompt
    completion: 0.00006, // $0.06 per 1000 tokens completion
    max: 8192 // 8k max token buffer
  },
  'gpt-3.5': {
+   type: 'OpenAIChat',
    prompt: 0.0000015, // $0.0015 per 1000 tokens prompt
    completion: 0.000002, // $0.002 per 1000 tokens completion
    max: 4096 // 4k max token buffer
  },
  'gpt-3.5-turbo-16k': {
+   type: 'OpenAIChat',
    prompt: 0.000003, // $0.003 per 1000 tokens prompt
    completion: 0.000004, // $0.004 per 1000 tokens completion
    max: 16384 // 16k max token buffer
},
'enoch/llama-65b-hf': {
type: 'Petals',
label: 'Petals - Llama-65b',
stop: ['###', '</s>'],
userStart: '<|user|>',
assistantStart: '<|[[CHARACTER_NAME]]|>',
systemStart: '',
prompt: 0.000000, // $0.000 per 1000 tokens prompt
completion: 0.000000, // $0.000 per 1000 tokens completion
max: 2048 // 2k max token buffer
},
'timdettmers/guanaco-65b': {
type: 'Petals',
label: 'Petals - Guanaco-65b',
stop: ['###', '</s>'],
userStart: '<|user|>',
assistantStart: '<|[[CHARACTER_NAME]]|>',
systemStart: '',
prompt: 0.000000, // $0.000 per 1000 tokens prompt
completion: 0.000000, // $0.000 per 1000 tokens completion
max: 2048 // 2k max token buffer
},
'meta-llama/Llama-2-70b-chat-hf': {
type: 'Petals',
label: 'Petals - Llama-2-70b-chat',
stop: ['###', '</s>'],
userStart: '<|user|>',
assistantStart: '<|[[CHARACTER_NAME]]|>',
systemStart: '',
prompt: 0.000000, // $0.000 per 1000 tokens prompt
completion: 0.000000, // $0.000 per 1000 tokens completion
max: 4096 // 4k max token buffer
},
'meta-llama/Llama-2-70b-hf': {
type: 'Petals',
label: 'Petals - Llama-2-70b',
stop: ['###', '</s>'],
userStart: '<|user|>',
assistantStart: '<|[[CHARACTER_NAME]]|>',
systemStart: '',
prompt: 0.000000, // $0.000 per 1000 tokens prompt
completion: 0.000000, // $0.000 per 1000 tokens completion
max: 4096 // 4k max token buffer
} }
} }
const imageModels : Record<string, ModelDetail> = { export const imageModels : Record<string, ModelDetail> = {
'dall-e-1024x1024': { 'dall-e-1024x1024': {
type: 'OpenAIDall-e',
prompt: 0.00, prompt: 0.00,
completion: 0.020, // $0.020 per image completion: 0.020, // $0.020 per image
max: 1000 // 1000 char prompt, max max: 1000 // 1000 char prompt, max
}, },
'dall-e-512x512': { 'dall-e-512x512': {
type: 'OpenAIDall-e',
prompt: 0.00, prompt: 0.00,
completion: 0.018, // $0.018 per image completion: 0.018, // $0.018 per image
max: 1000 // 1000 char prompt, max max: 1000 // 1000 char prompt, max
}, },
'dall-e-256x256': { 'dall-e-256x256': {
type: 'OpenAIDall-e',
prompt: 0.00, prompt: 0.00,
completion: 0.016, // $0.016 per image completion: 0.016, // $0.016 per image
max: 1000 // 1000 char prompt, max max: 1000 // 1000 char prompt, max
@ -47,22 +112,27 @@ const imageModels : Record<string, ModelDetail> = {
const unknownDetail = { const unknownDetail = {
prompt: 0, prompt: 0,
completion: 0, completion: 0,
max: 4096 max: 4096,
} type: 'OpenAIChat'
} as ModelDetail
// See: https://platform.openai.com/docs/models/model-endpoint-compatibility // See: https://platform.openai.com/docs/models/model-endpoint-compatibility
// Eventually we'll add UI for managing this // Eventually we'll add UI for managing this
export const supportedModels : Record<string, ModelDetail> = { export const supportedModels : Record<string, ModelDetail> = {
'gpt-3.5-turbo': modelDetails['gpt-3.5'],
'gpt-3.5-turbo-0301': modelDetails['gpt-3.5'],
'gpt-3.5-turbo-0613': modelDetails['gpt-3.5'],
'gpt-3.5-turbo-16k': modelDetails['gpt-3.5-turbo-16k'],
'gpt-4': modelDetails['gpt-4'], 'gpt-4': modelDetails['gpt-4'],
'gpt-4-0314': modelDetails['gpt-4'], 'gpt-4-0314': modelDetails['gpt-4'],
'gpt-4-0613': modelDetails['gpt-4'], 'gpt-4-0613': modelDetails['gpt-4'],
'gpt-4-32k': modelDetails['gpt-4-32k'], 'gpt-4-32k': modelDetails['gpt-4-32k'],
'gpt-4-32k-0314': modelDetails['gpt-4-32k'], 'gpt-4-32k-0314': modelDetails['gpt-4-32k'],
'gpt-4-32k-0613': modelDetails['gpt-4-32k'], 'gpt-4-32k-0613': modelDetails['gpt-4-32k'],
'gpt-3.5-turbo': modelDetails['gpt-3.5'], // 'enoch/llama-65b-hf': modelDetails['enoch/llama-65b-hf'],
'gpt-3.5-turbo-16k': modelDetails['gpt-3.5-turbo-16k'], // 'timdettmers/guanaco-65b': modelDetails['timdettmers/guanaco-65b'],
'gpt-3.5-turbo-0301': modelDetails['gpt-3.5'], 'meta-llama/Llama-2-70b-hf': modelDetails['meta-llama/Llama-2-70b-hf'],
'gpt-3.5-turbo-0613': modelDetails['gpt-3.5'] 'meta-llama/Llama-2-70b-chat-hf': modelDetails['meta-llama/Llama-2-70b-chat-hf']
} }
const lookupList = { const lookupList = {
@ -75,7 +145,7 @@ export const supportedModelKeys = Object.keys({ ...supportedModels, ...imageMode
const tpCache : Record<string, ModelDetail> = {} const tpCache : Record<string, ModelDetail> = {}
export const getModelDetail = (model: Model) => { export const getModelDetail = (model: Model): ModelDetail => {
// First try to get exact match, then from cache // First try to get exact match, then from cache
let r = supportedModels[model] || tpCache[model] let r = supportedModels[model] || tpCache[model]
if (r) return r if (r) return r
@ -93,4 +163,140 @@ export const getModelDetail = (model: Model) => {
return r return r
} }
export const getEndpoint = (model: Model): string => {
const modelDetails = getModelDetail(model)
const gSettings = get(globalStorage)
switch (modelDetails.type) {
case 'Petals':
return gSettings.pedalsEndpoint || getPetals()
case 'OpenAIDall-e':
return getApiBase() + getEndpointGenerations()
case 'OpenAIChat':
default:
return gSettings.openAICompletionEndpoint || (getApiBase() + getEndpointCompletions())
}
}
export const getStopSequence = (chat: Chat): string => {
return chat.settings.stopSequence || valueOf(chat.id, getChatSettingObjectByKey('stopSequence').placeholder)
}
export const getUserStart = (chat: Chat): string => {
return mergeProfileFields(
chat.settings,
chat.settings.userMessageStart || valueOf(chat.id, getChatSettingObjectByKey('userMessageStart').placeholder)
)
}
export const getAssistantStart = (chat: Chat): string => {
return mergeProfileFields(
chat.settings,
chat.settings.assistantMessageStart || valueOf(chat.id, getChatSettingObjectByKey('assistantMessageStart').placeholder)
)
}
export const getSystemStart = (chat: Chat): string => {
return mergeProfileFields(
chat.settings,
chat.settings.systemMessageStart || valueOf(chat.id, getChatSettingObjectByKey('systemMessageStart').placeholder)
)
}
export const getRoleTag = (role: string, model: Model, chat: Chat): string => {
const modelDetails = getModelDetail(model)
switch (modelDetails.type) {
case 'Petals':
if (role === 'assistant') return getAssistantStart(chat) + ' '
if (role === 'user') return getUserStart(chat) + ' '
return getSystemStart(chat) + ' '
case 'OpenAIDall-e':
return role
case 'OpenAIChat':
default:
return role
}
}
export const getTokens = (model: Model, value: string): number[] => {
const modelDetails = getModelDetail(model)
switch (modelDetails.type) {
case 'Petals':
return llamaTokenizer.encode(value)
case 'OpenAIDall-e':
return [0]
case 'OpenAIChat':
default:
return encode(value)
}
}
export const countTokens = (model: Model, value: string): number => {
return getTokens(model, value).length
}
export const clearModelOptionCache = () => {
modelOptionCache.set([])
}
export async function getModelOptions (): Promise<SelectOption[]> {
const gSettings = get(globalStorage)
const openAiKey = get(apiKeyStorage)
const cachedOptions = get(modelOptionCache)
if (cachedOptions && cachedOptions.length) return cachedOptions
// Load available models from OpenAI
let openAiModels
let allowCache = true
if (openAiKey) {
try {
openAiModels = (await (
await fetch(getApiBase() + getEndpointModels(), {
method: 'GET',
headers: {
Authorization: `Bearer ${openAiKey}`,
'Content-Type': 'application/json'
}
})
).json()) as ResponseModels
} catch (e) {
allowCache = false
openAiModels = { data: [] }
}
} else {
openAiModels = { data: [] }
}
// const filteredModels = Object.keys(supportedModels).filter((model) => {
// switch (getModelDetail(model).type) {
// case 'Petals':
// return gSettings.enablePetals
// case 'OpenAIChat':
// default:
// return openAiModels.data && openAiModels.data.find((m) => m.id === model)
// }
// })
const modelOptions:SelectOption[] = Object.keys(supportedModels).reduce((a, m) => {
let disabled
const modelDetail = getModelDetail(m)
switch (modelDetail.type) {
case 'Petals':
disabled = !gSettings.enablePetals
break
case 'OpenAIChat':
default:
disabled = !(openAiModels.data && openAiModels.data.find((om) => om.id === m)) // callback param renamed so it does not shadow the model key
}
const o:SelectOption = {
value: m,
text: modelDetail.label || m,
disabled
}
a.push(o)
return a
}, [] as SelectOption[])
if (allowCache) modelOptionCache.set(modelOptions)
return modelOptions
}
</script> </script>
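To make the new Petals plumbing concrete, here is a hedged usage sketch. It is not part of the commit, and buildPetalsPrompt is a hypothetical helper; it simply joins messages the same way countMessageTokens frames them for Petals models:

import { countTokens, getEndpoint, getRoleTag, getStopSequence } from './Models.svelte'
import type { Chat, Message, Model } from './Types.svelte'

// Hypothetical helper: serialize a message chain using the chat's role tags and stop sequence.
const buildPetalsPrompt = (messages: Message[], model: Model, chat: Chat): string => {
  const stop = getStopSequence(chat) || '###'
  return messages
    .map(m => getRoleTag(m.role, model, chat) + ': ' + m.content + stop)
    .join('')
}

const estimate = (messages: Message[], chat: Chat) => {
  const model: Model = 'meta-llama/Llama-2-70b-chat-hf'
  const prompt = buildPetalsPrompt(messages, model, chat)
  return {
    endpoint: getEndpoint(model), // WebSocket endpoint for Petals models
    promptTokens: countTokens(model, prompt) // counted with llama-tokenizer-js
  }
}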

View File

@ -1,5 +1,5 @@
<script context="module" lang="ts"> <script context="module" lang="ts">
import { getChatDefaults, getExcludeFromProfile } from './Settings.svelte' import { getChatDefaults, getDefaultModel, getExcludeFromProfile } from './Settings.svelte'
import { get, writable } from 'svelte/store' import { get, writable } from 'svelte/store'
// Profile definitions // Profile definitions
import { addMessage, clearMessages, deleteMessage, getChat, getChatSettings, getCustomProfiles, getGlobalSettings, getMessages, newName, resetChatSettings, saveChatStore, setGlobalSettingValueByKey, setMessages, updateProfile } from './Storage.svelte' import { addMessage, clearMessages, deleteMessage, getChat, getChatSettings, getCustomProfiles, getGlobalSettings, getMessages, newName, resetChatSettings, saveChatStore, setGlobalSettingValueByKey, setMessages, updateProfile } from './Storage.svelte'
@ -22,7 +22,9 @@ export const getProfiles = (forceUpdate:boolean = false):Record<string, ChatSett
} }
const result = Object.entries(profiles const result = Object.entries(profiles
).reduce((a, [k, v]) => { ).reduce((a, [k, v]) => {
v = JSON.parse(JSON.stringify(v))
a[k] = v a[k] = v
v.model = v.model || getDefaultModel()
return a return a
}, {} as Record<string, ChatSettings>) }, {} as Record<string, ChatSettings>)
Object.entries(getCustomProfiles()).forEach(([k, v]) => { Object.entries(getCustomProfiles()).forEach(([k, v]) => {
@ -72,7 +74,7 @@ export const getProfile = (key:string, forReset:boolean = false):ChatSettings =>
export const mergeProfileFields = (settings: ChatSettings, content: string|undefined, maxWords: number|undefined = undefined): string => { export const mergeProfileFields = (settings: ChatSettings, content: string|undefined, maxWords: number|undefined = undefined): string => {
if (!content?.toString) return '' if (!content?.toString) return ''
content = (content + '').replaceAll('[[CHARACTER_NAME]]', settings.characterName || 'ChatGPT') content = (content + '').replaceAll('[[CHARACTER_NAME]]', settings.characterName || 'Assistant')
if (maxWords) content = (content + '').replaceAll('[[MAX_WORDS]]', maxWords.toString()) if (maxWords) content = (content + '').replaceAll('[[MAX_WORDS]]', maxWords.toString())
return content return content
} }

View File

@ -1,7 +1,7 @@
<script context="module" lang="ts"> <script context="module" lang="ts">
import { applyProfile } from './Profiles.svelte' import { applyProfile } from './Profiles.svelte'
import { getChatSettings, getGlobalSettings, setGlobalSettingValueByKey } from './Storage.svelte' import { get } from 'svelte/store'
import { encode } from 'gpt-tokenizer' import { apiKeyStorage, getChatSettings, getGlobalSettings, setGlobalSettingValueByKey } from './Storage.svelte'
import { faArrowDown91, faArrowDownAZ, faCheck, faThumbTack } from '@fortawesome/free-solid-svg-icons/index' import { faArrowDown91, faArrowDownAZ, faCheck, faThumbTack } from '@fortawesome/free-solid-svg-icons/index'
// Setting definitions // Setting definitions
@ -18,8 +18,15 @@ import {
type ChatSortOption type ChatSortOption
} from './Types.svelte' } from './Types.svelte'
import { getModelDetail, getTokens } from './Models.svelte'
export const defaultModel:Model = 'gpt-3.5-turbo' const defaultModel:Model = 'gpt-3.5-turbo'
const defaultModelPetals:Model = 'meta-llama/Llama-2-70b-chat-hf'
export const getDefaultModel = (): Model => {
if (!get(apiKeyStorage)) return defaultModelPetals
return defaultModel
}
export const getChatSettingList = (): ChatSetting[] => { export const getChatSettingList = (): ChatSetting[] => {
return chatSettingsList return chatSettingsList
@ -55,8 +62,16 @@ export const getExcludeFromProfile = () => {
return excludeFromProfile return excludeFromProfile
} }
const isNotOpenAI = (chatId) => {
return getModelDetail(getChatSettings(chatId).model).type !== 'OpenAIChat'
}
const isNotPetals = (chatId) => {
return getModelDetail(getChatSettings(chatId).model).type !== 'Petals'
}
const gptDefaults = { const gptDefaults = {
model: defaultModel, model: '',
messages: [], messages: [],
temperature: 1, temperature: 1,
top_p: 1, top_p: 1,
@ -94,6 +109,10 @@ const defaults:ChatSettings = {
hppContinuePrompt: '', hppContinuePrompt: '',
hppWithSummaryPrompt: false, hppWithSummaryPrompt: false,
imageGenerationSize: '', imageGenerationSize: '',
stopSequence: '',
userMessageStart: '',
assistantMessageStart: '',
systemMessageStart: '',
// useResponseAlteration: false, // useResponseAlteration: false,
// responseAlterations: [], // responseAlterations: [],
isDirty: false isDirty: false
@ -104,7 +123,10 @@ export const globalDefaults: GlobalSettings = {
lastProfile: 'default', lastProfile: 'default',
defaultProfile: 'default', defaultProfile: 'default',
hideSummarized: false, hideSummarized: false,
chatSort: 'created' chatSort: 'created',
openAICompletionEndpoint: '',
enablePetals: false,
pedalsEndpoint: ''
} }
const excludeFromProfile = { const excludeFromProfile = {
@ -399,7 +421,13 @@ const modelSetting: ChatSetting & SettingSelect = {
key: 'model', key: 'model',
name: 'Model', name: 'Model',
title: 'The model to use - GPT-3.5 is cheaper, but GPT-4 is more powerful.', title: 'The model to use - GPT-3.5 is cheaper, but GPT-4 is more powerful.',
header: 'Below are the settings that OpenAI allows to be changed for the API calls. See the <a target="_blank" href="https://platform.openai.com/docs/api-reference/chat/create">OpenAI API docs</a> for more details.', header: (chatId) => {
if (isNotOpenAI(chatId)) {
return 'Below are the settings that can be changed for the API calls. See <a target="_blank" href="https://platform.openai.com/docs/api-reference/chat/create">this overview</a> to start, though not all settings translate to Petals.'
} else {
return 'Below are the settings that OpenAI allows to be changed for the API calls. See the <a target="_blank" href="https://platform.openai.com/docs/api-reference/chat/create">OpenAI API docs</a> for more details.'
}
},
headerClass: 'is-warning', headerClass: 'is-warning',
options: [], options: [],
type: 'select', type: 'select',
@ -417,7 +445,8 @@ const chatSettingsList: ChatSetting[] = [
key: 'stream', key: 'stream',
name: 'Stream Response', name: 'Stream Response',
title: 'Stream responses as they are generated.', title: 'Stream responses as they are generated.',
type: 'boolean' type: 'boolean',
hide: isNotOpenAI
}, },
{ {
key: 'temperature', key: 'temperature',
@ -432,7 +461,7 @@ const chatSettingsList: ChatSetting[] = [
}, },
{ {
key: 'top_p', key: 'top_p',
name: 'Nucleus Sampling', name: 'Nucleus Sampling (Top-p)',
title: 'An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.\n' + title: 'An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.\n' +
'\n' + '\n' +
'We generally recommend altering this or temperature but not both', 'We generally recommend altering this or temperature but not both',
@ -448,7 +477,8 @@ const chatSettingsList: ChatSetting[] = [
min: 1, min: 1,
max: 10, max: 10,
step: 1, step: 1,
type: 'number' type: 'number',
hide: isNotOpenAI
}, },
{ {
key: 'max_tokens', key: 'max_tokens',
@ -460,6 +490,7 @@ const chatSettingsList: ChatSetting[] = [
max: 32768, max: 32768,
step: 1, step: 1,
type: 'number', type: 'number',
hide: isNotOpenAI,
forceApi: true // Since default here is different than gpt default, will make sure we always send it forceApi: true // Since default here is different than gpt default, will make sure we always send it
}, },
{ {
@ -469,7 +500,8 @@ const chatSettingsList: ChatSetting[] = [
min: -2, min: -2,
max: 2, max: 2,
step: 0.2, step: 0.2,
type: 'number' type: 'number',
hide: isNotOpenAI
}, },
{ {
key: 'frequency_penalty', key: 'frequency_penalty',
@ -478,7 +510,52 @@ const chatSettingsList: ChatSetting[] = [
min: -2, min: -2,
max: 2, max: 2,
step: 0.2, step: 0.2,
type: 'number' type: 'number',
hide: isNotOpenAI
},
{
key: 'stopSequence',
name: 'Stop Sequence',
title: 'Characters used to separate messages in the message chain.',
type: 'text',
placeholder: (chatId) => {
const val = getModelDetail(getChatSettings(chatId).model).stop
return (val && val[0]) || ''
},
hide: isNotPetals
},
{
key: 'userMessageStart',
name: 'User Message Start Sequence',
title: 'Sequence to denote user messages in the message chain.',
type: 'text',
placeholder: (chatId) => {
const val = getModelDetail(getChatSettings(chatId).model).userStart
return val || ''
},
hide: isNotPetals
},
{
key: 'assistantMessageStart',
name: 'Assistant Message Start Sequence',
title: 'Sequence to denote assistant messages in the message chain.',
type: 'text',
placeholder: (chatId) => {
const val = getModelDetail(getChatSettings(chatId).model).assistantStart
return val || ''
},
hide: isNotPetals
},
{
key: 'systemMessageStart',
name: 'System Message Start Sequence',
title: 'Sequence to denote system messages in the message chain.',
type: 'text',
placeholder: (chatId) => {
const val = getModelDetail(getChatSettings(chatId).model).systemStart
return val || ''
},
hide: isNotPetals
}, },
{ {
// logit bias editor not implemented yet // logit bias editor not implemented yet
@ -497,7 +574,7 @@ const chatSettingsList: ChatSetting[] = [
// console.log('logit_bias', val, getChatSettings(chatId).logit_bias) // console.log('logit_bias', val, getChatSettings(chatId).logit_bias)
if (!val) return null if (!val) return null
const tokenized:Record<number, number> = Object.entries(val).reduce((a, [k, v]) => { const tokenized:Record<number, number> = Object.entries(val).reduce((a, [k, v]) => {
const tokens:number[] = encode(k) const tokens:number[] = getTokens(getChatSettings(chatId).model, k)
tokens.forEach(t => { a[t] = v }) tokens.forEach(t => { a[t] = v })
return a return a
}, {} as Record<number, number>) }, {} as Record<number, number>)
@ -536,6 +613,21 @@ const globalSettingsList:GlobalSetting[] = [
key: 'hideSummarized', key: 'hideSummarized',
name: 'Hide Summarized Messages', name: 'Hide Summarized Messages',
type: 'boolean' type: 'boolean'
},
{
key: 'openAICompletionEndpoint',
name: 'OpenAI Completions Endpoint',
type: 'text'
},
{
key: 'enablePetals',
name: 'Enable Petals APIs',
type: 'boolean'
},
{
key: 'pedalsEndpoint',
name: 'Petals API Endpoint',
type: 'text'
} }
] ]
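Because header, headerClass and placeholder can now be either a plain string or a function of the chat id (the ValueFn type added to Types.svelte below), consumers need one small resolution step. A hedged sketch, assuming the valueOf helper from Util.svelte at the end of this commit:

import { valueOf } from './Util.svelte'
import { getChatSettingObjectByKey } from './Settings.svelte'

// Resolve the model setting's header for a specific chat.
const modelHeader = (chatId: number): string => {
  const setting = getChatSettingObjectByKey('model')
  // valueOf invokes the field when it is a function, otherwise returns it unchanged.
  return valueOf(chatId, setting.header) || ''
}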

View File

@ -1,7 +1,7 @@
<script lang="ts"> <script lang="ts">
import { params } from 'svelte-spa-router' import { params } from 'svelte-spa-router'
import ChatMenuItem from './ChatMenuItem.svelte' import ChatMenuItem from './ChatMenuItem.svelte'
import { apiKeyStorage, chatsStorage, pinMainMenu, checkStateChange, getChatSortOption, setChatSortOption } from './Storage.svelte' import { apiKeyStorage, chatsStorage, pinMainMenu, checkStateChange, getChatSortOption, setChatSortOption, hasActiveModels } from './Storage.svelte'
import Fa from 'svelte-fa/src/fa.svelte' import Fa from 'svelte-fa/src/fa.svelte'
import { faSquarePlus, faKey } from '@fortawesome/free-solid-svg-icons/index' import { faSquarePlus, faKey } from '@fortawesome/free-solid-svg-icons/index'
import ChatOptionMenu from './ChatOptionMenu.svelte' import ChatOptionMenu from './ChatOptionMenu.svelte'
@ -14,10 +14,12 @@
$: activeChatId = $params && $params.chatId ? parseInt($params.chatId) : undefined $: activeChatId = $params && $params.chatId ? parseInt($params.chatId) : undefined
let sortOption = getChatSortOption() let sortOption = getChatSortOption()
let hasModels = hasActiveModels()
const onStateChange = (...args:any) => { const onStateChange = (...args:any) => {
sortOption = getChatSortOption() sortOption = getChatSortOption()
sortedChats = $chatsStorage.sort(sortOption.sortFn) sortedChats = $chatsStorage.sort(sortOption.sortFn)
hasModels = hasActiveModels()
} }
$: onStateChange($checkStateChange) $: onStateChange($checkStateChange)
@ -72,14 +74,14 @@
</div> </div>
</div> </div>
<div class="level-right"> <div class="level-right">
{#if !$apiKeyStorage} {#if !hasModels}
<div class="level-item"> <div class="level-item">
<a href={'#/'} class="panel-block" class:is-disabled={!$apiKeyStorage} <a href={'#/'} class="panel-block" class:is-disabled={!$apiKeyStorage}
><span class="greyscale mr-1"><Fa icon={faKey} /></span> API key</a ><span class="greyscale mr-1"><Fa icon={faKey} /></span> API key</a
></div> ></div>
{:else} {:else}
<div class="level-item"> <div class="level-item">
<button on:click={() => { $pinMainMenu = false; startNewChatWithWarning(activeChatId) }} class="panel-block button" title="Start new chat with default profile" class:is-disabled={!$apiKeyStorage} <button on:click={() => { $pinMainMenu = false; startNewChatWithWarning(activeChatId) }} class="panel-block button" title="Start new chat with default profile" class:is-disabled={!hasModels}
><span class="greyscale mr-1"><Fa icon={faSquarePlus} /></span> New chat</button> ><span class="greyscale mr-1"><Fa icon={faSquarePlus} /></span> New chat</button>
</div> </div>
{/if} {/if}

View File

@ -1,25 +1,43 @@
<script context="module" lang="ts"> <script context="module" lang="ts">
import { getModelDetail } from './Models.svelte' import { countTokens, getModelDetail, getRoleTag, getStopSequence } from './Models.svelte'
import type { Message, Model, Usage } from './Types.svelte' import type { Chat, Message, Model, Usage } from './Types.svelte'
import { encode } from 'gpt-tokenizer'
export const getPrice = (tokens: Usage, model: Model): number => { export const getPrice = (tokens: Usage, model: Model): number => {
const t = getModelDetail(model) const t = getModelDetail(model)
return ((tokens.prompt_tokens * t.prompt) + (tokens.completion_tokens * t.completion)) return ((tokens.prompt_tokens * t.prompt) + (tokens.completion_tokens * t.completion))
} }
export const countPromptTokens = (prompts:Message[], model:Model):number => { export const countPromptTokens = (prompts:Message[], model:Model, chat: Chat):number => {
return prompts.reduce((a, m) => { const detail = getModelDetail(model)
a += countMessageTokens(m, model) const count = prompts.reduce((a, m) => {
a += countMessageTokens(m, model, chat)
return a return a
}, 0) + 3 // Always seems to be message counts + 3 }, 0)
switch (detail.type) {
case 'Petals':
return count
case 'OpenAIChat':
default:
// Not sure how OpenAI formats it, but this seems to get close to the right counts.
// Would be nice to know. This works for gpt-3.5. gpt-4 could be different.
// Complete stab in the dark here -- update if you know where all the extra tokens really come from.
return count + 3 // Always seems to be message counts + 3
}
} }
export const countMessageTokens = (message:Message, model:Model):number => { export const countMessageTokens = (message:Message, model:Model, chat: Chat):number => {
// Not sure how OpenAI formats it, but this seems to get close to the right counts. const detail = getModelDetail(model)
// Would be nice to know. This works for gpt-3.5. gpt-4 could be different. const stop = getStopSequence(chat)
// Complete stab in the dark here -- update if you know where all the extra tokens really come from. switch (detail.type) {
return encode('## ' + message.role + ' ##:\r\n\r\n' + message.content + '\r\n\r\n\r\n').length case 'Petals':
return countTokens(model, getRoleTag(message.role, model, chat) + ': ' + message.content + (stop || '###'))
case 'OpenAIChat':
default:
// Not sure how OpenAI formats it, but this seems to get close to the right counts.
// Would be nice to know. This works for gpt-3.5. gpt-4 could be different.
// Complete stab in the dark here -- update if you know where all the extra tokens really come from.
return countTokens(model, '## ' + message.role + ' ##:\r\n\r\n' + message.content + '\r\n\r\n\r\n')
}
} }
export const getModelMaxTokens = (model:Model):number => { export const getModelMaxTokens = (model:Model):number => {
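A hedged note on usage: the counters above are heuristics, so totals are approximate and depend on the tokenizer. Written alongside the functions above (the file name is not shown here, so no import path is assumed), a minimal call might look like:

// For OpenAI models each message is framed as '## role ##:\r\n\r\n...' plus 3 extra
// tokens for the whole chain; for Petals models each message is the role tag,
// ': ', the content and the stop sequence, with nothing added per chain.
const estimateChatTokens = (chat: Chat, messages: Message[]): number =>
  countPromptTokens(messages, chat.settings.model, chat)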

View File

@ -30,6 +30,11 @@
return get(apiKeyStorage) return get(apiKeyStorage)
} }
export const hasActiveModels = (): boolean => {
const globalSettings = get(globalStorage) || {}
return !!get(apiKeyStorage) || !!globalSettings.enablePetals
}
export const newChatID = (): number => { export const newChatID = (): number => {
const chats = get(chatsStorage) const chats = get(chatsStorage)
const chatId = chats.reduce((maxId, chat) => Math.max(maxId, chat.id), 0) + 1 const chatId = chats.reduce((maxId, chat) => Math.max(maxId, chat.id), 0) + 1

View File

@ -7,7 +7,15 @@ export type Model = typeof supportedModelKeys[number];
export type ImageGenerationSizes = typeof imageGenerationSizeTypes[number]; export type ImageGenerationSizes = typeof imageGenerationSizeTypes[number];
export type RequestType = 'OpenAIChat' | 'OpenAIDall-e' | 'Petals'
export type ModelDetail = { export type ModelDetail = {
type: RequestType;
label?: string;
stop?: string[];
userStart?: string,
assistantStart?: string,
systemStart?: string,
prompt: number; prompt: number;
completion: number; completion: number;
max: number; max: number;
@ -105,6 +113,10 @@ export type ChatSettings = {
trainingPrompts?: Message[]; trainingPrompts?: Message[];
useResponseAlteration?: boolean; useResponseAlteration?: boolean;
responseAlterations?: ResponseAlteration[]; responseAlterations?: ResponseAlteration[];
stopSequence: string;
userMessageStart: string;
assistantMessageStart: string;
systemMessageStart: string;
isDirty?: boolean; isDirty?: boolean;
} & Request; } & Request;
@ -122,16 +134,16 @@ export type Chat = {
}; };
type ResponseOK = { type ResponseOK = {
id: string; id?: string;
object: string; object?: string;
created: number; created?: number;
choices: { choices?: {
index: number; index?: number;
message: Message; message: Message;
finish_reason: string; finish_reason?: string;
delta: Message; delta: Message;
}[]; }[];
usage: Usage; usage?: Usage;
model: Model; model: Model;
}; };
@ -172,6 +184,9 @@ export type GlobalSettings = {
defaultProfile: string; defaultProfile: string;
hideSummarized: boolean; hideSummarized: boolean;
chatSort: ChatSortOptions; chatSort: ChatSortOptions;
openAICompletionEndpoint: string;
enablePetals: boolean;
pedalsEndpoint: string;
}; };
type SettingNumber = { type SettingNumber = {
@ -184,6 +199,7 @@ export type GlobalSettings = {
export type SelectOption = { export type SelectOption = {
value: string|number; value: string|number;
text: string; text: string;
disabled?: boolean;
}; };
export type ChatSortOption = SelectOption & { export type ChatSortOption = SelectOption & {
@ -236,15 +252,17 @@ export type SubSetting = {
settings: any[]; settings: any[];
}; };
export type ValueFn = (chatId:number) => string
export type ChatSetting = { export type ChatSetting = {
key: keyof ChatSettings; key: keyof ChatSettings;
name: string; name: string;
title: string; title: string;
forceApi?: boolean; // force in api requests, even if set to default forceApi?: boolean; // force in api requests, even if set to default
hidden?: boolean; // Hide from setting menus hidden?: boolean; // Hide from setting menus
header?: string; header?: string | ValueFn;
headerClass?: string; headerClass?: string | ValueFn;
placeholder?: string; placeholder?: string | ValueFn;
hide?: (chatId:number) => boolean; hide?: (chatId:number) => boolean;
apiTransform?: (chatId:number, setting:ChatSetting, value:any) => any; apiTransform?: (chatId:number, setting:ChatSetting, value:any) => any;
fieldControls?: FieldControl[]; fieldControls?: FieldControl[];
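With the extended ModelDetail type above, adding another Petals model is just another record entry. A hedged sketch mirroring the Llama 2 entries added in Models.svelte (the model id and sizes here are made up):

import type { ModelDetail } from './Types.svelte'

// Hypothetical entry only, for illustration.
const exampleDetail: ModelDetail = {
  type: 'Petals',
  label: 'Petals - Example-13b-chat',
  stop: ['###', '</s>'],
  userStart: '<|user|>',
  assistantStart: '<|[[CHARACTER_NAME]]|>',
  systemStart: '',
  prompt: 0, // free on the public swarm
  completion: 0,
  max: 2048 // max token buffer for this hypothetical model
}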

View File

@ -147,4 +147,9 @@
newChat() newChat()
} }
export const valueOf = (chatId: number, value: any) => {
if (typeof value === 'function') return value(chatId)
return value
}
</script> </script>