diff --git a/src/lib/ApiUtil.svelte b/src/lib/ApiUtil.svelte
index 01b41ff..77edbc7 100644
--- a/src/lib/ApiUtil.svelte
+++ b/src/lib/ApiUtil.svelte
@@ -4,9 +4,11 @@
   const endpointCompletions = import.meta.env.VITE_ENDPOINT_COMPLETIONS || '/v1/chat/completions'
   const endpointGenerations = import.meta.env.VITE_ENDPOINT_GENERATIONS || '/v1/images/generations'
   const endpointModels = import.meta.env.VITE_ENDPOINT_MODELS || '/v1/models'
+  const endpointEmbeddings = import.meta.env.VITE_ENDPOINT_EMBEDDINGS || '/v1/embeddings'
 
   export const getApiBase = ():string => apiBase
   export const getEndpointCompletions = ():string => endpointCompletions
   export const getEndpointGenerations = ():string => endpointGenerations
   export const getEndpointModels = ():string => endpointModels
+  export const getEndpointEmbeddings = ():string => endpointEmbeddings
 </script>
\ No newline at end of file
diff --git a/src/lib/ChatRequest.svelte b/src/lib/ChatRequest.svelte
index 2ab3008..c7c3e38 100644
--- a/src/lib/ChatRequest.svelte
+++ b/src/lib/ChatRequest.svelte
@@ -146,7 +146,7 @@ export class ChatRequest {
     const maxTokens = getModelMaxTokens(model)
 
     // Inject hidden prompts if requested (also before a summary request when enabled)
-    if (!opts.summaryRequest) this.buildHiddenPromptPrefixMessages(filtered, true)
+    if (!opts.summaryRequest || chatSettings.hppWithSummaryPrompt) this.buildHiddenPromptPrefixMessages(filtered, true)
     const messagePayload = filtered
       .filter(m => { if (m.skipOnce) { delete m.skipOnce; return false } return true })
       .map(m => {
@@ -242,7 +242,7 @@ export class ChatRequest {
           } else {
             const data = JSON.parse(ev.data)
             // console.log('data', data)
-            window.requestAnimationFrame(() => { chatResponse.updateFromAsyncResponse(data) })
+            window.setTimeout(() => { chatResponse.updateFromAsyncResponse(data) }, 1)
           }
         }
       },
@@ -303,10 +303,16 @@ export class ChatRequest {
     const hiddenPromptPrefix = mergeProfileFields(chatSettings, chatSettings.hiddenPromptPrefix).trim()
     const lastMessage = messages[messages.length - 1]
     const isContinue = lastMessage?.role === 'assistant' && lastMessage.finish_reason === 'length'
-    if (hiddenPromptPrefix && (lastMessage?.role === 'user' || isContinue)) {
+    const isUserPrompt = lastMessage?.role === 'user'
+    if (hiddenPromptPrefix && (isUserPrompt || isContinue)) {
+      let injectedPrompt = false
       const results = hiddenPromptPrefix.split(/[\s\r\n]*::EOM::[\s\r\n]*/).reduce((a, m) => {
         m = m.trim()
         if (m.length) {
+          if (m.includes('[[USER_PROMPT]]')) {
+            injectedPrompt = true
+            m = m.replace(/\[\[USER_PROMPT\]\]/g, lastMessage.content)
+          }
           a.push({ role: a.length % 2 === 0 ? 'user' : 'assistant', content: m } as Message)
         }
         return a
@@ -324,6 +330,7 @@ export class ChatRequest {
           lastMessage.skipOnce = true
         }
       }
+      if (injectedPrompt) results.pop()
       return results
     }
     return []
@@ -407,7 +414,7 @@ export class ChatRequest {
     let continueCounter = chatSettings.summaryExtend + 1
     rw = rw.slice(0, 0 - pinBottom)
     let reductionPoolSize = countPromptTokens(rw, model)
-    const ss = chatSettings.summarySize
+    const ss = Math.abs(chatSettings.summarySize)
     const getSS = ():number => (ss < 1 && ss > 0)
       ? Math.round(reductionPoolSize * ss) // If summarySize between 0 and 1, use percentage of reduced
       : Math.min(ss, reductionPoolSize * 0.5) // If > 1, use token count
@@ -453,13 +460,24 @@ export class ChatRequest {
     const summaryIds = [summaryResponse.uuid]
     let loopCount = 0
     let networkRetry = 2 // number of retries on network error
+    const summaryRequestMessage = summaryRequest.content
+    const mergedRequest = summaryRequestMessage.includes('[[MERGED_PROMPTS]]')
     while (continueCounter-- > 0) {
       let error = false
+      if (mergedRequest) {
+        const mergedPrompts = rw.map(m => {
+          return '[' + (m.role === 'assistant' ? '[[CHARACTER_NAME]]' : '[[USER_NAME]]') + ']\n' +
+            m.content
+        }).join('\n\n')
+          .replaceAll('[[CHARACTER_NAME]]', chatSettings.characterName)
+          .replaceAll('[[USER_NAME]]', 'Me')
+        summaryRequest.content = summaryRequestMessage.replaceAll('[[MERGED_PROMPTS]]', mergedPrompts)
+      }
       try {
-        const summary = await _this.sendRequest(top.concat(rw).concat([summaryRequest]).concat(loopCount > 0 ? [summaryResponse] : []), {
+        const summary = await _this.sendRequest(top.concat(mergedRequest ? [] : rw).concat([summaryRequest]).concat(loopCount > 0 ? [summaryResponse] : []), {
           summaryRequest: true,
           streaming: opts.streaming,
-          maxTokens: maxSummaryTokens,
+          maxTokens: chatSettings.summarySize < 0 ? 4096 : maxSummaryTokens,
           fillMessage: summaryResponse,
           autoAddMessages: true,
           onMessageChange: (m) => {
@@ -468,8 +486,8 @@ export class ChatRequest {
       } as ChatCompletionOpts, {
         temperature: chatSettings.summaryTemperature, // make summary more deterministic
         top_p: 1,
-        presence_penalty: 0,
-        frequency_penalty: 0,
+        // presence_penalty: 0,
+        // frequency_penalty: 0,
         ...overrides
       } as ChatSettings)
       // Wait for the response to complete
diff --git a/src/lib/Settings.svelte b/src/lib/Settings.svelte
index 519df55..9699517 100644
--- a/src/lib/Settings.svelte
+++ b/src/lib/Settings.svelte
@@ -91,6 +91,7 @@ const defaults:ChatSettings = {
   trainingPrompts: [],
   hiddenPromptPrefix: '',
   hppContinuePrompt: '',
+  hppWithSummaryPrompt: false,
   imageGenerationSize: '',
   // useResponseAlteration: false,
   // responseAlterations: [],
@@ -214,6 +215,13 @@ const systemPromptSettings: ChatSetting[] = [
     type: 'textarea',
     hide: (chatId) => !getChatSettings(chatId).useSystemPrompt || !(getChatSettings(chatId).hiddenPromptPrefix || '').trim()
   },
+  {
+    key: 'hppWithSummaryPrompt',
+    name: 'Use Hidden Prompt Prefix before Summary Prompt',
+    title: 'If using Hidden Prompts Prefix, should it also be included before the summary request',
+    type: 'boolean',
+    hide: (chatId) => !getChatSettings(chatId).useSystemPrompt || !(getChatSettings(chatId).hiddenPromptPrefix || '').trim()
+  },
   {
     key: 'trainingPrompts',
     name: 'Training Prompts',
diff --git a/src/lib/Types.svelte b/src/lib/Types.svelte
index 41721b5..7bb7835 100644
--- a/src/lib/Types.svelte
+++ b/src/lib/Types.svelte
@@ -77,7 +77,7 @@ export type Request = {
   max_tokens?: number;
   presence_penalty?: number;
   frequency_penalty?: number;
-  logit_bias?: Record | null;
+  logit_bias?: Record<string, number> | null;
   user?: string;
 };
 
@@ -99,6 +99,7 @@ export type ChatSettings = {
   autoStartSession: boolean;
   hiddenPromptPrefix: string;
   hppContinuePrompt: string; // hiddenPromptPrefix used, optional glue when trying to continue truncated completion
+  hppWithSummaryPrompt: boolean; // include hiddenPromptPrefix before the summary request
   imageGenerationSize: ImageGenerationSizes;
   trainingPrompts?: Message[];
   useResponseAlteration?: boolean;