Changes to summary request and hidden prompt prefix
This commit is contained in:
parent
c9f48bbbb4
commit
a180f501d5
|
@ -4,9 +4,11 @@
|
||||||
const endpointCompletions = import.meta.env.VITE_ENDPOINT_COMPLETIONS || '/v1/chat/completions'
|
const endpointCompletions = import.meta.env.VITE_ENDPOINT_COMPLETIONS || '/v1/chat/completions'
|
||||||
const endpointGenerations = import.meta.env.VITE_ENDPOINT_GENERATIONS || '/v1/images/generations'
|
const endpointGenerations = import.meta.env.VITE_ENDPOINT_GENERATIONS || '/v1/images/generations'
|
||||||
const endpointModels = import.meta.env.VITE_ENDPOINT_MODELS || '/v1/models'
|
const endpointModels = import.meta.env.VITE_ENDPOINT_MODELS || '/v1/models'
|
||||||
|
const endpointEmbeddings = import.meta.env.VITE_ENDPOINT_EMBEDDINGS || '/v1/embeddings'
|
||||||
|
|
||||||
export const getApiBase = ():string => apiBase
|
export const getApiBase = ():string => apiBase
|
||||||
export const getEndpointCompletions = ():string => endpointCompletions
|
export const getEndpointCompletions = ():string => endpointCompletions
|
||||||
export const getEndpointGenerations = ():string => endpointGenerations
|
export const getEndpointGenerations = ():string => endpointGenerations
|
||||||
export const getEndpointModels = ():string => endpointModels
|
export const getEndpointModels = ():string => endpointModels
|
||||||
|
export const getEndpointEmbeddings = ():string => endpointEmbeddings
|
||||||
</script>
|
</script>
|
|
@ -146,7 +146,8 @@ export class ChatRequest {
|
||||||
const maxTokens = getModelMaxTokens(model)
|
const maxTokens = getModelMaxTokens(model)
|
||||||
|
|
||||||
// Inject hidden prompts if requested
|
// Inject hidden prompts if requested
|
||||||
if (!opts.summaryRequest) this.buildHiddenPromptPrefixMessages(filtered, true)
|
// if (!opts.summaryRequest)
|
||||||
|
this.buildHiddenPromptPrefixMessages(filtered, true)
|
||||||
const messagePayload = filtered
|
const messagePayload = filtered
|
||||||
.filter(m => { if (m.skipOnce) { delete m.skipOnce; return false } return true })
|
.filter(m => { if (m.skipOnce) { delete m.skipOnce; return false } return true })
|
||||||
.map(m => {
|
.map(m => {
|
||||||
|
@ -242,7 +243,7 @@ export class ChatRequest {
|
||||||
} else {
|
} else {
|
||||||
const data = JSON.parse(ev.data)
|
const data = JSON.parse(ev.data)
|
||||||
// console.log('data', data)
|
// console.log('data', data)
|
||||||
window.requestAnimationFrame(() => { chatResponse.updateFromAsyncResponse(data) })
|
window.setTimeout(() => { chatResponse.updateFromAsyncResponse(data) }, 1)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
@ -303,10 +304,16 @@ export class ChatRequest {
|
||||||
const hiddenPromptPrefix = mergeProfileFields(chatSettings, chatSettings.hiddenPromptPrefix).trim()
|
const hiddenPromptPrefix = mergeProfileFields(chatSettings, chatSettings.hiddenPromptPrefix).trim()
|
||||||
const lastMessage = messages[messages.length - 1]
|
const lastMessage = messages[messages.length - 1]
|
||||||
const isContinue = lastMessage?.role === 'assistant' && lastMessage.finish_reason === 'length'
|
const isContinue = lastMessage?.role === 'assistant' && lastMessage.finish_reason === 'length'
|
||||||
if (hiddenPromptPrefix && (lastMessage?.role === 'user' || isContinue)) {
|
const isUserPrompt = lastMessage?.role === 'user'
|
||||||
|
if (hiddenPromptPrefix && (isUserPrompt || isContinue)) {
|
||||||
|
let injectedPrompt = false
|
||||||
const results = hiddenPromptPrefix.split(/[\s\r\n]*::EOM::[\s\r\n]*/).reduce((a, m) => {
|
const results = hiddenPromptPrefix.split(/[\s\r\n]*::EOM::[\s\r\n]*/).reduce((a, m) => {
|
||||||
m = m.trim()
|
m = m.trim()
|
||||||
if (m.length) {
|
if (m.length) {
|
||||||
|
if (m.match(/\[\[USER_PROMPT\]\]/)) { // brackets must be escaped; unescaped, "[[USER_PROMPT]" parses as a character class and never matches the placeholder
|
||||||
|
injectedPrompt = true
|
||||||
|
m = m.replace(/\[\[USER_PROMPT\]\]/g, () => lastMessage.content) // keep the result (replace() returns a new string); a function replacer inserts the prompt literally, so "$&"/"$1" in user text is not expanded
|
||||||
|
}
|
||||||
a.push({ role: a.length % 2 === 0 ? 'user' : 'assistant', content: m } as Message)
|
a.push({ role: a.length % 2 === 0 ? 'user' : 'assistant', content: m } as Message)
|
||||||
}
|
}
|
||||||
return a
|
return a
|
||||||
|
@ -324,6 +331,7 @@ export class ChatRequest {
|
||||||
lastMessage.skipOnce = true
|
lastMessage.skipOnce = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (injectedPrompt) results.pop()
|
||||||
return results
|
return results
|
||||||
}
|
}
|
||||||
return []
|
return []
|
||||||
|
@ -407,7 +415,7 @@ export class ChatRequest {
|
||||||
let continueCounter = chatSettings.summaryExtend + 1
|
let continueCounter = chatSettings.summaryExtend + 1
|
||||||
rw = rw.slice(0, 0 - pinBottom)
|
rw = rw.slice(0, 0 - pinBottom)
|
||||||
let reductionPoolSize = countPromptTokens(rw, model)
|
let reductionPoolSize = countPromptTokens(rw, model)
|
||||||
const ss = chatSettings.summarySize
|
const ss = Math.abs(chatSettings.summarySize)
|
||||||
const getSS = ():number => (ss < 1 && ss > 0)
|
const getSS = ():number => (ss < 1 && ss > 0)
|
||||||
? Math.round(reductionPoolSize * ss) // If summarySize between 0 and 1, use percentage of reduced
|
? Math.round(reductionPoolSize * ss) // If summarySize between 0 and 1, use percentage of reduced
|
||||||
: Math.min(ss, reductionPoolSize * 0.5) // If > 1, use token count
|
: Math.min(ss, reductionPoolSize * 0.5) // If > 1, use token count
|
||||||
|
@ -453,13 +461,24 @@ export class ChatRequest {
|
||||||
const summaryIds = [summaryResponse.uuid]
|
const summaryIds = [summaryResponse.uuid]
|
||||||
let loopCount = 0
|
let loopCount = 0
|
||||||
let networkRetry = 2 // number of retries on network error
|
let networkRetry = 2 // number of retries on network error
|
||||||
|
const summaryRequestMessage = summaryRequest.content
|
||||||
|
const mergedRequest = summaryRequestMessage.includes('[[MERGED_PROMPTS]]')
|
||||||
while (continueCounter-- > 0) {
|
while (continueCounter-- > 0) {
|
||||||
let error = false
|
let error = false
|
||||||
|
if (mergedRequest) {
|
||||||
|
const mergedPrompts = rw.map(m => {
|
||||||
|
return '[' + (m.role === 'assistant' ? '[[CHARACTER_NAME]]' : '[[USER_NAME]]') + ']\n' +
|
||||||
|
m.content
|
||||||
|
}).join('\n\n')
|
||||||
|
.replaceAll('[[CHARACTER_NAME]]', chatSettings.characterName)
|
||||||
|
.replaceAll('[[USER_NAME]]', 'Me')
|
||||||
|
summaryRequest.content = summaryRequestMessage.replaceAll('[[MERGED_PROMPTS]]', mergedPrompts)
|
||||||
|
}
|
||||||
try {
|
try {
|
||||||
const summary = await _this.sendRequest(top.concat(rw).concat([summaryRequest]).concat(loopCount > 0 ? [summaryResponse] : []), {
|
const summary = await _this.sendRequest(top.concat(mergedRequest ? [] : rw).concat([summaryRequest]).concat(loopCount > 0 ? [summaryResponse] : []), {
|
||||||
summaryRequest: true,
|
summaryRequest: true,
|
||||||
streaming: opts.streaming,
|
streaming: opts.streaming,
|
||||||
maxTokens: maxSummaryTokens,
|
maxTokens: chatSettings.summarySize < 0 ? 4096 : maxSummaryTokens,
|
||||||
fillMessage: summaryResponse,
|
fillMessage: summaryResponse,
|
||||||
autoAddMessages: true,
|
autoAddMessages: true,
|
||||||
onMessageChange: (m) => {
|
onMessageChange: (m) => {
|
||||||
|
@ -468,8 +487,8 @@ export class ChatRequest {
|
||||||
} as ChatCompletionOpts, {
|
} as ChatCompletionOpts, {
|
||||||
temperature: chatSettings.summaryTemperature, // make summary more deterministic
|
temperature: chatSettings.summaryTemperature, // make summary more deterministic
|
||||||
top_p: 1,
|
top_p: 1,
|
||||||
presence_penalty: 0,
|
// presence_penalty: 0,
|
||||||
frequency_penalty: 0,
|
// frequency_penalty: 0,
|
||||||
...overrides
|
...overrides
|
||||||
} as ChatSettings)
|
} as ChatSettings)
|
||||||
// Wait for the response to complete
|
// Wait for the response to complete
|
||||||
|
|
|
@ -91,6 +91,7 @@ const defaults:ChatSettings = {
|
||||||
trainingPrompts: [],
|
trainingPrompts: [],
|
||||||
hiddenPromptPrefix: '',
|
hiddenPromptPrefix: '',
|
||||||
hppContinuePrompt: '',
|
hppContinuePrompt: '',
|
||||||
|
hppWithSummaryPrompt: false,
|
||||||
imageGenerationSize: '',
|
imageGenerationSize: '',
|
||||||
// useResponseAlteration: false,
|
// useResponseAlteration: false,
|
||||||
// responseAlterations: [],
|
// responseAlterations: [],
|
||||||
|
@ -214,6 +215,14 @@ const systemPromptSettings: ChatSetting[] = [
|
||||||
type: 'textarea',
|
type: 'textarea',
|
||||||
hide: (chatId) => !getChatSettings(chatId).useSystemPrompt || !(getChatSettings(chatId).hiddenPromptPrefix || '').trim()
|
hide: (chatId) => !getChatSettings(chatId).useSystemPrompt || !(getChatSettings(chatId).hiddenPromptPrefix || '').trim()
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
key: 'hppWithSummaryPrompt',
|
||||||
|
name: 'Use Hidden Prompt Prefix before Summary Prompt',
|
||||||
|
title: 'If using Hidden Prompts Prefix, should it also be included before the summary request',
|
||||||
|
placeholder: 'Enter something like [Continue your response below:]',
|
||||||
|
type: 'boolean',
|
||||||
|
hide: (chatId) => !getChatSettings(chatId).useSystemPrompt || !(getChatSettings(chatId).hiddenPromptPrefix || '').trim()
|
||||||
|
},
|
||||||
{
|
{
|
||||||
key: 'trainingPrompts',
|
key: 'trainingPrompts',
|
||||||
name: 'Training Prompts',
|
name: 'Training Prompts',
|
||||||
|
|
|
@ -77,7 +77,7 @@ export type Request = {
|
||||||
max_tokens?: number;
|
max_tokens?: number;
|
||||||
presence_penalty?: number;
|
presence_penalty?: number;
|
||||||
frequency_penalty?: number;
|
frequency_penalty?: number;
|
||||||
logit_bias?: Record<string, any> | null;
|
logit_bias?: Record<string, number> | null;
|
||||||
user?: string;
|
user?: string;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -99,6 +99,7 @@ export type ChatSettings = {
|
||||||
autoStartSession: boolean;
|
autoStartSession: boolean;
|
||||||
hiddenPromptPrefix: string;
|
hiddenPromptPrefix: string;
|
||||||
hppContinuePrompt: string; // hiddenPromptPrefix used, optional glue when trying to continue truncated completion
|
hppContinuePrompt: string; // hiddenPromptPrefix used, optional glue when trying to continue truncated completion
|
||||||
|
hppWithSummaryPrompt: boolean; // also include hiddenPromptPrefix before the summary prompt
|
||||||
imageGenerationSize: ImageGenerationSizes;
|
imageGenerationSize: ImageGenerationSizes;
|
||||||
trainingPrompts?: Message[];
|
trainingPrompts?: Message[];
|
||||||
useResponseAlteration?: boolean;
|
useResponseAlteration?: boolean;
|
||||||
|
|
Loading…
Reference in New Issue