Merge pull request #282 from Webifi/main
Bug fixes, Llama template changes

commit 02cc45e553

@@ -3,7 +3,7 @@
     import { cleanContent, mergeProfileFields, prepareSummaryPrompt } from './Profiles.svelte'
     import { countMessageTokens, countPromptTokens, getModelMaxTokens } from './Stats.svelte'
     import type { Chat, ChatCompletionOpts, ChatSettings, Message, Model, Request } from './Types.svelte'
-    import { deleteMessage, getChatSettingValueNullDefault, insertMessages, addError, currentChatMessages, getMessages, updateMessages, deleteSummaryMessage } from './Storage.svelte'
+    import { deleteMessage, getChatSettingValueNullDefault, insertMessages, addError, currentChatMessages, getMessages, updateMessages, deleteSummaryMessage, getChat } from './Storage.svelte'
     import { scrollToBottom, scrollToMessage } from './Util.svelte'
     import { getDefaultModel, getRequestSettingList } from './Settings.svelte'
     import { v4 as uuidv4 } from 'uuid'
@@ -62,7 +62,8 @@ export class ChatRequest {
       async sendRequest (messages: Message[], opts: ChatCompletionOpts, overrides: ChatSettings = {} as ChatSettings): Promise<ChatCompletionResponse> {
         // TODO:  Continue to break this method down to smaller chunks
         const _this = this
-        const chat = _this.chat
+        const chat = getChat(_this.chat.id)
+        this.setChat(chat)
         const chatSettings = _this.chat.settings
         const chatId = chat.id
         const imagePromptDetect = /^\s*(please|can\s+you|will\s+you)*\s*(give|generate|create|show|build|design)\s+(me)*\s*(an|a|set|a\s+set\s+of)*\s*([0-9]+|one|two|three|four)*\s+(image|photo|picture|pic)s*\s*(for\s+me)*\s*(of|[^a-z0-9]+|about|that\s+has|showing|with|having|depicting)\s+[^a-z0-9]*(.*)$/i
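
Note: the change above re-reads the chat record from storage at send time instead of trusting the instance's cached reference, presumably so settings edited elsewhere in the UI are reflected in the request. A minimal sketch of that pattern, with simplified types and with `getChat`/`setChat` standing in for the Storage.svelte helpers:

```ts
interface Chat { id: number; settings: Record<string, unknown> }

const store = new Map<number, Chat>()
const getChat = (id: number): Chat => store.get(id) as Chat

class ChatRequest {
  chat: Chat
  constructor (chat: Chat) { this.chat = chat }
  setChat (chat: Chat): void { this.chat = chat }
  sendRequest (): void {
    // Refresh from storage so a stale cached reference cannot
    // clobber settings changed since this request was constructed.
    const chat = getChat(this.chat.id)
    this.setChat(chat)
    // ...build the request from chat.settings...
  }
}
```
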
@@ -22,14 +22,14 @@ const chatModelBase = {
     See <a target="_blank" href="https://platform.openai.com/docs/api-reference/chat/create">this overview</a> to start, though not all settings translate to Petals.
     <i>Note that some models may mot be functional.  See <a target="_blank" href="https://health.petals.dev">https://health.petals.dev</a> for current status.</i>`,
   check: checkModel,
-  start: '<s>',
+  start: '###',
   stop: ['###', '</s>'],
-  delimiter: '\n###\n\n',
-  userStart: 'User:\n',
+  delimiter: '\n###\n###',
+  userStart: ' User: ',
   userEnd: '',
-  assistantStart: '[[CHARACTER_NAME]]:\n',
+  assistantStart: ' [[CHARACTER_NAME]]: ',
   assistantEnd: '',
-  leadPrompt: '[[CHARACTER_NAME]]:\n',
+  leadPrompt: ' [[CHARACTER_NAME]]: ',
   systemEnd: '',
   prompt: 0.000000, // $0.000 per 1000 tokens prompt
   completion: 0.000000, // $0.000 per 1000 tokens completion
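
The base template now frames every turn with `###` markers instead of `<s>`. To illustrate, assuming turns are joined as start + turn + delimiter + turn (the field order the sibling templates appear to use) and that `[[CHARACTER_NAME]]` resolves to the profile's character name, the new values would render a short exchange roughly like this (a sketch, not the repo's actual serializer):

```ts
// Hypothetical assembly showing the effect of the new template values.
const tpl = {
  start: '###',
  delimiter: '\n###\n###',
  userStart: ' User: ',
  userEnd: '',
  assistantStart: ' Assistant: ', // '[[CHARACTER_NAME]]' resolved to 'Assistant'
  assistantEnd: ''
}

const messages = [
  { role: 'user', content: 'Hi.' },
  { role: 'assistant', content: 'Hello!' }
]

const prompt = tpl.start + messages.map(m =>
  m.role === 'user'
    ? tpl.userStart + m.content + tpl.userEnd
    : tpl.assistantStart + m.content + tpl.assistantEnd
).join(tpl.delimiter)

console.log(prompt)
// ### User: Hi.
// ###
// ### Assistant: Hello!
```
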
@@ -52,35 +52,35 @@ const chatModelBase = {
 } as ModelDetail

 export const chatModels : Record<string, ModelDetail> = {
-      // 'enoch/llama-65b-hf': {
-      //   ...chatModelBase,
-      //   label: 'Petals - Llama-65b'
-      // },
-      // 'codellama/CodeLlama-34b-Instruct-hf ': {
-      //   ...chatModelBase,
-      //   label: 'Petals - CodeLlama-34b',
-      //   max: 2048
-      // },
+      'enoch/llama-65b-hf': {
+        ...chatModelBase,
+        label: 'Petals - Llama-65b',
+        max: 2048
+      },
       'timdettmers/guanaco-65b': {
         ...chatModelBase,
         label: 'Petals - Guanaco-65b',
         max: 2048
       },
-      'meta-llama/Llama-2-70b-hf': {
-        ...chatModelBase,
-        label: 'Petals - Llama-2-70b'
-      },
+      // 'codellama/CodeLlama-34b-Instruct-hf ': {
+      //   ...chatModelBase,
+      //   label: 'Petals - CodeLlama-34b',
+      //   max: 2048
+      // },
+      // 'meta-llama/Llama-2-70b-hf': {
+      //   ...chatModelBase,
+      //   label: 'Petals - Llama-2-70b'
+      // },
       'meta-llama/Llama-2-70b-chat-hf': {
         ...chatModelBase,
         label: 'Petals - Llama-2-70b-chat',
         start: '<s>',
         stop: ['</s>', '[INST]', '[/INST]', '<<SYS>>', '<</SYS>>'],
-        delimiter: ' </s><s>',
-        userStart: '[INST][[SYSTEM_PROMPT]]',
+        delimiter: '</s><s>',
+        userStart: '[INST] User: ',
         userEnd: ' [/INST]',
-        assistantStart: '[[SYSTEM_PROMPT]][[USER_PROMPT]]',
-        systemStart: '<<SYS>>\n',
-        systemEnd: '\n<</SYS>>\n\n'
+        systemStart: '[INST] <<SYS>>\n',
+        systemEnd: '\n<</SYS>> [/INST]'
         // leadPrompt: ''
       },
       'stabilityai/StableBeluga2': {
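
The Llama-2-70b-chat template is reworked so the system prompt is sent as its own `[INST] <<SYS>> ... <</SYS>> [/INST]` block and each user turn opens with `[INST] User: `. Under the same assumed ordering as above (start, system block, then delimiter-joined turns), a system prompt plus one user turn would serialize approximately as:

```ts
// Hypothetical rendering of the updated Llama-2 chat fields.
const start = '<s>'
const delimiter = '</s><s>'
const systemStart = '[INST] <<SYS>>\n'
const systemEnd = '\n<</SYS>> [/INST]'
const userStart = '[INST] User: '
const userEnd = ' [/INST]'

const prompt =
  start +
  systemStart + 'You are a helpful assistant.' + systemEnd +
  delimiter +
  userStart + 'Hi.' + userEnd

console.log(prompt)
// <s>[INST] <<SYS>>
// You are a helpful assistant.
// <</SYS>> [/INST]</s><s>[INST] User: Hi. [/INST]
```

This closes the system block with its own `[/INST]` rather than nesting it inside the first user instruction as Meta's reference chat format does, which keeps the system prompt and user turns independently delimited.
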
@@ -42,6 +42,8 @@ export const chatRequest = async (
       const signal = chatRequest.controller.signal
       const providerData = chatRequest.providerData.petals || {}
       chatRequest.providerData.petals = providerData
+      const modelChanged = model !== providerData.lastModel
+      providerData.lastModel = model
       let ws: WebSocket = providerData.ws
       const abortListener = (e:Event) => {
         chatRequest.updating = false
@@ -161,7 +163,14 @@ export const chatRequest = async (

       let maxLen = Math.min(opts.maxTokens || chatSettings.max_tokens || maxTokens, maxTokens)

-      let inputPrompt = startSequence
+      let midDel = ''
+      for (let i = 0, l = delimiter.length; i < l; i++) {
+        const chk = delimiter.slice(0, i)
+        if ((providerData.knownBuffer || '').slice(0 - (i + 1)) === chk) midDel = chk
+      }
+      midDel = midDel.length ? delimiter.slice(0, 0 - midDel.length) : delimiter
+
+      let inputPrompt = midDel

       const getNewWs = ():Promise<WebSocket> => new Promise<WebSocket>((resolve, reject) => {
         // console.warn('requesting new ws')
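
The new `midDel` logic replaces the fixed `startSequence`: before building the prompt it checks how much of the delimiter already sits at the tail of `providerData.knownBuffer` and prepends only the remainder, so the delimiter is not doubled when part of it has already been sent over the held socket. A simplified restatement of that idea (assumed semantics, not a line-for-line port of the loop above):

```ts
// Find the longest prefix of the delimiter already sitting at the end of
// knownBuffer, and return only the part still left to send.
function remainingDelimiter (knownBuffer: string, delimiter: string): string {
  let overlap = ''
  for (let i = 1; i <= delimiter.length; i++) {
    const prefix = delimiter.slice(0, i)
    if (knownBuffer.endsWith(prefix)) overlap = prefix
  }
  return delimiter.slice(overlap.length)
}

// remainingDelimiter('...Hello!\n###', '\n###\n###') === '\n###'
```
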
@@ -183,7 +192,7 @@ export const chatRequest = async (
             throw err
           }
           // console.warn('got new ws')
-          inputPrompt = lastPrompt
+          inputPrompt = lastPrompt + delimiter
           providerData.knownBuffer = ''
           providerData.ws = nws
           resolve(nws)
@@ -221,7 +230,8 @@ export const chatRequest = async (
         const kb = providerData.knownBuffer.replace(rgxp, '')
         const lp = lastPrompt.replace(rgxp, '')
         const lm = kb === lp
-        if (!lm || countTokens(model, providerData.knownBuffer + inputPrompt) >= maxTokens) {
+        if (!chatSettings.holdSocket || modelChanged || !lm ||
+            countTokens(model, providerData.knownBuffer + inputPrompt) >= maxTokens) {
           wsOpen && ws.close()
           ws = await getNewWs()
         }
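
With `holdSocket` and `modelChanged` added to the condition, an open WebSocket is now reused only when the user has opted into holding it, the model is unchanged since the previous send, the server-side buffer still matches the last prompt, and the combined buffer fits the token budget. A condensed restatement of the decision (names mirror the diff; the real code first strips whitespace from both buffers with a regex):

```ts
// Returns true when the existing Petals socket must be torn down
// and replaced before sending the next prompt.
function mustReconnect (opts: {
  holdSocket: boolean    // chat setting: keep the socket between sends
  modelChanged: boolean  // model differs from providerData.lastModel
  buffersMatch: boolean  // knownBuffer still mirrors lastPrompt
  bufferTokens: number   // tokens in knownBuffer + inputPrompt
  maxTokens: number      // model context limit
}): boolean {
  return !opts.holdSocket || opts.modelChanged ||
    !opts.buffersMatch || opts.bufferTokens >= opts.maxTokens
}
```
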
@@ -231,7 +241,7 @@ export const chatRequest = async (
         ws = await getNewWs()
       }

-      inputPrompt += delimiter + nextPrompt
+      inputPrompt += nextPrompt
       providerData.knownBuffer += inputPrompt

       // console.log(
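
Taken together, the delimiter handling moves from the tail of the pipeline to the head: `inputPrompt` now starts from `midDel` (or from `lastPrompt + delimiter` after a reconnect), so this final concatenation no longer prepends `delimiter` itself, and each turn should carry exactly one delimiter regardless of how the socket was obtained.
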