Support Vision for compatible AI models

Closes #1313
Baptiste Arnaud
2024-06-11 10:51:02 +02:00
parent 40ca02df8c
commit ee834999e6
23 changed files with 360 additions and 92 deletions

View File

@@ -12,6 +12,8 @@ import { executeFunction } from '@typebot.io/variables/executeFunction'
import { readDataStream } from 'ai'
import { deprecatedAskAssistantOptions } from '../deprecated'
import { OpenAIAssistantStream } from '../helpers/OpenAIAssistantStream'
+import { isModelCompatibleWithVision } from '../helpers/isModelCompatibleWithVision'
+import { splitUserTextMessageIntoBlocks } from '../helpers/splitUserTextMessageIntoBlocks'

export const askAssistant = createAction({
  auth,
@@ -284,12 +286,16 @@ const createAssistantStream = async ({
    return
  }

+  const assistant = await openai.beta.assistants.retrieve(assistantId)
+
  // Add a message to the thread
  const createdMessage = await openai.beta.threads.messages.create(
    currentThreadId,
    {
      role: 'user',
-      content: message,
+      content: isModelCompatibleWithVision(assistant.model)
+        ? await splitUserTextMessageIntoBlocks(message)
+        : message,
    }
  )
  return OpenAIAssistantStream(

View File

@@ -7,6 +7,14 @@ export const openAIVoices = [
  'shimmer',
] as const

+export const modelsWithImageUrlSupport = [
+  'gpt-4-turbo*',
+  'gpt-4o*',
+  'gpt-4*vision-preview',
+]
+
+export const excludedModelsFromImageUrlSupport = ['gpt-4-turbo-preview']
+
export const defaultOpenAIOptions = {
  baseUrl: 'https://api.openai.com/v1',
  model: 'gpt-3.5-turbo',

View File

@@ -0,0 +1,10 @@
import { wildcardMatch } from '@typebot.io/lib/wildcardMatch'
import {
  excludedModelsFromImageUrlSupport,
  modelsWithImageUrlSupport,
} from '../constants'

export const isModelCompatibleWithVision = (model: string | undefined) =>
  model && !excludedModelsFromImageUrlSupport.includes(model)
    ? wildcardMatch(modelsWithImageUrlSupport)(model)
    : false
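
For reference, a short usage sketch (not part of the commit) of how this helper is expected to classify a few model names, assuming wildcardMatch performs glob-style matching against the patterns defined in constants:

// Hypothetical usage, for illustration only
import { isModelCompatibleWithVision } from './isModelCompatibleWithVision'

isModelCompatibleWithVision('gpt-4o') // true, matches 'gpt-4o*'
isModelCompatibleWithVision('gpt-4-turbo-2024-04-09') // true, matches 'gpt-4-turbo*'
isModelCompatibleWithVision('gpt-4-vision-preview') // true, matches 'gpt-4*vision-preview'
isModelCompatibleWithVision('gpt-4-turbo-preview') // false, explicitly excluded
isModelCompatibleWithVision('gpt-3.5-turbo') // false, no pattern matches
isModelCompatibleWithVision(undefined) // false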

View File

@@ -1,51 +1,81 @@
import type { OpenAI } from 'openai'
import { VariableStore } from '@typebot.io/forge'
-import { isNotEmpty } from '@typebot.io/lib'
+import { isDefined, isEmpty } from '@typebot.io/lib'
import { ChatCompletionOptions } from '../shared/parseChatCompletionOptions'
+import ky, { HTTPError } from 'ky'
+import { defaultOpenAIOptions, modelsWithImageUrlSupport } from '../constants'
+import { isModelCompatibleWithVision } from './isModelCompatibleWithVision'
+import { splitUserTextMessageIntoBlocks } from './splitUserTextMessageIntoBlocks'

-export const parseChatCompletionMessages = ({
-  options: { messages },
+export const parseChatCompletionMessages = async ({
+  options: { messages, model },
  variables,
}: {
  options: ChatCompletionOptions
  variables: VariableStore
-}): OpenAI.Chat.ChatCompletionMessageParam[] => {
-  const parsedMessages = messages
-    ?.flatMap((message) => {
-      if (!message.role) return
+}): Promise<OpenAI.Chat.ChatCompletionMessageParam[]> => {
+  if (!messages) return []
+  const isVisionEnabled = isModelCompatibleWithVision(
+    model ?? defaultOpenAIOptions.model
+  )
+  const parsedMessages = (
+    await Promise.all(
+      messages.map(async (message) => {
+        if (!message.role) return

-      if (message.role === 'Dialogue') {
-        if (!message.dialogueVariableId) return
-        const dialogue = variables.get(message.dialogueVariableId) ?? []
-        const dialogueArr = Array.isArray(dialogue) ? dialogue : [dialogue]
+        if (message.role === 'Dialogue') {
+          if (!message.dialogueVariableId) return
+          const dialogue = variables.get(message.dialogueVariableId) ?? []
+          const dialogueArr = Array.isArray(dialogue) ? dialogue : [dialogue]

-        return dialogueArr.map((dialogueItem, index) => {
-          if (index === 0 && message.startsBy === 'assistant')
-            return {
-              role: 'assistant',
-              content: dialogueItem,
-            }
-          return {
-            role:
-              index % (message.startsBy === 'assistant' ? 1 : 2) === 0
-                ? 'user'
-                : 'assistant',
-            content: dialogueItem,
-          }
-        })
-      }
+          return Promise.all(
+            dialogueArr.map(async (dialogueItem, index) => {
+              if (index === 0 && message.startsBy === 'assistant')
+                return {
+                  role: 'assistant',
+                  content: dialogueItem,
+                }
+              if (index % (message.startsBy === 'assistant' ? 1 : 2) === 0) {
+                return {
+                  role: 'user',
+                  content: isVisionEnabled
+                    ? await splitUserTextMessageIntoBlocks(dialogueItem ?? '')
+                    : dialogueItem,
+                }
+              }
+              return {
+                role: 'assistant',
+                content: dialogueItem,
+              }
+            })
+          )
+        }

-      if (!message.content) return
+        if (!message.content) return

-      return {
-        role: message.role,
-        content: variables.parse(message.content),
-      } satisfies OpenAI.Chat.ChatCompletionMessageParam
-    })
-    .filter(
-      (message) =>
-        isNotEmpty(message?.role) && isNotEmpty(message?.content?.toString())
-    ) as OpenAI.Chat.ChatCompletionMessageParam[]
+        const content = variables.parse(message.content)
+
+        if (isEmpty(content)) return
+
+        if (message.role === 'user')
+          return {
+            role: 'user',
+            content: isVisionEnabled
+              ? await splitUserTextMessageIntoBlocks(content)
+              : content,
+          }
+
+        return {
+          role: message.role,
+          content,
+        }
+      })
+    )
+  )
+    .flat()
+    .filter((message) => {
+      return isDefined(message?.role) && isDefined(message.content)
+    }) as OpenAI.Chat.ChatCompletionMessageParam[]

  return parsedMessages
}
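
For context, a sketch (not from the commit) of the message shape this produces for a user message when the selected model supports vision; the URL and text are illustrative:

import OpenAI from 'openai'

// Hypothetical parsed user message once vision is enabled
const exampleUserMessage: OpenAI.Chat.ChatCompletionMessageParam = {
  role: 'user',
  content: [
    { type: 'text', text: 'What is in this picture?' },
    {
      type: 'image_url',
      image_url: { url: 'https://example.com/cat.png', detail: 'auto' },
    },
  ],
}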

View File

@@ -0,0 +1,53 @@
import ky, { HTTPError } from 'ky'
import OpenAI from 'openai'

export const splitUserTextMessageIntoBlocks = async (
  input: string
): Promise<string | OpenAI.Chat.ChatCompletionContentPart[]> => {
  const urlRegex = /(^|\n\n)(https?:\/\/[^\s]+)(\n\n|$)/g
  const match = input.match(urlRegex)
  if (!match) return input
  let parts: OpenAI.Chat.ChatCompletionContentPart[] = []
  let processedInput = input

  for (const url of match) {
    const textBeforeUrl = processedInput.slice(0, processedInput.indexOf(url))
    if (textBeforeUrl.trim().length > 0) {
      parts.push({ type: 'text', text: textBeforeUrl })
    }
    const cleanUrl = url.trim()

    try {
      const response = await ky.get(cleanUrl)
      if (
        !response.ok ||
        !response.headers.get('content-type')?.startsWith('image/')
      ) {
        parts.push({ type: 'text', text: cleanUrl })
      } else {
        parts.push({
          type: 'image_url',
          image_url: {
            url: url.trim(),
            detail: 'auto',
          },
        })
      }
    } catch (err) {
      if (err instanceof HTTPError) {
        console.log(err.response.status, await err.response.text())
      } else {
        console.error(err)
      }
    }
    processedInput = processedInput.slice(
      processedInput.indexOf(url) + url.length
    )
  }

  if (processedInput.trim().length > 0) {
    parts.push({ type: 'text', text: processedInput })
  }

  return parts
}
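
A usage sketch (not part of the commit): a message whose image URL sits between blank lines is split into text and image_url parts; the URL below is hypothetical, and the image_url part is only produced when the URL responds with an image/* content type:

import { splitUserTextMessageIntoBlocks } from './splitUserTextMessageIntoBlocks'

// Top-level await assumes an ES module context
const blocks = await splitUserTextMessageIntoBlocks(
  'What is in this picture?\n\nhttps://example.com/cat.png\n\nPlease describe it.'
)
// Expected result when the URL serves an image:
// [
//   { type: 'text', text: 'What is in this picture?' },
//   { type: 'image_url', image_url: { url: 'https://example.com/cat.png', detail: 'auto' } },
//   { type: 'text', text: 'Please describe it.' },
// ]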

View File

@@ -16,6 +16,7 @@
"@types/react": "18.2.15",
"typescript": "5.4.5",
"@typebot.io/lib": "workspace:*",
"@typebot.io/variables": "workspace:*"
"@typebot.io/variables": "workspace:*",
"ky": "1.2.3"
}
}

View File

@@ -59,7 +59,7 @@ export const runChatCompletion = async ({
    },
  })) satisfies ChatCompletionTool[] | undefined

-  const messages = parseChatCompletionMessages({ options, variables })
+  const messages = await parseChatCompletionMessages({ options, variables })

  const body = {
    model,

View File

@@ -47,7 +47,7 @@ export const runChatCompletionStream = async ({
    },
  })) satisfies ChatCompletionTool[] | undefined

-  const messages = parseChatCompletionMessages({ options, variables })
+  const messages = await parseChatCompletionMessages({ options, variables })

  const response = await openai.chat.completions.create({
    model,