
Support Vision for compatible AI models

Closes #1313
This commit is contained in:
Baptiste Arnaud
2024-06-11 10:51:02 +02:00
parent 40ca02df8c
commit ee834999e6
23 changed files with 360 additions and 92 deletions

View File

@@ -157,10 +157,13 @@ const getExpressionToEvaluate =
return ${options.mapListItemParams?.targetListVariableId}.at(itemIndex)`
}
case 'Append value(s)': {
return `if(!${options.item}) return ${options.variableId};
if(!${options.variableId}) return [${options.item}];
if(!Array.isArray(${options.variableId})) return [${options.variableId}, ${options.item}];
return (${options.variableId}).concat(${options.item});`
const item = parseVariables(state.typebotsQueue[0].typebot.variables)(
options.item
)
return `if(\`${item}\` === '') return ${options.variableId};
if(!${options.variableId}) return [\`${item}\`];
if(!Array.isArray(${options.variableId})) return [${options.variableId}, \`${item}\`];
return (${options.variableId}).concat(\`${item}\`);`
}
case 'Empty': {
return null
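The "Append value(s)" case now parses the item through parseVariables up front and injects the result as a template literal, instead of splicing in a raw expression. A minimal TypeScript sketch of what the generated snippet does at runtime (appendToList, current and item are illustrative names, not part of the diff):

// Rough equivalent of the expression generated for 'Append value(s)'.
// `current` is the variable's existing value, `item` the already-parsed item.
const appendToList = (current: unknown, item: string): unknown => {
  if (item === '') return current // nothing to append
  if (!current) return [item] // start a new list
  if (!Array.isArray(current)) return [current, item] // promote a scalar to a list
  return current.concat(item) // append to the existing list
}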

View File

@@ -1,6 +1,6 @@
{
"name": "@typebot.io/js",
"version": "0.2.85",
"version": "0.2.86",
"description": "Javascript library to display typebots on your website",
"type": "module",
"main": "dist/index.js",

View File

@@ -6,7 +6,7 @@ import { createUniqueId } from 'solid-js'
let abortController: AbortController | null = null
const secondsToWaitBeforeRetries = 3
const maxRetryAttempts = 3
const maxRetryAttempts = 1
export const streamChat =
(context: ClientSideActionContext & { retryAttempt?: number }) =>

View File

@@ -1,6 +1,6 @@
{
"name": "@typebot.io/nextjs",
"version": "0.2.85",
"version": "0.2.86",
"description": "Convenient library to display typebots on your Next.js website",
"main": "dist/index.js",
"types": "dist/index.d.ts",

View File

@@ -1,6 +1,6 @@
{
"name": "@typebot.io/react",
"version": "0.2.85",
"version": "0.2.86",
"description": "Convenient library to display typebots on your React app",
"main": "dist/index.js",
"types": "dist/index.d.ts",

View File

@@ -108,7 +108,7 @@ export const createChatMessage = createAction({
apiKey: apiKey,
})
const messages = parseChatMessages({ options, variables })
const messages = await parseChatMessages({ options, variables })
try {
const reply = await client.messages.create({
@@ -153,7 +153,7 @@ export const createChatMessage = createAction({
apiKey: apiKey,
})
const messages = parseChatMessages({ options, variables })
const messages = await parseChatMessages({ options, variables })
const response = await client.messages.create({
messages,

View File

@@ -12,3 +12,12 @@ export const defaultAnthropicOptions = {
temperature: 1,
maxTokens: 1024,
} as const
export const modelsWithImageUrlSupport = ['claude-3*']
export const supportedImageTypes = [
'image/png',
'image/jpeg',
'image/gif',
'image/webp',
] as const
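These constants are consumed through the wildcard helper added at the end of this commit. A quick illustrative check (example model IDs):

import { wildcardMatch } from '@typebot.io/lib/wildcardMatch'
import { modelsWithImageUrlSupport } from '../constants'

// 'claude-3*' matches any Claude 3 model identifier.
const supportsImageUrls = wildcardMatch(modelsWithImageUrlSupport)
supportsImageUrls('claude-3-opus-20240229') // true
supportsImageUrls('claude-2.1') // false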

View File

@@ -1,52 +1,148 @@
import { Anthropic } from '@anthropic-ai/sdk'
import { options as createMessageOptions } from '../actions/createChatMessage'
import { VariableStore } from '@typebot.io/forge'
import { isNotEmpty } from '@typebot.io/lib'
import { isDefined, isEmpty } from '@typebot.io/lib'
import { z } from '@typebot.io/forge/zod'
import ky, { HTTPError } from 'ky'
import {
defaultAnthropicOptions,
modelsWithImageUrlSupport,
supportedImageTypes,
} from '../constants'
import { wildcardMatch } from '@typebot.io/lib/wildcardMatch'
export const parseChatMessages = ({
options: { messages },
const isModelCompatibleWithImageUrls = (model: string | undefined) =>
model ? wildcardMatch(modelsWithImageUrlSupport)(model) : false
export const parseChatMessages = async ({
options: { messages, model },
variables,
}: {
options: Pick<z.infer<typeof createMessageOptions>, 'messages'>
options: Pick<z.infer<typeof createMessageOptions>, 'messages' | 'model'>
variables: VariableStore
}): Anthropic.Messages.MessageParam[] => {
const parsedMessages = messages
?.flatMap((message) => {
if (!message.role) return
}): Promise<Anthropic.Messages.MessageParam[]> => {
if (!messages) return []
const isVisionEnabled = isModelCompatibleWithImageUrls(
model ?? defaultAnthropicOptions.model
)
const parsedMessages = (
await Promise.all(
messages.map(async (message) => {
if (!message.role) return
if (message.role === 'Dialogue') {
if (!message.dialogueVariableId) return
const dialogue = variables.get(message.dialogueVariableId) ?? []
const dialogueArr = Array.isArray(dialogue) ? dialogue : [dialogue]
if (message.role === 'Dialogue') {
if (!message.dialogueVariableId) return
const dialogue = variables.get(message.dialogueVariableId) ?? []
const dialogueArr = Array.isArray(dialogue) ? dialogue : [dialogue]
return dialogueArr.map((dialogueItem, index) => {
if (index === 0 && message.startsBy === 'assistant')
return {
role: 'assistant',
content: dialogueItem,
}
return Promise.all(
dialogueArr.map(async (dialogueItem, index) => {
if (index === 0 && message.startsBy === 'assistant')
return {
role: 'assistant',
content: dialogueItem,
}
if (index % (message.startsBy === 'assistant' ? 1 : 2) === 0) {
return {
role: 'user',
content: isVisionEnabled
? await splitUserTextMessageIntoBlocks(dialogueItem ?? '')
: dialogueItem,
}
}
return {
role: 'assistant',
content: dialogueItem,
}
})
)
}
if (!message.content) return
const content = variables.parse(message.content)
if (isEmpty(content)) return
if (message.role === 'user')
return {
role:
index % (message.startsBy === 'assistant' ? 1 : 2) === 0
? 'user'
: 'assistant',
content: dialogueItem,
role: 'user',
content: isVisionEnabled
? await splitUserTextMessageIntoBlocks(content)
: content,
}
})
}
if (!message.content) return
return {
role: message.role,
content: variables.parse(message.content),
} satisfies Anthropic.Messages.MessageParam
})
.filter(
(message) =>
isNotEmpty(message?.role) && isNotEmpty(message?.content?.toString())
) as Anthropic.Messages.MessageParam[]
return {
role: message.role,
content,
}
})
)
)
.flat()
.filter((message) => {
return isDefined(message?.role) && isDefined(message.content)
}) as Anthropic.Messages.MessageParam[]
return parsedMessages
}
const splitUserTextMessageIntoBlocks = async (
input: string
): Promise<
| string
| (Anthropic.Messages.TextBlockParam | Anthropic.Messages.ImageBlockParam)[]
> => {
const urlRegex = /(^|\n\n)(https?:\/\/[^\s]+)(\n\n|$)/g
const match = input.match(urlRegex)
if (!match) return input
const parts: (
| Anthropic.Messages.TextBlockParam
| Anthropic.Messages.ImageBlockParam
)[] = []
let processedInput = input
for (const url of match) {
const textBeforeUrl = processedInput.slice(0, processedInput.indexOf(url))
if (textBeforeUrl.trim().length > 0) {
parts.push({ type: 'text', text: textBeforeUrl })
}
const cleanUrl = url.trim()
try {
const response = await ky.get(cleanUrl)
if (
!response.ok ||
!supportedImageTypes.includes(
response.headers.get('content-type') as any
)
) {
parts.push({ type: 'text', text: cleanUrl })
} else {
parts.push({
type: 'image',
source: {
data: Buffer.from(await response.arrayBuffer()).toString('base64'),
type: 'base64',
media_type: response.headers.get('content-type') as any,
},
})
}
} catch (err) {
if (err instanceof HTTPError) {
console.log(err.response.status, await err.response.text())
} else {
console.error(err)
}
}
processedInput = processedInput.slice(
processedInput.indexOf(url) + url.length
)
}
if (processedInput.trim().length > 0) {
parts.push({ type: 'text', text: processedInput })
}
return parts
}
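To illustrate what splitUserTextMessageIntoBlocks is meant to produce, here is a hypothetical input and output, assuming the URL responds with a content type listed in supportedImageTypes:

const input = 'What is in this picture?\n\nhttps://example.com/cat.png'
const blocks = await splitUserTextMessageIntoBlocks(input)
// => [
//   { type: 'text', text: 'What is in this picture?' },
//   {
//     type: 'image',
//     source: { type: 'base64', media_type: 'image/png', data: '<base64 of the fetched bytes>' },
//   },
// ]
// Inputs with no standalone URL are returned as a plain string; URLs with an
// unsupported content type are kept as text blocks instead.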

View File

@@ -10,11 +10,13 @@
"@typebot.io/forge": "workspace:*",
"@typebot.io/lib": "workspace:*",
"@typebot.io/tsconfig": "workspace:*",
"@types/node": "^20.14.2",
"@types/react": "18.2.15",
"typescript": "5.4.5"
},
"dependencies": {
"@anthropic-ai/sdk": "0.20.6",
"ai": "3.1.12"
"ai": "3.1.12",
"ky": "1.2.3"
}
}

View File

@@ -12,6 +12,8 @@ import { executeFunction } from '@typebot.io/variables/executeFunction'
import { readDataStream } from 'ai'
import { deprecatedAskAssistantOptions } from '../deprecated'
import { OpenAIAssistantStream } from '../helpers/OpenAIAssistantStream'
import { isModelCompatibleWithVision } from '../helpers/isModelCompatibleWithVision'
import { splitUserTextMessageIntoBlocks } from '../helpers/splitUserTextMessageIntoBlocks'
export const askAssistant = createAction({
auth,
@@ -284,12 +286,16 @@ const createAssistantStream = async ({
return
}
const assistant = await openai.beta.assistants.retrieve(assistantId)
// Add a message to the thread
const createdMessage = await openai.beta.threads.messages.create(
currentThreadId,
{
role: 'user',
content: message,
content: isModelCompatibleWithVision(assistant.model)
? await splitUserTextMessageIntoBlocks(message)
: message,
}
)
return OpenAIAssistantStream(

View File

@@ -7,6 +7,14 @@ export const openAIVoices = [
'shimmer',
] as const
export const modelsWithImageUrlSupport = [
'gpt-4-turbo*',
'gpt-4o*',
'gpt-4*vision-preview',
]
export const excludedModelsFromImageUrlSupport = ['gpt-4-turbo-preview']
export const defaultOpenAIOptions = {
baseUrl: 'https://api.openai.com/v1',
model: 'gpt-3.5-turbo',

View File

@@ -0,0 +1,10 @@
import { wildcardMatch } from '@typebot.io/lib/wildcardMatch'
import {
excludedModelsFromImageUrlSupport,
modelsWithImageUrlSupport,
} from '../constants'
export const isModelCompatibleWithVision = (model: string | undefined) =>
model && !excludedModelsFromImageUrlSupport.includes(model)
? wildcardMatch(modelsWithImageUrlSupport)(model)
: false
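A few sanity-check calls (illustrative, not tests from the repo):

isModelCompatibleWithVision('gpt-4o-2024-05-13') // true, matches 'gpt-4o*'
isModelCompatibleWithVision('gpt-4-turbo-preview') // false, explicitly excluded
isModelCompatibleWithVision('gpt-3.5-turbo') // false, no pattern matches
isModelCompatibleWithVision(undefined) // false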

View File

@@ -1,51 +1,81 @@
import type { OpenAI } from 'openai'
import { VariableStore } from '@typebot.io/forge'
import { isNotEmpty } from '@typebot.io/lib'
import { isDefined, isEmpty } from '@typebot.io/lib'
import { ChatCompletionOptions } from '../shared/parseChatCompletionOptions'
import ky, { HTTPError } from 'ky'
import { defaultOpenAIOptions, modelsWithImageUrlSupport } from '../constants'
import { isModelCompatibleWithVision } from './isModelCompatibleWithVision'
import { splitUserTextMessageIntoBlocks } from './splitUserTextMessageIntoBlocks'
export const parseChatCompletionMessages = ({
options: { messages },
export const parseChatCompletionMessages = async ({
options: { messages, model },
variables,
}: {
options: ChatCompletionOptions
variables: VariableStore
}): OpenAI.Chat.ChatCompletionMessageParam[] => {
const parsedMessages = messages
?.flatMap((message) => {
if (!message.role) return
}): Promise<OpenAI.Chat.ChatCompletionMessageParam[]> => {
if (!messages) return []
const isVisionEnabled = isModelCompatibleWithVision(
model ?? defaultOpenAIOptions.model
)
const parsedMessages = (
await Promise.all(
messages.map(async (message) => {
if (!message.role) return
if (message.role === 'Dialogue') {
if (!message.dialogueVariableId) return
const dialogue = variables.get(message.dialogueVariableId) ?? []
const dialogueArr = Array.isArray(dialogue) ? dialogue : [dialogue]
if (message.role === 'Dialogue') {
if (!message.dialogueVariableId) return
const dialogue = variables.get(message.dialogueVariableId) ?? []
const dialogueArr = Array.isArray(dialogue) ? dialogue : [dialogue]
return dialogueArr.map((dialogueItem, index) => {
if (index === 0 && message.startsBy === 'assistant')
return {
role: 'assistant',
content: dialogueItem,
}
return Promise.all(
dialogueArr.map(async (dialogueItem, index) => {
if (index === 0 && message.startsBy === 'assistant')
return {
role: 'assistant',
content: dialogueItem,
}
if (index % (message.startsBy === 'assistant' ? 1 : 2) === 0) {
return {
role: 'user',
content: isVisionEnabled
? await splitUserTextMessageIntoBlocks(dialogueItem ?? '')
: dialogueItem,
}
}
return {
role: 'assistant',
content: dialogueItem,
}
})
)
}
if (!message.content) return
const content = variables.parse(message.content)
if (isEmpty(content)) return
if (message.role === 'user')
return {
role:
index % (message.startsBy === 'assistant' ? 1 : 2) === 0
? 'user'
: 'assistant',
content: dialogueItem,
role: 'user',
content: isVisionEnabled
? await splitUserTextMessageIntoBlocks(content)
: content,
}
})
}
if (!message.content) return
return {
role: message.role,
content: variables.parse(message.content),
} satisfies OpenAI.Chat.ChatCompletionMessageParam
})
.filter(
(message) =>
isNotEmpty(message?.role) && isNotEmpty(message?.content?.toString())
) as OpenAI.Chat.ChatCompletionMessageParam[]
return {
role: message.role,
content,
}
})
)
)
.flat()
.filter((message) => {
return isDefined(message?.role) && isDefined(message.content)
}) as OpenAI.Chat.ChatCompletionMessageParam[]
return parsedMessages
}
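For reference, a 'Dialogue' entry is expanded into alternating user/assistant messages by index, with user turns passed through splitUserTextMessageIntoBlocks when vision is enabled. A hypothetical expansion (the variable contents and URL are illustrative, and the URL is assumed to serve an image content type):

// variables.get(dialogueVariableId) returning
//   ['Hello', 'Hi! How can I help?', 'Describe this:\n\nhttps://example.com/dog.jpg']
// is parsed (startsBy unset, vision-capable model) into roughly:
// [
//   { role: 'user', content: 'Hello' },
//   { role: 'assistant', content: 'Hi! How can I help?' },
//   { role: 'user', content: [
//       { type: 'text', text: 'Describe this:' },
//       { type: 'image_url', image_url: { url: 'https://example.com/dog.jpg', detail: 'auto' } },
//     ] },
// ]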

View File

@@ -0,0 +1,53 @@
import ky, { HTTPError } from 'ky'
import OpenAI from 'openai'
export const splitUserTextMessageIntoBlocks = async (
input: string
): Promise<string | OpenAI.Chat.ChatCompletionContentPart[]> => {
const urlRegex = /(^|\n\n)(https?:\/\/[^\s]+)(\n\n|$)/g
const match = input.match(urlRegex)
if (!match) return input
let parts: OpenAI.Chat.ChatCompletionContentPart[] = []
let processedInput = input
for (const url of match) {
const textBeforeUrl = processedInput.slice(0, processedInput.indexOf(url))
if (textBeforeUrl.trim().length > 0) {
parts.push({ type: 'text', text: textBeforeUrl })
}
const cleanUrl = url.trim()
try {
const response = await ky.get(cleanUrl)
if (
!response.ok ||
!response.headers.get('content-type')?.startsWith('image/')
) {
parts.push({ type: 'text', text: cleanUrl })
} else {
parts.push({
type: 'image_url',
image_url: {
url: url.trim(),
detail: 'auto',
},
})
}
} catch (err) {
if (err instanceof HTTPError) {
console.log(err.response.status, await err.response.text())
} else {
console.error(err)
}
}
processedInput = processedInput.slice(
processedInput.indexOf(url) + url.length
)
}
if (processedInput.trim().length > 0) {
parts.push({ type: 'text', text: processedInput })
}
return parts
}

View File

@@ -16,6 +16,7 @@
"@types/react": "18.2.15",
"typescript": "5.4.5",
"@typebot.io/lib": "workspace:*",
"@typebot.io/variables": "workspace:*"
"@typebot.io/variables": "workspace:*",
"ky": "1.2.3"
}
}

View File

@@ -59,7 +59,7 @@ export const runChatCompletion = async ({
},
})) satisfies ChatCompletionTool[] | undefined
const messages = parseChatCompletionMessages({ options, variables })
const messages = await parseChatCompletionMessages({ options, variables })
const body = {
model,

View File

@@ -47,7 +47,7 @@ export const runChatCompletionStream = async ({
},
})) satisfies ChatCompletionTool[] | undefined
const messages = parseChatCompletionMessages({ options, variables })
const messages = await parseChatCompletionMessages({ options, variables })
const response = await openai.chat.completions.create({
model,

View File

@@ -45,6 +45,7 @@
"stripe": "12.13.0",
"unified": "11.0.4",
"validator": "13.11.0",
"wildcard-match": "5.1.3",
"zod": "3.22.4"
}
}

View File

@@ -0,0 +1,3 @@
import wcmatch from 'wildcard-match'
export { wcmatch as wildcardMatch }