
Support Vision for compatible AI models

Closes #1313
This commit is contained in:
Baptiste Arnaud
2024-06-11 10:51:02 +02:00
parent 40ca02df8c
commit ee834999e6
23 changed files with 360 additions and 92 deletions

View File

@@ -11,7 +11,7 @@ import {
sessionOnlySetVariableOptions,
valueTypes,
} from '@typebot.io/schemas/features/blocks/logic/setVariable/constants'
import { TextInput } from '@/components/inputs'
import { TextInput, Textarea } from '@/components/inputs'
import { isDefined } from '@typebot.io/lib'
import { useTypebot } from '@/features/editor/providers/TypebotProvider'
import { isInputBlock } from '@typebot.io/schemas/helpers'
@@ -215,7 +215,7 @@ const SetVariableValue = ({
)
}
case 'Append value(s)': {
return <TextInput defaultValue={options.item} onChange={updateItem} />
return <Textarea defaultValue={options.item} onChange={updateItem} />
}
case 'Moment of the day': {
return (

View File

@@ -39,3 +39,15 @@ Finally, save the response of the assistant to a variable in order to append it
alt="Claude AI assistant message variable"
/>
</Frame>
## Vision support
`Create Chat Message` and `Ask Assistant` blocks support vision: Typebot automatically detects image URLs in any user message sent to Anthropic and parses them. To be detected properly, the URL must be isolated from the rest of the text message.
If the selected model is [not compatible with vision](https://docs.anthropic.com/en/docs/vision), the image URL is parsed as plain text. Here is an example of a message with an image URL:
```
What's in this picture?
https://domain.com/image.png
```
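Under the hood (see the `parseChatMessages` change below), the detected URL is downloaded and, if it responds with a supported image content type, inlined as a base64 image block next to the surrounding text. The sketch below is only illustrative and not part of this commit; the values are placeholders showing roughly what the example message becomes for a vision-capable Claude model:
```ts
import { Anthropic } from '@anthropic-ai/sdk'

// Illustrative shape only: the text stays a text block and the isolated URL
// is fetched and inlined as a base64 image block.
const userMessage: Anthropic.Messages.MessageParam = {
  role: 'user',
  content: [
    { type: 'text', text: "What's in this picture?" },
    {
      type: 'image',
      source: {
        type: 'base64',
        media_type: 'image/png', // taken from the response's content-type header
        data: '<base64-encoded image bytes>',
      },
    },
  ],
}
```
If the URL does not resolve to a supported image type (png, jpeg, gif, webp), it stays in the message as plain text.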

View File

@@ -84,6 +84,18 @@ I also demonstrate how formatting can be affected by the presence of text before
<LoomVideo id="35dc8af6b9244762acc4a5acf275fb43" />
## Vision support
`Create Chat Message` and `Ask Assistant` blocks support vision: Typebot automatically detects image URLs in any user message sent to OpenAI and parses them. To be detected properly, the URL must be isolated from the rest of the text message.
If the selected model is [not compatible with vision](https://platform.openai.com/docs/models), the image URL is parsed as plain text. Here is an example of a message with an image URL:
```
What's in this picture?
https://domain.com/image.png
```
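Under the hood (see the `splitUserTextMessageIntoBlocks` helper below), the detected URL is checked for an `image/*` content type and then forwarded as an `image_url` content part next to the surrounding text. The sketch below is only illustrative and not part of this commit:
```ts
import type { OpenAI } from 'openai'

// Illustrative shape only: the text stays a text part and the isolated URL
// is forwarded as an image_url part.
const content: OpenAI.Chat.ChatCompletionContentPart[] = [
  { type: 'text', text: "What's in this picture?" },
  {
    type: 'image_url',
    image_url: { url: 'https://domain.com/image.png', detail: 'auto' },
  },
]
```
If the URL does not respond with an image content type, it is kept in the message as plain text.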
## Troubleshooting
### Error message: "OpenAI block returned error"

View File

@@ -157,10 +157,13 @@ const getExpressionToEvaluate =
return ${options.mapListItemParams?.targetListVariableId}.at(itemIndex)`
}
case 'Append value(s)': {
return `if(!${options.item}) return ${options.variableId};
if(!${options.variableId}) return [${options.item}];
if(!Array.isArray(${options.variableId})) return [${options.variableId}, ${options.item}];
return (${options.variableId}).concat(${options.item});`
const item = parseVariables(state.typebotsQueue[0].typebot.variables)(
options.item
)
return `if(\`${item}\` === '') return ${options.variableId};
if(!${options.variableId}) return [\`${item}\`];
if(!Array.isArray(${options.variableId})) return [${options.variableId}, \`${item}\`];
return (${options.variableId}).concat(\`${item}\`);`
}
case 'Empty': {
return null

View File

@@ -1,6 +1,6 @@
{
"name": "@typebot.io/js",
"version": "0.2.85",
"version": "0.2.86",
"description": "Javascript library to display typebots on your website",
"type": "module",
"main": "dist/index.js",

View File

@@ -6,7 +6,7 @@ import { createUniqueId } from 'solid-js'
let abortController: AbortController | null = null
const secondsToWaitBeforeRetries = 3
const maxRetryAttempts = 3
const maxRetryAttempts = 1
export const streamChat =
(context: ClientSideActionContext & { retryAttempt?: number }) =>

View File

@@ -1,6 +1,6 @@
{
"name": "@typebot.io/nextjs",
"version": "0.2.85",
"version": "0.2.86",
"description": "Convenient library to display typebots on your Next.js website",
"main": "dist/index.js",
"types": "dist/index.d.ts",

View File

@@ -1,6 +1,6 @@
{
"name": "@typebot.io/react",
"version": "0.2.85",
"version": "0.2.86",
"description": "Convenient library to display typebots on your React app",
"main": "dist/index.js",
"types": "dist/index.d.ts",

View File

@@ -108,7 +108,7 @@ export const createChatMessage = createAction({
apiKey: apiKey,
})
const messages = parseChatMessages({ options, variables })
const messages = await parseChatMessages({ options, variables })
try {
const reply = await client.messages.create({
@@ -153,7 +153,7 @@ export const createChatMessage = createAction({
apiKey: apiKey,
})
const messages = parseChatMessages({ options, variables })
const messages = await parseChatMessages({ options, variables })
const response = await client.messages.create({
messages,

View File

@@ -12,3 +12,12 @@ export const defaultAnthropicOptions = {
temperature: 1,
maxTokens: 1024,
} as const
export const modelsWithImageUrlSupport = ['claude-3*']
export const supportedImageTypes = [
'image/png',
'image/jpeg',
'image/gif',
'image/webp',
] as const

View File

@@ -1,18 +1,33 @@
import { Anthropic } from '@anthropic-ai/sdk'
import { options as createMessageOptions } from '../actions/createChatMessage'
import { VariableStore } from '@typebot.io/forge'
import { isNotEmpty } from '@typebot.io/lib'
import { isDefined, isEmpty } from '@typebot.io/lib'
import { z } from '@typebot.io/forge/zod'
import ky, { HTTPError } from 'ky'
import {
defaultAnthropicOptions,
modelsWithImageUrlSupport,
supportedImageTypes,
} from '../constants'
import { wildcardMatch } from '@typebot.io/lib/wildcardMatch'
export const parseChatMessages = ({
options: { messages },
const isModelCompatibleWithImageUrls = (model: string | undefined) =>
model ? wildcardMatch(modelsWithImageUrlSupport)(model) : false
export const parseChatMessages = async ({
options: { messages, model },
variables,
}: {
options: Pick<z.infer<typeof createMessageOptions>, 'messages'>
options: Pick<z.infer<typeof createMessageOptions>, 'messages' | 'model'>
variables: VariableStore
}): Anthropic.Messages.MessageParam[] => {
const parsedMessages = messages
?.flatMap((message) => {
}): Promise<Anthropic.Messages.MessageParam[]> => {
if (!messages) return []
const isVisionEnabled = isModelCompatibleWithImageUrls(
model ?? defaultAnthropicOptions.model
)
const parsedMessages = (
await Promise.all(
messages.map(async (message) => {
if (!message.role) return
if (message.role === 'Dialogue') {
@@ -20,33 +35,114 @@ export const parseChatMessages = ({
const dialogue = variables.get(message.dialogueVariableId) ?? []
const dialogueArr = Array.isArray(dialogue) ? dialogue : [dialogue]
return dialogueArr.map((dialogueItem, index) => {
return Promise.all(
dialogueArr.map(async (dialogueItem, index) => {
if (index === 0 && message.startsBy === 'assistant')
return {
role: 'assistant',
content: dialogueItem,
}
if (index % (message.startsBy === 'assistant' ? 1 : 2) === 0) {
return {
role:
index % (message.startsBy === 'assistant' ? 1 : 2) === 0
? 'user'
: 'assistant',
role: 'user',
content: isVisionEnabled
? await splitUserTextMessageIntoBlocks(dialogueItem ?? '')
: dialogueItem,
}
}
return {
role: 'assistant',
content: dialogueItem,
}
})
)
}
if (!message.content) return
const content = variables.parse(message.content)
if (isEmpty(content)) return
if (message.role === 'user')
return {
role: 'user',
content: isVisionEnabled
? await splitUserTextMessageIntoBlocks(content)
: content,
}
return {
role: message.role,
content: variables.parse(message.content),
} satisfies Anthropic.Messages.MessageParam
content,
}
})
.filter(
(message) =>
isNotEmpty(message?.role) && isNotEmpty(message?.content?.toString())
) as Anthropic.Messages.MessageParam[]
)
)
.flat()
.filter((message) => {
return isDefined(message?.role) && isDefined(message.content)
}) as Anthropic.Messages.MessageParam[]
return parsedMessages
}
const splitUserTextMessageIntoBlocks = async (
input: string
): Promise<
| string
| (Anthropic.Messages.TextBlockParam | Anthropic.Messages.ImageBlockParam)[]
> => {
const urlRegex = /(^|\n\n)(https?:\/\/[^\s]+)(\n\n|$)/g
const match = input.match(urlRegex)
if (!match) return input
const parts: (
| Anthropic.Messages.TextBlockParam
| Anthropic.Messages.ImageBlockParam
)[] = []
let processedInput = input
for (const url of match) {
const textBeforeUrl = processedInput.slice(0, processedInput.indexOf(url))
if (textBeforeUrl.trim().length > 0) {
parts.push({ type: 'text', text: textBeforeUrl })
}
const cleanUrl = url.trim()
try {
const response = await ky.get(cleanUrl)
if (
!response.ok ||
!supportedImageTypes.includes(
response.headers.get('content-type') as any
)
) {
parts.push({ type: 'text', text: cleanUrl })
} else {
parts.push({
type: 'image',
source: {
data: Buffer.from(await response.arrayBuffer()).toString('base64'),
type: 'base64',
media_type: response.headers.get('content-type') as any,
},
})
}
} catch (err) {
if (err instanceof HTTPError) {
console.log(err.response.status, await err.response.text())
} else {
console.error(err)
}
}
processedInput = processedInput.slice(
processedInput.indexOf(url) + url.length
)
}
if (processedInput.trim().length > 0) {
parts.push({ type: 'text', text: processedInput })
}
return parts
}

View File

@@ -10,11 +10,13 @@
"@typebot.io/forge": "workspace:*",
"@typebot.io/lib": "workspace:*",
"@typebot.io/tsconfig": "workspace:*",
"@types/node": "^20.14.2",
"@types/react": "18.2.15",
"typescript": "5.4.5"
},
"dependencies": {
"@anthropic-ai/sdk": "0.20.6",
"ai": "3.1.12"
"ai": "3.1.12",
"ky": "1.2.3"
}
}

View File

@@ -12,6 +12,8 @@ import { executeFunction } from '@typebot.io/variables/executeFunction'
import { readDataStream } from 'ai'
import { deprecatedAskAssistantOptions } from '../deprecated'
import { OpenAIAssistantStream } from '../helpers/OpenAIAssistantStream'
import { isModelCompatibleWithVision } from '../helpers/isModelCompatibleWithVision'
import { splitUserTextMessageIntoBlocks } from '../helpers/splitUserTextMessageIntoBlocks'
export const askAssistant = createAction({
auth,
@@ -284,12 +286,16 @@ const createAssistantStream = async ({
return
}
const assistant = await openai.beta.assistants.retrieve(assistantId)
// Add a message to the thread
const createdMessage = await openai.beta.threads.messages.create(
currentThreadId,
{
role: 'user',
content: message,
content: isModelCompatibleWithVision(assistant.model)
? await splitUserTextMessageIntoBlocks(message)
: message,
}
)
return OpenAIAssistantStream(

View File

@@ -7,6 +7,14 @@ export const openAIVoices = [
'shimmer',
] as const
export const modelsWithImageUrlSupport = [
'gpt-4-turbo*',
'gpt-4o*',
'gpt-4*vision-preview',
]
export const excludedModelsFromImageUrlSupport = ['gpt-4-turbo-preview']
export const defaultOpenAIOptions = {
baseUrl: 'https://api.openai.com/v1',
model: 'gpt-3.5-turbo',

View File

@@ -0,0 +1,10 @@
import { wildcardMatch } from '@typebot.io/lib/wildcardMatch'
import {
excludedModelsFromImageUrlSupport,
modelsWithImageUrlSupport,
} from '../constants'
export const isModelCompatibleWithVision = (model: string | undefined) =>
model && !excludedModelsFromImageUrlSupport.includes(model)
? wildcardMatch(modelsWithImageUrlSupport)(model)
: false

View File

@@ -1,17 +1,26 @@
import type { OpenAI } from 'openai'
import { VariableStore } from '@typebot.io/forge'
import { isNotEmpty } from '@typebot.io/lib'
import { isDefined, isEmpty } from '@typebot.io/lib'
import { ChatCompletionOptions } from '../shared/parseChatCompletionOptions'
import ky, { HTTPError } from 'ky'
import { defaultOpenAIOptions, modelsWithImageUrlSupport } from '../constants'
import { isModelCompatibleWithVision } from './isModelCompatibleWithVision'
import { splitUserTextMessageIntoBlocks } from './splitUserTextMessageIntoBlocks'
export const parseChatCompletionMessages = ({
options: { messages },
export const parseChatCompletionMessages = async ({
options: { messages, model },
variables,
}: {
options: ChatCompletionOptions
variables: VariableStore
}): OpenAI.Chat.ChatCompletionMessageParam[] => {
const parsedMessages = messages
?.flatMap((message) => {
}): Promise<OpenAI.Chat.ChatCompletionMessageParam[]> => {
if (!messages) return []
const isVisionEnabled = isModelCompatibleWithVision(
model ?? defaultOpenAIOptions.model
)
const parsedMessages = (
await Promise.all(
messages.map(async (message) => {
if (!message.role) return
if (message.role === 'Dialogue') {
@@ -19,33 +28,54 @@ export const parseChatCompletionMessages = ({
const dialogue = variables.get(message.dialogueVariableId) ?? []
const dialogueArr = Array.isArray(dialogue) ? dialogue : [dialogue]
return dialogueArr.map((dialogueItem, index) => {
return Promise.all(
dialogueArr.map(async (dialogueItem, index) => {
if (index === 0 && message.startsBy === 'assistant')
return {
role: 'assistant',
content: dialogueItem,
}
if (index % (message.startsBy === 'assistant' ? 1 : 2) === 0) {
return {
role:
index % (message.startsBy === 'assistant' ? 1 : 2) === 0
? 'user'
: 'assistant',
role: 'user',
content: isVisionEnabled
? await splitUserTextMessageIntoBlocks(dialogueItem ?? '')
: dialogueItem,
}
}
return {
role: 'assistant',
content: dialogueItem,
}
})
)
}
if (!message.content) return
const content = variables.parse(message.content)
if (isEmpty(content)) return
if (message.role === 'user')
return {
role: 'user',
content: isVisionEnabled
? await splitUserTextMessageIntoBlocks(content)
: content,
}
return {
role: message.role,
content: variables.parse(message.content),
} satisfies OpenAI.Chat.ChatCompletionMessageParam
content,
}
})
.filter(
(message) =>
isNotEmpty(message?.role) && isNotEmpty(message?.content?.toString())
) as OpenAI.Chat.ChatCompletionMessageParam[]
)
)
.flat()
.filter((message) => {
return isDefined(message?.role) && isDefined(message.content)
}) as OpenAI.Chat.ChatCompletionMessageParam[]
return parsedMessages
}

View File

@@ -0,0 +1,53 @@
import ky, { HTTPError } from 'ky'
import OpenAI from 'openai'
export const splitUserTextMessageIntoBlocks = async (
input: string
): Promise<string | OpenAI.Chat.ChatCompletionContentPart[]> => {
const urlRegex = /(^|\n\n)(https?:\/\/[^\s]+)(\n\n|$)/g
const match = input.match(urlRegex)
if (!match) return input
let parts: OpenAI.Chat.ChatCompletionContentPart[] = []
let processedInput = input
for (const url of match) {
const textBeforeUrl = processedInput.slice(0, processedInput.indexOf(url))
if (textBeforeUrl.trim().length > 0) {
parts.push({ type: 'text', text: textBeforeUrl })
}
const cleanUrl = url.trim()
try {
const response = await ky.get(cleanUrl)
if (
!response.ok ||
!response.headers.get('content-type')?.startsWith('image/')
) {
parts.push({ type: 'text', text: cleanUrl })
} else {
parts.push({
type: 'image_url',
image_url: {
url: url.trim(),
detail: 'auto',
},
})
}
} catch (err) {
if (err instanceof HTTPError) {
console.log(err.response.status, await err.response.text())
} else {
console.error(err)
}
}
processedInput = processedInput.slice(
processedInput.indexOf(url) + url.length
)
}
if (processedInput.trim().length > 0) {
parts.push({ type: 'text', text: processedInput })
}
return parts
}

View File

@@ -16,6 +16,7 @@
"@types/react": "18.2.15",
"typescript": "5.4.5",
"@typebot.io/lib": "workspace:*",
"@typebot.io/variables": "workspace:*"
"@typebot.io/variables": "workspace:*",
"ky": "1.2.3"
}
}

View File

@@ -59,7 +59,7 @@ export const runChatCompletion = async ({
},
})) satisfies ChatCompletionTool[] | undefined
const messages = parseChatCompletionMessages({ options, variables })
const messages = await parseChatCompletionMessages({ options, variables })
const body = {
model,

View File

@@ -47,7 +47,7 @@ export const runChatCompletionStream = async ({
},
})) satisfies ChatCompletionTool[] | undefined
const messages = parseChatCompletionMessages({ options, variables })
const messages = await parseChatCompletionMessages({ options, variables })
const response = await openai.chat.completions.create({
model,

View File

@@ -45,6 +45,7 @@
"stripe": "12.13.0",
"unified": "11.0.4",
"validator": "13.11.0",
"wildcard-match": "5.1.3",
"zod": "3.22.4"
}
}

View File

@@ -0,0 +1,3 @@
import wcmatch from 'wildcard-match'
export { wcmatch as wildcardMatch }

pnpm-lock.yaml generated
View File

@@ -1290,6 +1290,9 @@ importers:
ai:
specifier: 3.1.12
version: 3.1.12(openai@4.47.1)(react@18.2.0)(solid-js@1.7.8)(svelte@4.2.12)(vue@3.4.21)(zod@3.22.4)
ky:
specifier: 1.2.3
version: 1.2.3
devDependencies:
'@typebot.io/forge':
specifier: workspace:*
@@ -1300,6 +1303,9 @@ importers:
'@typebot.io/tsconfig':
specifier: workspace:*
version: link:../../../tsconfig
'@types/node':
specifier: ^20.14.2
version: 20.14.2
'@types/react':
specifier: 18.2.15
version: 18.2.15
@@ -1494,6 +1500,9 @@ importers:
'@types/react':
specifier: 18.2.15
version: 18.2.15
ky:
specifier: 1.2.3
version: 1.2.3
typescript:
specifier: 5.4.5
version: 5.4.5
@@ -1706,6 +1715,9 @@ importers:
validator:
specifier: 13.11.0
version: 13.11.0
wildcard-match:
specifier: ^5.1.3
version: 5.1.3
zod:
specifier: 3.22.4
version: 3.22.4
@@ -9562,7 +9574,7 @@ packages:
/@types/cors@2.8.13:
resolution: {integrity: sha512-RG8AStHlUiV5ysZQKq97copd2UmVYw3/pRMLefISZ3S1hK104Cwm7iLQ3fTKx+lsUH2CE8FlLaYeEA2LSeqYUA==}
dependencies:
'@types/node': 20.12.4
'@types/node': 20.4.9
/@types/debug@4.1.12:
resolution: {integrity: sha512-vIChWdVG3LG1SMxEvI/AK+FWJthlrqlTu7fbrlywTkkaONwk/UAGaULXRlf8vkzFBLVm0zkMdCquhL5aOjhXPQ==}
@@ -9699,7 +9711,7 @@ packages:
/@types/jsonwebtoken@9.0.2:
resolution: {integrity: sha512-drE6uz7QBKq1fYqqoFKTDRdFCPHd5TCub75BM+D+cMx7NU9hUz7SESLfC2fSCXVFMO5Yj8sOWHuGqPgjc+fz0Q==}
dependencies:
'@types/node': 20.12.4
'@types/node': 20.4.9
dev: true
/@types/katex@0.16.7:
@@ -9760,7 +9772,7 @@ packages:
/@types/node-fetch@2.6.11:
resolution: {integrity: sha512-24xFj9R5+rfQJLRyM56qh+wnVSYhyXC2tkoBndtY0U+vubqNsYXGjufB2nn8Q6gt0LrARwL6UBtMCSVCwl4B1g==}
dependencies:
'@types/node': 20.4.9
'@types/node': 20.14.2
form-data: 4.0.0
dev: false
@@ -9772,6 +9784,12 @@ packages:
resolution: {integrity: sha512-E+Fa9z3wSQpzgYQdYmme5X3OTuejnnTx88A6p6vkkJosR3KBz+HpE3kqNm98VE6cfLFcISx7zW7MsJkH6KwbTw==}
dependencies:
undici-types: 5.26.5
dev: false
/@types/node@20.14.2:
resolution: {integrity: sha512-xyu6WAMVwv6AKFLB+e/7ySZVr/0zLCzOa7rSpq6jNwpqOrUbcACDWC+53d4n2QHOnDou0fbIsg8wZu/sxrnI4Q==}
dependencies:
undici-types: 5.26.5
/@types/node@20.4.2:
resolution: {integrity: sha512-Dd0BYtWgnWJKwO1jkmTrzofjK2QXXcai0dmtzvIBhcA+RsG5h8R3xlyta0kGOZRNfL9GuRtb1knmPEhQrePCEw==}
@@ -9795,7 +9813,7 @@ packages:
/@types/papaparse@5.3.7:
resolution: {integrity: sha512-f2HKmlnPdCvS0WI33WtCs5GD7X1cxzzS/aduaxSu3I7TbhWlENjSPs6z5TaB9K0J+BH1jbmqTaM+ja5puis4wg==}
dependencies:
'@types/node': 20.12.4
'@types/node': 20.4.9
dev: true
/@types/parse-json@4.0.2:
@@ -23173,6 +23191,10 @@ packages:
string-width: 5.1.2
dev: false
/wildcard-match@5.1.3:
resolution: {integrity: sha512-a95hPUk+BNzSGLntNXYxsjz2Hooi5oL7xOfJR6CKwSsSALh7vUNuTlzsrZowtYy38JNduYFRVhFv19ocqNOZlg==}
dev: false
/wrap-ansi@6.2.0:
resolution: {integrity: sha512-r6lPcBGxZXlIcymEu7InxDMhdW0KDxpLgoFLcguasxCaJ/SOIZwINatK9KY/tf+ZrlywOKU0UDj3ATXUBfxJXA==}
engines: {node: '>=8'}