2
0

Support Vision for compatible AI models

Closes #1313
This commit is contained in:
Baptiste Arnaud
2024-06-11 10:51:02 +02:00
parent 40ca02df8c
commit ee834999e6
23 changed files with 360 additions and 92 deletions

View File

@ -108,7 +108,7 @@ export const createChatMessage = createAction({
apiKey: apiKey,
})
const messages = parseChatMessages({ options, variables })
const messages = await parseChatMessages({ options, variables })
try {
const reply = await client.messages.create({
@ -153,7 +153,7 @@ export const createChatMessage = createAction({
apiKey: apiKey,
})
const messages = parseChatMessages({ options, variables })
const messages = await parseChatMessages({ options, variables })
const response = await client.messages.create({
messages,

View File

@ -12,3 +12,12 @@ export const defaultAnthropicOptions = {
temperature: 1,
maxTokens: 1024,
} as const
// Model name patterns (wildcard syntax) that accept image inputs ("vision").
// Matched via wildcardMatch in parseChatMessages, so 'claude-3*' covers every
// claude-3 family model.
export const modelsWithImageUrlSupport = ['claude-3*']
// MIME types that may be sent to the API as base64 image blocks; any URL whose
// content-type is not in this list is forwarded as plain text instead.
export const supportedImageTypes = [
  'image/png',
  'image/jpeg',
  'image/gif',
  'image/webp',
] as const

View File

@ -1,52 +1,148 @@
import { Anthropic } from '@anthropic-ai/sdk'
import { options as createMessageOptions } from '../actions/createChatMessage'
import { VariableStore } from '@typebot.io/forge'
import { isNotEmpty } from '@typebot.io/lib'
import { isDefined, isEmpty } from '@typebot.io/lib'
import { z } from '@typebot.io/forge/zod'
import ky, { HTTPError } from 'ky'
import {
defaultAnthropicOptions,
modelsWithImageUrlSupport,
supportedImageTypes,
} from '../constants'
import { wildcardMatch } from '@typebot.io/lib/wildcardMatch'
export const parseChatMessages = ({
options: { messages },
const isModelCompatibleWithImageUrls = (model: string | undefined) =>
model ? wildcardMatch(modelsWithImageUrlSupport)(model) : false
export const parseChatMessages = async ({
options: { messages, model },
variables,
}: {
options: Pick<z.infer<typeof createMessageOptions>, 'messages'>
options: Pick<z.infer<typeof createMessageOptions>, 'messages' | 'model'>
variables: VariableStore
}): Anthropic.Messages.MessageParam[] => {
const parsedMessages = messages
?.flatMap((message) => {
if (!message.role) return
}): Promise<Anthropic.Messages.MessageParam[]> => {
if (!messages) return []
const isVisionEnabled = isModelCompatibleWithImageUrls(
model ?? defaultAnthropicOptions.model
)
const parsedMessages = (
await Promise.all(
messages.map(async (message) => {
if (!message.role) return
if (message.role === 'Dialogue') {
if (!message.dialogueVariableId) return
const dialogue = variables.get(message.dialogueVariableId) ?? []
const dialogueArr = Array.isArray(dialogue) ? dialogue : [dialogue]
if (message.role === 'Dialogue') {
if (!message.dialogueVariableId) return
const dialogue = variables.get(message.dialogueVariableId) ?? []
const dialogueArr = Array.isArray(dialogue) ? dialogue : [dialogue]
return dialogueArr.map((dialogueItem, index) => {
if (index === 0 && message.startsBy === 'assistant')
return {
role: 'assistant',
content: dialogueItem,
}
return Promise.all(
dialogueArr.map(async (dialogueItem, index) => {
if (index === 0 && message.startsBy === 'assistant')
return {
role: 'assistant',
content: dialogueItem,
}
if (index % (message.startsBy === 'assistant' ? 1 : 2) === 0) {
return {
role: 'user',
content: isVisionEnabled
? await splitUserTextMessageIntoBlocks(dialogueItem ?? '')
: dialogueItem,
}
}
return {
role: 'assistant',
content: dialogueItem,
}
})
)
}
if (!message.content) return
const content = variables.parse(message.content)
if (isEmpty(content)) return
if (message.role === 'user')
return {
role:
index % (message.startsBy === 'assistant' ? 1 : 2) === 0
? 'user'
: 'assistant',
content: dialogueItem,
role: 'user',
content: isVisionEnabled
? await splitUserTextMessageIntoBlocks(content)
: content,
}
})
}
if (!message.content) return
return {
role: message.role,
content: variables.parse(message.content),
} satisfies Anthropic.Messages.MessageParam
})
.filter(
(message) =>
isNotEmpty(message?.role) && isNotEmpty(message?.content?.toString())
) as Anthropic.Messages.MessageParam[]
return {
role: message.role,
content,
}
})
)
)
.flat()
.filter((message) => {
return isDefined(message?.role) && isDefined(message.content)
}) as Anthropic.Messages.MessageParam[]
return parsedMessages
}
/**
 * Splits a user text message into Anthropic content blocks, downloading any
 * stand-alone image URLs and inlining them as base64 image blocks.
 *
 * A URL only qualifies when it is isolated — at the start/end of the input or
 * separated from surrounding text by blank lines. If the input contains no
 * such URL, the original string is returned unchanged. URLs whose download
 * fails or whose content-type is not in `supportedImageTypes` are kept as
 * plain text blocks so no content is lost.
 */
const splitUserTextMessageIntoBlocks = async (
  input: string
): Promise<
  | string
  | (Anthropic.Messages.TextBlockParam | Anthropic.Messages.ImageBlockParam)[]
> => {
  const urlRegex = /(^|\n\n)(https?:\/\/[^\s]+)(\n\n|$)/g
  const match = input.match(urlRegex)
  if (!match) return input
  const parts: (
    | Anthropic.Messages.TextBlockParam
    | Anthropic.Messages.ImageBlockParam
  )[] = []
  let processedInput = input
  for (const url of match) {
    // Matched strings include the surrounding blank-line delimiters, so use
    // them verbatim for offset bookkeeping and trim only for the request.
    const textBeforeUrl = processedInput.slice(0, processedInput.indexOf(url))
    if (textBeforeUrl.trim().length > 0) {
      parts.push({ type: 'text', text: textBeforeUrl })
    }
    const cleanUrl = url.trim()
    try {
      const response = await ky.get(cleanUrl)
      // Strip media-type parameters (e.g. "image/png; charset=binary") before
      // comparing — otherwise a parameterized header silently downgrades a
      // valid image to a text URL, and the raw header would be rejected by the
      // API as media_type.
      const contentType = response.headers
        .get('content-type')
        ?.split(';')[0]
        .trim()
      if (
        // NOTE(review): ky throws HTTPError on non-2xx by default, so
        // !response.ok is defensive only.
        !response.ok ||
        !contentType ||
        !supportedImageTypes.includes(contentType as any)
      ) {
        parts.push({ type: 'text', text: cleanUrl })
      } else {
        parts.push({
          type: 'image',
          source: {
            data: Buffer.from(await response.arrayBuffer()).toString('base64'),
            type: 'base64',
            media_type: contentType as any,
          },
        })
      }
    } catch (err) {
      // Best-effort: an unreachable URL is logged and dropped from the blocks.
      if (err instanceof HTTPError) {
        console.log(err.response.status, await err.response.text())
      } else {
        console.error(err)
      }
    }
    processedInput = processedInput.slice(
      processedInput.indexOf(url) + url.length
    )
  }
  if (processedInput.trim().length > 0) {
    parts.push({ type: 'text', text: processedInput })
  }
  return parts
}

View File

@ -10,11 +10,13 @@
"@typebot.io/forge": "workspace:*",
"@typebot.io/lib": "workspace:*",
"@typebot.io/tsconfig": "workspace:*",
"@types/node": "^20.14.2",
"@types/react": "18.2.15",
"typescript": "5.4.5"
},
"dependencies": {
"@anthropic-ai/sdk": "0.20.6",
"ai": "3.1.12"
"ai": "3.1.12",
"ky": "1.2.3"
}
}