Compare commits

...

1 Commit

Author SHA1 Message Date
pit
a03e74d660 feat: chat with pdf 2023-10-25 09:29:34 +03:00
10 changed files with 2030 additions and 45 deletions

View File

@@ -0,0 +1,43 @@
import fs from 'fs/promises';
import { loadFileIntoPinecone } from '@documenso/lib/server-only/pinecone';
import { getFile } from '@documenso/lib/universal/upload/get-file';
import { DocumentDataType } from '@documenso/prisma/client';
import { Card, CardContent } from '@documenso/ui/primitives/card';
import { Chat } from './chat';
// Props describing the document to chat about. Mirrors the shape accepted by
// `getFile` (storage type + data) plus the document id.
type ChatPDFProps = {
  // Document id; also used to derive the on-disk PDF cache file name.
  id: string;
  // Where/how the document data is stored (e.g. S3 vs inline) — see DocumentDataType.
  type: DocumentDataType;
  data: string;
  initialData: string;
};
/**
 * Server component rendering the "Chat with the PDF" card for a document.
 *
 * Fetches the document bytes, caches them to a local `<id>.pdf` file when not
 * already present, indexes the file into Pinecone, then renders the chat UI.
 *
 * NOTE(review): this assumes a persistent, writable local filesystem — verify
 * that holds for the deployment target (serverless disks are often ephemeral).
 */
export async function ChatPDF({ documentData }: { documentData: ChatPDFProps }) {
  const docData = await getFile(documentData);

  // Fix: the template literal previously contained a stray closing brace and
  // produced file names like `<id>}.pdf`.
  const fileName = `${documentData.id}.pdf`;

  try {
    // Only write the file when it does not exist yet.
    await fs.access(fileName, fs.constants.F_OK);
  } catch (err) {
    await fs.writeFile(fileName, docData);
  }

  await loadFileIntoPinecone(fileName);

  return (
    <Card className="my-8" gradient={true} degrees={200}>
      <CardContent className="mt-8 flex flex-col">
        <h2 className="text-foreground text-2xl font-semibold">Chat with the PDF</h2>
        <p className="text-muted-foreground mt-2 text-sm">Ask any questions regarding the PDF</p>
        <hr className="border-border mb-4 mt-4" />
        <Chat />
        <hr className="border-border mb-4 mt-4" />
        <p className="text-muted-foreground text-sm italic">
          Disclaimer: Never trust AI 100%. Always double check the documents yourself. Documenso is
          not liable for any issue arising from you relying 100% on the AI.
        </p>
      </CardContent>
    </Card>
  );
}

View File

@@ -0,0 +1,56 @@
'use client';
import { useChat } from 'ai/react';
import { cn } from '@documenso/ui/lib/utils';
import { Button } from '@documenso/ui/primitives/button';
import { Input } from '@documenso/ui/primitives/input';
type Props = {};

/**
 * Client-side chat UI backed by the streaming `/api/chat` endpoint.
 * User messages render right-aligned; assistant replies render left-aligned.
 */
export function Chat({}: Props) {
  const { input, handleInputChange, handleSubmit, messages } = useChat({
    api: '/api/chat',
  });

  // continue https://youtu.be/bZFedu-0emE?si=2JGSJfSQ38aXSlp2&t=10941
  return (
    <div>
      <div className="flex flex-col gap-8">
        <ul>
          {messages.map((message) => (
            <li
              className={cn(
                'flex',
                message.role === 'user'
                  ? 'mb-6 ml-10 mt-6 flex justify-end'
                  : 'mr-10 justify-start',
              )}
              // Fix: key by the stable message id from `ai/react` instead of the
              // array index, so React reconciles correctly as messages stream in.
              key={message.id}
            >
              <span
                className={
                  message.role === 'user'
                    ? 'bg-background text-foreground group relative rounded-lg border-2 p-4 backdrop-blur-[2px]'
                    : 'bg-primary text-primary-foreground rounded-lg p-4 backdrop-blur-[2px]'
                }
              >
                {message.content}
              </span>
            </li>
          ))}
        </ul>
      </div>
      <form className="mb-2 mt-8 flex" onSubmit={handleSubmit}>
        <Input
          value={input}
          className="mr-6 w-1/2"
          onChange={handleInputChange}
          placeholder="Ask away..."
        />
        <Button type="submit">Send</Button>
      </form>
    </div>
  );
}

View File

@@ -87,7 +87,7 @@ export const SigningForm = ({ document, recipient, fields }: SigningFormProps) =
Please review the document before signing.
</p>
<hr className="border-border mb-8 mt-4" />
<hr className="border-border mb-8 mt-4 h-8 w-full" />
<div className="-mx-2 flex flex-1 flex-col gap-4 overflow-y-auto px-2">
<div className="flex flex-1 flex-col gap-y-4">

View File

@@ -14,6 +14,7 @@ import { Card, CardContent } from '@documenso/ui/primitives/card';
import { ElementVisible } from '@documenso/ui/primitives/element-visible';
import { LazyPDFViewer } from '@documenso/ui/primitives/lazy-pdf-viewer';
import { ChatPDF } from './chat-pdf';
import { DateField } from './date-field';
import { EmailField } from './email-field';
import { SigningForm } from './form';
@@ -106,6 +107,7 @@ export default async function SigningPage({ params: { token } }: SigningPageProp
.otherwise(() => null),
)}
</ElementVisible>
<ChatPDF documentData={documentData} />
</div>
</SigningProvider>
);

View File

@@ -0,0 +1,54 @@
import { Message, OpenAIStream, StreamingTextResponse } from 'ai';
import { Configuration, OpenAIApi } from 'openai-edge';
import { getContext } from '@documenso/lib/server-only/context';
// Run this route on the Edge runtime (required for response streaming).
export const runtime = 'edge';

// Shared edge-compatible OpenAI client for this route.
const openai = new OpenAIApi(
  new Configuration({
    apiKey: process.env.OPENAI_API_KEY!,
  }),
);
/**
 * Edge API route answering chat messages about the indexed PDF.
 *
 * Embeds the latest user message, retrieves matching chunks from Pinecone as a
 * context block, and streams a GPT-3.5 completion back to the client.
 */
export default async function handler(request: Request) {
  try {
    const data = await request.json();

    // Context is built from the most recent message only.
    const lastMessage = data.messages[data.messages.length - 1];
    const context = await getContext(lastMessage.content);

    const prompt = {
      role: 'system',
      content: `AI assistant is a brand new, powerful, human-like artificial intelligence.
      The traits of AI include expert knowledge, helpfulness, cleverness, and articulateness.
      AI is a well-behaved and well-mannered individual.
      AI is always friendly, kind, and inspiring, and he is eager to provide vivid and thoughtful responses to the user.
      AI has the sum of all knowledge in their brain, and is able to accurately answer nearly any question about any topic in conversation.
      AI assistant is a big fan of Pinecone and Vercel.
      START CONTEXT BLOCK
      ${context}
      END OF CONTEXT BLOCK
      AI assistant will take into account any CONTEXT BLOCK that is provided in a conversation.
      If the context does not provide the answer to question, the AI assistant will say, "I'm sorry, but I don't know the answer to that question".
      AI assistant will not apologize for previous responses, but instead will indicated new information was gained.
      AI assistant will not invent anything that is not drawn directly from the context.
      `,
    };

    // Only forward user messages (assistant turns are re-generated each call).
    const response = await openai.createChatCompletion({
      model: 'gpt-3.5-turbo',
      messages: [prompt, ...data.messages.filter((message: Message) => message.role === 'user')],
      stream: true,
    });

    // Stream tokens back to the client as they arrive.
    const stream = OpenAIStream(response);
    return new StreamingTextResponse(stream);
  } catch (error) {
    // Fix: the previous message claimed "getting embeddings", copy-pasted from
    // the embeddings helper — this catch wraps the chat completion step.
    console.error('There was an error generating the chat completion: ', error);
    throw new Error('There was an error generating the chat completion');
  }
}

1740
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -46,6 +46,13 @@
"packages/*"
],
"dependencies": {
"@pinecone-database/pinecone": "^1.1.1",
"@types/md5": "^2.3.4",
"ai": "^2.2.16",
"langchain": "^0.0.169",
"md5": "^2.3.0",
"openai-edge": "^1.2.2",
"pdf-parse": "^1.1.1",
"recharts": "^2.7.2"
}
}

View File

@@ -0,0 +1,35 @@
import { Pinecone } from '@pinecone-database/pinecone';
import { getEmbeddings } from './embeddings';
/**
 * Queries the Pinecone index for the vectors nearest the given embedding.
 * Returns up to the five best matches with their metadata, or an empty array.
 */
export async function getMatchesFromEmbeddings(embeddings: number[]) {
  const client = new Pinecone({
    apiKey: process.env.PINECONE_API_KEY!,
    environment: process.env.PINECONE_ENV!,
  });
  const index = client.index('documenso-chat-with-pdf-test');

  try {
    const result = await index.query({
      vector: embeddings,
      topK: 5,
      includeMetadata: true,
    });
    return result.matches ?? [];
  } catch (error) {
    console.error('There was an error getting matches from embeddings: ', error);
    throw new Error('There was an error getting matches from embeddings');
  }
}
/**
 * Builds the retrieval context for a chat query: embeds the query, fetches the
 * nearest chunks from Pinecone, keeps matches scoring above 0.7, and joins
 * their text (capped at 3000 characters).
 */
export async function getContext(query: string) {
  const queryEmbeddings = await getEmbeddings(query);
  const matches = await getMatchesFromEmbeddings(queryEmbeddings);

  const qualifyingMatches = matches.filter((match) => match.score && match.score > 0.7);

  // Fix: drop matches without string `text` metadata so the joined context
  // never contains the literal word "undefined".
  const docs = qualifyingMatches
    .map((match) => match.metadata?.text)
    .filter((text): text is string => typeof text === 'string');

  return docs.join('\n').substring(0, 3000);
}

View File

@@ -0,0 +1,23 @@
import { Configuration, OpenAIApi } from 'openai-edge';
// Shared edge-compatible OpenAI client used by the embeddings helper below.
const openai = new OpenAIApi(
  new Configuration({
    apiKey: process.env.OPENAI_API_KEY!,
  }),
);
/**
 * Embeds a piece of text with OpenAI's `text-embedding-ada-002` model.
 * Newlines are collapsed to spaces before embedding.
 *
 * @returns the embedding vector for the input text
 * @throws when the API call fails or the response contains no embedding
 */
export async function getEmbeddings(text: string) {
  try {
    const response = await openai.createEmbedding({
      model: 'text-embedding-ada-002',
      input: text.replace(/\n/g, ' '),
    });
    const result = await response.json();

    // Robustness fix: the API returns `{ error: ... }` on failure; previously
    // this crashed on `result.data[0]` and hid the real error.
    if (!result.data?.[0]?.embedding) {
      throw new Error(`Unexpected embeddings response: ${JSON.stringify(result.error ?? result)}`);
    }

    return result.data[0].embedding;
  } catch (error) {
    console.error('There was an error getting embeddings: ', error);
    throw new Error('There was an error getting embeddings');
  }
}

View File

@@ -0,0 +1,113 @@
import { Pinecone } from '@pinecone-database/pinecone';
import { PDFLoader } from 'langchain/document_loaders/fs/pdf';
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
import md5 from 'md5';
import { getEmbeddings } from './embeddings';
// Lazily-initialised singleton Pinecone client (see getPineconeClient).
let pc: Pinecone | null = null;

/**
 * Shape of a page as produced by langchain's PDFLoader and consumed by
 * prepareDocument. Fix: this was `unknown`, which made the destructuring in
 * prepareDocument untypeable; only the fields this module actually reads are
 * declared here.
 */
export type PDFPage = {
  pageContent: string;
  metadata: {
    loc: {
      pageNumber: number;
    };
  };
};
/**
 * Returns the process-wide Pinecone client, creating it on first use.
 * Credentials come from the PINECONE_API_KEY / PINECONE_ENV env vars.
 */
export const getPineconeClient = () => {
  if (pc === null) {
    pc = new Pinecone({
      apiKey: process.env.PINECONE_API_KEY!,
      environment: process.env.PINECONE_ENV!,
    });
  }
  return pc;
};
/**
 * Loads a PDF from disk, splits every page into chunks, embeds each chunk via
 * OpenAI, and upserts the resulting vectors into the Pinecone index.
 *
 * @param file path to the PDF on the local filesystem
 * @throws when no file path is given, or embedding/upserting fails
 */
export async function loadFileIntoPinecone(file: string) {
  if (!file) {
    throw new Error('No file provided');
  }

  const loader = new PDFLoader(file);
  const pages: PDFPage[] = await loader.load();

  // Chunk every page, then embed all chunks in parallel.
  const documents = await Promise.all(pages.map(prepareDocument));
  const vectors = await Promise.all(documents.flat().map(embedDocuments));

  const client = getPineconeClient();
  const pineconeIndex = client.index('documenso-chat-with-pdf-test');

  try {
    await pineconeIndex.upsert(vectors);
  } catch (error) {
    console.error('There was an error upserting vectors: ', error);
    // Fix: the error was previously swallowed, so callers believed the
    // document was indexed even when the upsert failed.
    throw new Error('There was an error upserting vectors');
  }
}
/**
 * Converts one text chunk into a Pinecone vector record.
 * Fix: `doc` was implicitly `any`; it is now typed with the chunk shape
 * produced by prepareDocument (pageContent plus text/pageNumber metadata).
 */
async function embedDocuments(doc: {
  pageContent: string;
  metadata: { text: string; pageNumber: number };
}) {
  try {
    const embeddings = await getEmbeddings(doc.pageContent);
    // The content hash doubles as a stable, de-duplicating vector id.
    const hash = md5(doc.pageContent);

    return {
      id: hash,
      values: embeddings,
      metadata: {
        text: doc.metadata.text,
        pageNumber: doc.metadata.pageNumber,
      },
    };
  } catch (error) {
    console.error('There was an error embedding documents: ', error);
    throw new Error('There was an error embedding documents');
  }
}
/**
 * Truncates a string to at most `numBytes` bytes of its UTF-8 encoding.
 * NOTE(review): a multi-byte character straddling the cut is decoded as a
 * replacement character — same behaviour as before this rewrite.
 */
export const truncateStringByBytes = (str: string, numBytes: number) => {
  const utf8Bytes = new TextEncoder().encode(str);
  const truncatedBytes = utf8Bytes.slice(0, numBytes);
  return new TextDecoder('utf-8').decode(truncatedBytes);
};
/**
 * Normalises one PDF page and splits it into chunks for embedding.
 * Each chunk's metadata carries the source page number and a byte-truncated
 * copy of the full page text.
 */
async function prepareDocument(page: PDFPage) {
  const { metadata } = page;
  const normalizedContent = page.pageContent.replace(/\n/g, '');

  const splitter = new RecursiveCharacterTextSplitter();
  return await splitter.splitDocuments([
    {
      pageContent: normalizedContent,
      metadata: {
        pageNumber: metadata.loc.pageNumber,
        text: truncateStringByBytes(normalizedContent, 36000),
      },
    },
  ]);
}
/** Strips every non-ASCII character (code unit above 0x7F) from the input. */
function convertToAscii(input: string) {
  let asciiOnly = '';
  for (const ch of input) {
    if (ch.charCodeAt(0) <= 0x7f) {
      asciiOnly += ch;
    }
  }
  return asciiOnly;
}