Infinite prompt input and compression implementation (#332)
* WIP on continuous prompt window summary * wip * Move chat out of VDB simplify chat interface normalize LLM model interface have compression abstraction Cleanup compressor TODO: Anthropic stuff * Implement compression for Anythropic Fix lancedb sources * cleanup vectorDBs and check that lance, chroma, and pinecone are returning valid metadata sources * Resolve Weaviate citation sources not working with schema * comment cleanup
This commit is contained in:
parent
0751fb1fdd
commit
be9d8b0397
@ -24,7 +24,7 @@ export default function AnthropicAiOptions({ settings, showAlert = false }) {
|
|||||||
<div className="w-full flex items-center gap-4">
|
<div className="w-full flex items-center gap-4">
|
||||||
<div className="flex flex-col w-60">
|
<div className="flex flex-col w-60">
|
||||||
<label className="text-white text-sm font-semibold block mb-4">
|
<label className="text-white text-sm font-semibold block mb-4">
|
||||||
Anthropic Claude-2 API Key
|
Anthropic API Key
|
||||||
</label>
|
</label>
|
||||||
<input
|
<input
|
||||||
type="password"
|
type="password"
|
||||||
@ -48,7 +48,7 @@ export default function AnthropicAiOptions({ settings, showAlert = false }) {
|
|||||||
required={true}
|
required={true}
|
||||||
className="bg-zinc-900 border border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
|
className="bg-zinc-900 border border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
|
||||||
>
|
>
|
||||||
{["claude-2"].map((model) => {
|
{["claude-2", "claude-instant-1"].map((model) => {
|
||||||
return (
|
return (
|
||||||
<option key={model} value={model}>
|
<option key={model} value={model}>
|
||||||
{model}
|
{model}
|
||||||
|
|||||||
@ -49,6 +49,23 @@ export default function AzureAiOptions({ settings }) {
|
|||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<div className="flex flex-col w-60">
|
||||||
|
<label className="text-white text-sm font-semibold block mb-4">
|
||||||
|
Chat Model Token Limit
|
||||||
|
</label>
|
||||||
|
<select
|
||||||
|
name="AzureOpenAiTokenLimit"
|
||||||
|
defaultValue={settings?.AzureOpenAiTokenLimit || 4096}
|
||||||
|
className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
|
||||||
|
required={true}
|
||||||
|
>
|
||||||
|
<option value={4096}>4,096 (gpt-3.5-turbo)</option>
|
||||||
|
<option value={16384}>16,384 (gpt-3.5-16k)</option>
|
||||||
|
<option value={8192}>8,192 (gpt-4)</option>
|
||||||
|
<option value={32768}>32,768 (gpt-4-32k)</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
|
||||||
<div className="flex flex-col w-60">
|
<div className="flex flex-col w-60">
|
||||||
<label className="text-white text-sm font-semibold block mb-4">
|
<label className="text-white text-sm font-semibold block mb-4">
|
||||||
Embedding Deployment Name
|
Embedding Deployment Name
|
||||||
|
|||||||
@ -224,7 +224,6 @@ export default function WorkspaceSettings({ workspace }) {
|
|||||||
</div>
|
</div>
|
||||||
<textarea
|
<textarea
|
||||||
name="openAiPrompt"
|
name="openAiPrompt"
|
||||||
maxLength={500}
|
|
||||||
rows={5}
|
rows={5}
|
||||||
defaultValue={chatPrompt(workspace)}
|
defaultValue={chatPrompt(workspace)}
|
||||||
className="bg-zinc-900 text-white text-sm rounded-lg focus:ring-blue-500 focus:border-blue-500 block w-full p-2.5"
|
className="bg-zinc-900 text-white text-sm rounded-lg focus:ring-blue-500 focus:border-blue-500 block w-full p-2.5"
|
||||||
|
|||||||
@ -55,7 +55,6 @@ export default function PromptInput({
|
|||||||
onKeyDown={captureEnter}
|
onKeyDown={captureEnter}
|
||||||
onChange={onChange}
|
onChange={onChange}
|
||||||
required={true}
|
required={true}
|
||||||
maxLength={240}
|
|
||||||
disabled={inputDisabled}
|
disabled={inputDisabled}
|
||||||
onFocus={() => setFocused(true)}
|
onFocus={() => setFocused(true)}
|
||||||
onBlur={(e) => {
|
onBlur={(e) => {
|
||||||
|
|||||||
@ -71,6 +71,7 @@ function chatEndpoints(app) {
|
|||||||
});
|
});
|
||||||
response.status(200).json({ ...result });
|
response.status(200).json({ ...result });
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
|
console.error(e);
|
||||||
response.status(500).json({
|
response.status(500).json({
|
||||||
id: uuidv4(),
|
id: uuidv4(),
|
||||||
type: "abort",
|
type: "abort",
|
||||||
|
|||||||
69
server/models/cacheData.js
Normal file
69
server/models/cacheData.js
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
const prisma = require("../utils/prisma");
|
||||||
|
|
||||||
|
const CacheData = {
|
||||||
|
new: async function (inputs = {}) {
|
||||||
|
try {
|
||||||
|
const cache = await prisma.cache_data.create({
|
||||||
|
data: inputs,
|
||||||
|
});
|
||||||
|
return { cache, message: null };
|
||||||
|
} catch (error) {
|
||||||
|
console.error(error.message);
|
||||||
|
return { cache: null, message: error.message };
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
get: async function (clause = {}, limit = null, orderBy = null) {
|
||||||
|
try {
|
||||||
|
const cache = await prisma.cache_data.findFirst({
|
||||||
|
where: clause,
|
||||||
|
...(limit !== null ? { take: limit } : {}),
|
||||||
|
...(orderBy !== null ? { orderBy } : {}),
|
||||||
|
});
|
||||||
|
return cache || null;
|
||||||
|
} catch (error) {
|
||||||
|
console.error(error.message);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
delete: async function (clause = {}) {
|
||||||
|
try {
|
||||||
|
await prisma.cache_data.deleteMany({
|
||||||
|
where: clause,
|
||||||
|
});
|
||||||
|
return true;
|
||||||
|
} catch (error) {
|
||||||
|
console.error(error.message);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
where: async function (clause = {}, limit = null, orderBy = null) {
|
||||||
|
try {
|
||||||
|
const caches = await prisma.cache_data.findMany({
|
||||||
|
where: clause,
|
||||||
|
...(limit !== null ? { take: limit } : {}),
|
||||||
|
...(orderBy !== null ? { orderBy } : {}),
|
||||||
|
});
|
||||||
|
return caches;
|
||||||
|
} catch (error) {
|
||||||
|
console.error(error.message);
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
count: async function (clause = {}) {
|
||||||
|
try {
|
||||||
|
const count = await prisma.cache_data.count({
|
||||||
|
where: clause,
|
||||||
|
});
|
||||||
|
return count;
|
||||||
|
} catch (error) {
|
||||||
|
console.error(error.message);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
module.exports = { CacheData };
|
||||||
@ -65,6 +65,7 @@ const SystemSettings = {
|
|||||||
AzureOpenAiKey: !!process.env.AZURE_OPENAI_KEY,
|
AzureOpenAiKey: !!process.env.AZURE_OPENAI_KEY,
|
||||||
AzureOpenAiModelPref: process.env.OPEN_MODEL_PREF,
|
AzureOpenAiModelPref: process.env.OPEN_MODEL_PREF,
|
||||||
AzureOpenAiEmbeddingModelPref: process.env.EMBEDDING_MODEL_PREF,
|
AzureOpenAiEmbeddingModelPref: process.env.EMBEDDING_MODEL_PREF,
|
||||||
|
AzureOpenAiTokenLimit: process.env.AZURE_OPENAI_TOKEN_LIMIT || 4096,
|
||||||
}
|
}
|
||||||
: {}),
|
: {}),
|
||||||
|
|
||||||
|
|||||||
@ -36,6 +36,7 @@
|
|||||||
"express": "^4.18.2",
|
"express": "^4.18.2",
|
||||||
"extract-zip": "^2.0.1",
|
"extract-zip": "^2.0.1",
|
||||||
"graphql": "^16.7.1",
|
"graphql": "^16.7.1",
|
||||||
|
"js-tiktoken": "^1.0.7",
|
||||||
"jsonwebtoken": "^8.5.1",
|
"jsonwebtoken": "^8.5.1",
|
||||||
"langchain": "^0.0.90",
|
"langchain": "^0.0.90",
|
||||||
"mime": "^3.0.0",
|
"mime": "^3.0.0",
|
||||||
|
|||||||
11
server/prisma/migrations/20231101195421_init/migration.sql
Normal file
11
server/prisma/migrations/20231101195421_init/migration.sql
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
-- CreateTable
|
||||||
|
CREATE TABLE "cache_data" (
|
||||||
|
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
|
||||||
|
"name" TEXT NOT NULL,
|
||||||
|
"data" TEXT NOT NULL,
|
||||||
|
"belongsTo" TEXT,
|
||||||
|
"byId" INTEGER,
|
||||||
|
"expiresAt" DATETIME,
|
||||||
|
"createdAt" DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
"lastUpdatedAt" DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||||||
|
);
|
||||||
@ -116,3 +116,14 @@ model workspace_users {
|
|||||||
workspaces workspaces @relation(fields: [workspace_id], references: [id], onDelete: Cascade, onUpdate: Cascade)
|
workspaces workspaces @relation(fields: [workspace_id], references: [id], onDelete: Cascade, onUpdate: Cascade)
|
||||||
users users @relation(fields: [user_id], references: [id], onDelete: Cascade, onUpdate: Cascade)
|
users users @relation(fields: [user_id], references: [id], onDelete: Cascade, onUpdate: Cascade)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
model cache_data {
|
||||||
|
id Int @id @default(autoincrement())
|
||||||
|
name String
|
||||||
|
data String
|
||||||
|
belongsTo String?
|
||||||
|
byId Int?
|
||||||
|
expiresAt DateTime?
|
||||||
|
createdAt DateTime @default(now())
|
||||||
|
lastUpdatedAt DateTime @default(now())
|
||||||
|
}
|
||||||
|
|||||||
@ -12,6 +12,12 @@ class AnthropicLLM {
|
|||||||
apiKey: process.env.ANTHROPIC_API_KEY,
|
apiKey: process.env.ANTHROPIC_API_KEY,
|
||||||
});
|
});
|
||||||
this.anthropic = anthropic;
|
this.anthropic = anthropic;
|
||||||
|
this.model = process.env.ANTHROPIC_MODEL_PREF;
|
||||||
|
this.limits = {
|
||||||
|
history: this.promptWindowLimit() * 0.15,
|
||||||
|
system: this.promptWindowLimit() * 0.15,
|
||||||
|
user: this.promptWindowLimit() * 0.7,
|
||||||
|
};
|
||||||
|
|
||||||
if (!embedder)
|
if (!embedder)
|
||||||
throw new Error(
|
throw new Error(
|
||||||
@ -21,8 +27,19 @@ class AnthropicLLM {
|
|||||||
this.answerKey = v4().split("-")[0];
|
this.answerKey = v4().split("-")[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
isValidChatModel(modelName = "") {
|
promptWindowLimit() {
|
||||||
const validModels = ["claude-2"];
|
switch (this.model) {
|
||||||
|
case "claude-instant-1":
|
||||||
|
return 72_000;
|
||||||
|
case "claude-2":
|
||||||
|
return 100_000;
|
||||||
|
default:
|
||||||
|
return 72_000; // assume a claude-instant-1 model
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
isValidChatCompletionModel(modelName = "") {
|
||||||
|
const validModels = ["claude-2", "claude-instant-1"];
|
||||||
return validModels.includes(modelName);
|
return validModels.includes(modelName);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -62,24 +79,25 @@ class AnthropicLLM {
|
|||||||
\n\nAssistant:`;
|
\n\nAssistant:`;
|
||||||
}
|
}
|
||||||
|
|
||||||
// This is the interface used when no embeddings are present in the workspace
|
async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
|
||||||
// This is just having a conversation with the LLM as one would normally.
|
if (!this.isValidChatCompletionModel(this.model))
|
||||||
async sendChat(chatHistory = [], prompt, workspace = {}) {
|
|
||||||
const model = process.env.ANTHROPIC_MODEL_PREF || "claude-2";
|
|
||||||
if (!this.isValidChatModel(model))
|
|
||||||
throw new Error(
|
throw new Error(
|
||||||
`Anthropic chat: ${model} is not valid for chat completion!`
|
`Anthropic chat: ${this.model} is not valid for chat completion!`
|
||||||
);
|
);
|
||||||
|
|
||||||
|
const compressedPrompt = await this.compressMessages(
|
||||||
|
{
|
||||||
|
systemPrompt: chatPrompt(workspace),
|
||||||
|
userPrompt: prompt,
|
||||||
|
chatHistory,
|
||||||
|
},
|
||||||
|
rawHistory
|
||||||
|
);
|
||||||
const { content, error } = await this.anthropic.completions
|
const { content, error } = await this.anthropic.completions
|
||||||
.create({
|
.create({
|
||||||
model: "claude-2",
|
model: this.model,
|
||||||
max_tokens_to_sample: 300,
|
max_tokens_to_sample: 300,
|
||||||
prompt: this.constructPrompt({
|
prompt: compressedPrompt,
|
||||||
systemPrompt: chatPrompt(workspace),
|
|
||||||
userPrompt: prompt,
|
|
||||||
chatHistory,
|
|
||||||
}),
|
|
||||||
})
|
})
|
||||||
.then((res) => {
|
.then((res) => {
|
||||||
const { completion } = res;
|
const { completion } = res;
|
||||||
@ -100,15 +118,14 @@ class AnthropicLLM {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async getChatCompletion(prompt = "", _opts = {}) {
|
async getChatCompletion(prompt = "", _opts = {}) {
|
||||||
const model = process.env.ANTHROPIC_MODEL_PREF || "claude-2";
|
if (!this.isValidChatCompletionModel(this.model))
|
||||||
if (!this.isValidChatModel(model))
|
|
||||||
throw new Error(
|
throw new Error(
|
||||||
`Anthropic chat: ${model} is not valid for chat completion!`
|
`Anthropic chat: ${this.model} is not valid for chat completion!`
|
||||||
);
|
);
|
||||||
|
|
||||||
const { content, error } = await this.anthropic.completions
|
const { content, error } = await this.anthropic.completions
|
||||||
.create({
|
.create({
|
||||||
model: "claude-2",
|
model: this.model,
|
||||||
max_tokens_to_sample: 300,
|
max_tokens_to_sample: 300,
|
||||||
prompt,
|
prompt,
|
||||||
})
|
})
|
||||||
@ -130,6 +147,16 @@ class AnthropicLLM {
|
|||||||
return content;
|
return content;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async compressMessages(promptArgs = {}, rawHistory = []) {
|
||||||
|
const { messageStringCompressor } = require("../../helpers/chat");
|
||||||
|
const compressedPrompt = await messageStringCompressor(
|
||||||
|
this,
|
||||||
|
promptArgs,
|
||||||
|
rawHistory
|
||||||
|
);
|
||||||
|
return compressedPrompt;
|
||||||
|
}
|
||||||
|
|
||||||
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
|
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
|
||||||
async embedTextInput(textInput) {
|
async embedTextInput(textInput) {
|
||||||
return await this.embedder.embedTextInput(textInput);
|
return await this.embedder.embedTextInput(textInput);
|
||||||
|
|||||||
@ -1,4 +1,5 @@
|
|||||||
const { AzureOpenAiEmbedder } = require("../../EmbeddingEngines/azureOpenAi");
|
const { AzureOpenAiEmbedder } = require("../../EmbeddingEngines/azureOpenAi");
|
||||||
|
const { chatPrompt } = require("../../chats");
|
||||||
|
|
||||||
class AzureOpenAiLLM extends AzureOpenAiEmbedder {
|
class AzureOpenAiLLM extends AzureOpenAiEmbedder {
|
||||||
constructor() {
|
constructor() {
|
||||||
@ -13,9 +14,24 @@ class AzureOpenAiLLM extends AzureOpenAiEmbedder {
|
|||||||
process.env.AZURE_OPENAI_ENDPOINT,
|
process.env.AZURE_OPENAI_ENDPOINT,
|
||||||
new AzureKeyCredential(process.env.AZURE_OPENAI_KEY)
|
new AzureKeyCredential(process.env.AZURE_OPENAI_KEY)
|
||||||
);
|
);
|
||||||
|
this.model = process.env.OPEN_MODEL_PREF;
|
||||||
|
this.limits = {
|
||||||
|
history: this.promptWindowLimit() * 0.15,
|
||||||
|
system: this.promptWindowLimit() * 0.15,
|
||||||
|
user: this.promptWindowLimit() * 0.7,
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
isValidChatModel(_modelName = "") {
|
// Sure the user selected a proper value for the token limit
|
||||||
|
// could be any of these https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models#gpt-4-models
|
||||||
|
// and if undefined - assume it is the lowest end.
|
||||||
|
promptWindowLimit() {
|
||||||
|
return !!process.env.AZURE_OPENAI_TOKEN_LIMIT
|
||||||
|
? Number(process.env.AZURE_OPENAI_TOKEN_LIMIT)
|
||||||
|
: 4096;
|
||||||
|
}
|
||||||
|
|
||||||
|
isValidChatCompletionModel(_modelName = "") {
|
||||||
// The Azure user names their "models" as deployments and they can be any name
|
// The Azure user names their "models" as deployments and they can be any name
|
||||||
// so we rely on the user to put in the correct deployment as only they would
|
// so we rely on the user to put in the correct deployment as only they would
|
||||||
// know it.
|
// know it.
|
||||||
@ -31,7 +47,7 @@ class AzureOpenAiLLM extends AzureOpenAiEmbedder {
|
|||||||
const prompt = {
|
const prompt = {
|
||||||
role: "system",
|
role: "system",
|
||||||
content: `${systemPrompt}
|
content: `${systemPrompt}
|
||||||
Context:
|
Context:
|
||||||
${contextTexts
|
${contextTexts
|
||||||
.map((text, i) => {
|
.map((text, i) => {
|
||||||
return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
|
return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
|
||||||
@ -46,26 +62,25 @@ class AzureOpenAiLLM extends AzureOpenAiEmbedder {
|
|||||||
return { safe: true, reasons: [] };
|
return { safe: true, reasons: [] };
|
||||||
}
|
}
|
||||||
|
|
||||||
async sendChat(chatHistory = [], prompt, workspace = {}) {
|
async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
|
||||||
const model = process.env.OPEN_MODEL_PREF;
|
if (!this.model)
|
||||||
if (!model)
|
|
||||||
throw new Error(
|
throw new Error(
|
||||||
"No OPEN_MODEL_PREF ENV defined. This must the name of a deployment on your Azure account for an LLM chat model like GPT-3.5."
|
"No OPEN_MODEL_PREF ENV defined. This must the name of a deployment on your Azure account for an LLM chat model like GPT-3.5."
|
||||||
);
|
);
|
||||||
|
|
||||||
|
const messages = await this.compressMessages(
|
||||||
|
{
|
||||||
|
systemPrompt: chatPrompt(workspace),
|
||||||
|
userPrompt: prompt,
|
||||||
|
chatHistory,
|
||||||
|
},
|
||||||
|
rawHistory
|
||||||
|
);
|
||||||
const textResponse = await this.openai
|
const textResponse = await this.openai
|
||||||
.getChatCompletions(
|
.getChatCompletions(this.model, messages, {
|
||||||
model,
|
temperature: Number(workspace?.openAiTemp ?? 0.7),
|
||||||
[
|
n: 1,
|
||||||
{ role: "system", content: "" },
|
})
|
||||||
...chatHistory,
|
|
||||||
{ role: "user", content: prompt },
|
|
||||||
],
|
|
||||||
{
|
|
||||||
temperature: Number(workspace?.openAiTemp ?? 0.7),
|
|
||||||
n: 1,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
.then((res) => {
|
.then((res) => {
|
||||||
if (!res.hasOwnProperty("choices"))
|
if (!res.hasOwnProperty("choices"))
|
||||||
throw new Error("OpenAI chat: No results!");
|
throw new Error("OpenAI chat: No results!");
|
||||||
@ -83,18 +98,23 @@ class AzureOpenAiLLM extends AzureOpenAiEmbedder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async getChatCompletion(messages = [], { temperature = 0.7 }) {
|
async getChatCompletion(messages = [], { temperature = 0.7 }) {
|
||||||
const model = process.env.OPEN_MODEL_PREF;
|
if (!this.model)
|
||||||
if (!model)
|
|
||||||
throw new Error(
|
throw new Error(
|
||||||
"No OPEN_MODEL_PREF ENV defined. This must the name of a deployment on your Azure account for an LLM chat model like GPT-3.5."
|
"No OPEN_MODEL_PREF ENV defined. This must the name of a deployment on your Azure account for an LLM chat model like GPT-3.5."
|
||||||
);
|
);
|
||||||
|
|
||||||
const data = await this.openai.getChatCompletions(model, messages, {
|
const data = await this.openai.getChatCompletions(this.model, messages, {
|
||||||
temperature,
|
temperature,
|
||||||
});
|
});
|
||||||
if (!data.hasOwnProperty("choices")) return null;
|
if (!data.hasOwnProperty("choices")) return null;
|
||||||
return data.choices[0].message.content;
|
return data.choices[0].message.content;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async compressMessages(promptArgs = {}, rawHistory = []) {
|
||||||
|
const { messageArrayCompressor } = require("../../helpers/chat");
|
||||||
|
const messageArray = this.constructPrompt(promptArgs);
|
||||||
|
return await messageArrayCompressor(this, messageArray, rawHistory);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
|
|||||||
@ -1,4 +1,5 @@
|
|||||||
const { OpenAiEmbedder } = require("../../EmbeddingEngines/openAi");
|
const { OpenAiEmbedder } = require("../../EmbeddingEngines/openAi");
|
||||||
|
const { chatPrompt } = require("../../chats");
|
||||||
|
|
||||||
class OpenAiLLM extends OpenAiEmbedder {
|
class OpenAiLLM extends OpenAiEmbedder {
|
||||||
constructor() {
|
constructor() {
|
||||||
@ -10,6 +11,23 @@ class OpenAiLLM extends OpenAiEmbedder {
|
|||||||
apiKey: process.env.OPEN_AI_KEY,
|
apiKey: process.env.OPEN_AI_KEY,
|
||||||
});
|
});
|
||||||
this.openai = new OpenAIApi(config);
|
this.openai = new OpenAIApi(config);
|
||||||
|
this.model = process.env.OPEN_MODEL_PREF;
|
||||||
|
this.limits = {
|
||||||
|
history: this.promptWindowLimit() * 0.15,
|
||||||
|
system: this.promptWindowLimit() * 0.15,
|
||||||
|
user: this.promptWindowLimit() * 0.7,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
promptWindowLimit() {
|
||||||
|
switch (this.model) {
|
||||||
|
case "gpt-3.5-turbo":
|
||||||
|
return 4096;
|
||||||
|
case "gpt-4":
|
||||||
|
return 8192;
|
||||||
|
default:
|
||||||
|
return 4096; // assume a fine-tune 3.5
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async isValidChatCompletionModel(modelName = "") {
|
async isValidChatCompletionModel(modelName = "") {
|
||||||
@ -33,7 +51,7 @@ class OpenAiLLM extends OpenAiEmbedder {
|
|||||||
const prompt = {
|
const prompt = {
|
||||||
role: "system",
|
role: "system",
|
||||||
content: `${systemPrompt}
|
content: `${systemPrompt}
|
||||||
Context:
|
Context:
|
||||||
${contextTexts
|
${contextTexts
|
||||||
.map((text, i) => {
|
.map((text, i) => {
|
||||||
return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
|
return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
|
||||||
@ -75,7 +93,7 @@ class OpenAiLLM extends OpenAiEmbedder {
|
|||||||
return { safe: false, reasons };
|
return { safe: false, reasons };
|
||||||
}
|
}
|
||||||
|
|
||||||
async sendChat(chatHistory = [], prompt, workspace = {}) {
|
async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
|
||||||
const model = process.env.OPEN_MODEL_PREF;
|
const model = process.env.OPEN_MODEL_PREF;
|
||||||
if (!(await this.isValidChatCompletionModel(model)))
|
if (!(await this.isValidChatCompletionModel(model)))
|
||||||
throw new Error(
|
throw new Error(
|
||||||
@ -87,11 +105,14 @@ class OpenAiLLM extends OpenAiEmbedder {
|
|||||||
model,
|
model,
|
||||||
temperature: Number(workspace?.openAiTemp ?? 0.7),
|
temperature: Number(workspace?.openAiTemp ?? 0.7),
|
||||||
n: 1,
|
n: 1,
|
||||||
messages: [
|
messages: await this.compressMessages(
|
||||||
{ role: "system", content: "" },
|
{
|
||||||
...chatHistory,
|
systemPrompt: chatPrompt(workspace),
|
||||||
{ role: "user", content: prompt },
|
userPrompt: prompt,
|
||||||
],
|
chatHistory,
|
||||||
|
},
|
||||||
|
rawHistory
|
||||||
|
),
|
||||||
})
|
})
|
||||||
.then((json) => {
|
.then((json) => {
|
||||||
const res = json.data;
|
const res = json.data;
|
||||||
@ -111,14 +132,13 @@ class OpenAiLLM extends OpenAiEmbedder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async getChatCompletion(messages = null, { temperature = 0.7 }) {
|
async getChatCompletion(messages = null, { temperature = 0.7 }) {
|
||||||
const model = process.env.OPEN_MODEL_PREF || "gpt-3.5-turbo";
|
if (!(await this.isValidChatCompletionModel(this.model)))
|
||||||
if (!(await this.isValidChatCompletionModel(model)))
|
|
||||||
throw new Error(
|
throw new Error(
|
||||||
`OpenAI chat: ${model} is not valid for chat completion!`
|
`OpenAI chat: ${this.model} is not valid for chat completion!`
|
||||||
);
|
);
|
||||||
|
|
||||||
const { data } = await this.openai.createChatCompletion({
|
const { data } = await this.openai.createChatCompletion({
|
||||||
model,
|
model: this.model,
|
||||||
messages,
|
messages,
|
||||||
temperature,
|
temperature,
|
||||||
});
|
});
|
||||||
@ -126,6 +146,12 @@ class OpenAiLLM extends OpenAiEmbedder {
|
|||||||
if (!data.hasOwnProperty("choices")) return null;
|
if (!data.hasOwnProperty("choices")) return null;
|
||||||
return data.choices[0].message.content;
|
return data.choices[0].message.content;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async compressMessages(promptArgs = {}, rawHistory = []) {
|
||||||
|
const { messageArrayCompressor } = require("../../helpers/chat");
|
||||||
|
const messageArray = this.constructPrompt(promptArgs);
|
||||||
|
return await messageArrayCompressor(this, messageArray, rawHistory);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
|
|||||||
@ -91,91 +91,146 @@ async function chatWithWorkspace(
|
|||||||
const hasVectorizedSpace = await VectorDb.hasNamespace(workspace.slug);
|
const hasVectorizedSpace = await VectorDb.hasNamespace(workspace.slug);
|
||||||
const embeddingsCount = await VectorDb.namespaceCount(workspace.slug);
|
const embeddingsCount = await VectorDb.namespaceCount(workspace.slug);
|
||||||
if (!hasVectorizedSpace || embeddingsCount === 0) {
|
if (!hasVectorizedSpace || embeddingsCount === 0) {
|
||||||
const rawHistory = (
|
// If there are no embeddings - chat like a normal LLM chat interface.
|
||||||
user
|
return await emptyEmbeddingChat({
|
||||||
? await WorkspaceChats.forWorkspaceByUser(
|
uuid,
|
||||||
workspace.id,
|
user,
|
||||||
user.id,
|
|
||||||
messageLimit,
|
|
||||||
{ id: "desc" }
|
|
||||||
)
|
|
||||||
: await WorkspaceChats.forWorkspace(workspace.id, messageLimit, {
|
|
||||||
id: "desc",
|
|
||||||
})
|
|
||||||
).reverse();
|
|
||||||
const chatHistory = convertToPromptHistory(rawHistory);
|
|
||||||
const response = await LLMConnector.sendChat(
|
|
||||||
chatHistory,
|
|
||||||
message,
|
message,
|
||||||
workspace
|
|
||||||
);
|
|
||||||
const data = { text: response, sources: [], type: "chat" };
|
|
||||||
|
|
||||||
await WorkspaceChats.new({
|
|
||||||
workspaceId: workspace.id,
|
|
||||||
prompt: message,
|
|
||||||
response: data,
|
|
||||||
user,
|
|
||||||
});
|
|
||||||
return {
|
|
||||||
id: uuid,
|
|
||||||
type: "textResponse",
|
|
||||||
textResponse: response,
|
|
||||||
sources: [],
|
|
||||||
close: true,
|
|
||||||
error: null,
|
|
||||||
};
|
|
||||||
} else {
|
|
||||||
const rawHistory = (
|
|
||||||
user
|
|
||||||
? await WorkspaceChats.forWorkspaceByUser(
|
|
||||||
workspace.id,
|
|
||||||
user.id,
|
|
||||||
messageLimit,
|
|
||||||
{ id: "desc" }
|
|
||||||
)
|
|
||||||
: await WorkspaceChats.forWorkspace(workspace.id, messageLimit, {
|
|
||||||
id: "desc",
|
|
||||||
})
|
|
||||||
).reverse();
|
|
||||||
const chatHistory = convertToPromptHistory(rawHistory);
|
|
||||||
const {
|
|
||||||
response,
|
|
||||||
sources,
|
|
||||||
message: error,
|
|
||||||
} = await VectorDb[chatMode]({
|
|
||||||
namespace: workspace.slug,
|
|
||||||
input: message,
|
|
||||||
workspace,
|
workspace,
|
||||||
chatHistory,
|
messageLimit,
|
||||||
|
LLMConnector,
|
||||||
});
|
});
|
||||||
if (!response) {
|
}
|
||||||
return {
|
|
||||||
id: uuid,
|
|
||||||
type: "abort",
|
|
||||||
textResponse: null,
|
|
||||||
sources: [],
|
|
||||||
close: true,
|
|
||||||
error,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
const data = { text: response, sources, type: chatMode };
|
const { rawHistory, chatHistory } = await recentChatHistory(
|
||||||
await WorkspaceChats.new({
|
user,
|
||||||
workspaceId: workspace.id,
|
workspace,
|
||||||
prompt: message,
|
messageLimit,
|
||||||
response: data,
|
chatMode
|
||||||
user,
|
);
|
||||||
});
|
const {
|
||||||
|
contextTexts = [],
|
||||||
|
sources = [],
|
||||||
|
message: error,
|
||||||
|
} = await VectorDb.performSimilaritySearch({
|
||||||
|
namespace: workspace.slug,
|
||||||
|
input: message,
|
||||||
|
LLMConnector,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Failed similarity search.
|
||||||
|
if (!!error) {
|
||||||
return {
|
return {
|
||||||
id: uuid,
|
id: uuid,
|
||||||
type: "textResponse",
|
type: "abort",
|
||||||
textResponse: response,
|
textResponse: null,
|
||||||
sources,
|
sources: [],
|
||||||
close: true,
|
close: true,
|
||||||
error,
|
error,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Compress message to ensure prompt passes token limit with room for response
|
||||||
|
// and build system messages based on inputs and history.
|
||||||
|
const messages = await LLMConnector.compressMessages(
|
||||||
|
{
|
||||||
|
systemPrompt: chatPrompt(workspace),
|
||||||
|
userPrompt: message,
|
||||||
|
contextTexts,
|
||||||
|
chatHistory,
|
||||||
|
},
|
||||||
|
rawHistory
|
||||||
|
);
|
||||||
|
|
||||||
|
// Send the text completion.
|
||||||
|
const textResponse = await LLMConnector.getChatCompletion(messages, {
|
||||||
|
temperature: workspace?.openAiTemp ?? 0.7,
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!textResponse) {
|
||||||
|
return {
|
||||||
|
id: uuid,
|
||||||
|
type: "abort",
|
||||||
|
textResponse: null,
|
||||||
|
sources: [],
|
||||||
|
close: true,
|
||||||
|
error: "No text completion could be completed with this input.",
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
await WorkspaceChats.new({
|
||||||
|
workspaceId: workspace.id,
|
||||||
|
prompt: message,
|
||||||
|
response: { text: textResponse, sources, type: chatMode },
|
||||||
|
user,
|
||||||
|
});
|
||||||
|
return {
|
||||||
|
id: uuid,
|
||||||
|
type: "textResponse",
|
||||||
|
close: true,
|
||||||
|
textResponse,
|
||||||
|
sources,
|
||||||
|
error,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// On query we dont return message history. All other chat modes and when chatting
|
||||||
|
// with no embeddings we return history.
|
||||||
|
async function recentChatHistory(
|
||||||
|
user = null,
|
||||||
|
workspace,
|
||||||
|
messageLimit = 20,
|
||||||
|
chatMode = null
|
||||||
|
) {
|
||||||
|
if (chatMode === "query") return [];
|
||||||
|
const rawHistory = (
|
||||||
|
user
|
||||||
|
? await WorkspaceChats.forWorkspaceByUser(
|
||||||
|
workspace.id,
|
||||||
|
user.id,
|
||||||
|
messageLimit,
|
||||||
|
{ id: "desc" }
|
||||||
|
)
|
||||||
|
: await WorkspaceChats.forWorkspace(workspace.id, messageLimit, {
|
||||||
|
id: "desc",
|
||||||
|
})
|
||||||
|
).reverse();
|
||||||
|
return { rawHistory, chatHistory: convertToPromptHistory(rawHistory) };
|
||||||
|
}
|
||||||
|
|
||||||
|
async function emptyEmbeddingChat({
|
||||||
|
uuid,
|
||||||
|
user,
|
||||||
|
message,
|
||||||
|
workspace,
|
||||||
|
messageLimit,
|
||||||
|
LLMConnector,
|
||||||
|
}) {
|
||||||
|
const { rawHistory, chatHistory } = await recentChatHistory(
|
||||||
|
user,
|
||||||
|
workspace,
|
||||||
|
messageLimit
|
||||||
|
);
|
||||||
|
const textResponse = await LLMConnector.sendChat(
|
||||||
|
chatHistory,
|
||||||
|
message,
|
||||||
|
workspace,
|
||||||
|
rawHistory
|
||||||
|
);
|
||||||
|
await WorkspaceChats.new({
|
||||||
|
workspaceId: workspace.id,
|
||||||
|
prompt: message,
|
||||||
|
response: { text: textResponse, sources: [], type: "chat" },
|
||||||
|
user,
|
||||||
|
});
|
||||||
|
return {
|
||||||
|
id: uuid,
|
||||||
|
type: "textResponse",
|
||||||
|
sources: [],
|
||||||
|
close: true,
|
||||||
|
error: null,
|
||||||
|
textResponse,
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
function chatPrompt(workspace) {
|
function chatPrompt(workspace) {
|
||||||
@ -186,6 +241,7 @@ function chatPrompt(workspace) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
|
convertToPromptHistory,
|
||||||
convertToChatHistory,
|
convertToChatHistory,
|
||||||
chatWithWorkspace,
|
chatWithWorkspace,
|
||||||
chatPrompt,
|
chatPrompt,
|
||||||
|
|||||||
325
server/utils/helpers/chat/index.js
Normal file
325
server/utils/helpers/chat/index.js
Normal file
@ -0,0 +1,325 @@
|
|||||||
|
const { convertToPromptHistory } = require("../../chats");
|
||||||
|
const { TokenManager } = require("../tiktoken");
|
||||||
|
|
||||||
|
/*
|
||||||
|
What is the message Array compressor?
|
||||||
|
TLDR: So anyway, i started blasting (your prompts & stuff)
|
||||||
|
|
||||||
|
messageArrayCompressor arose out of a need for users to be able to insert unlimited token prompts
|
||||||
|
and also maintain coherent history, system instructions and context, if applicable.
|
||||||
|
|
||||||
|
We took an opinionated approach that after much back-testing we have found retained a highly coherent answer
|
||||||
|
under most user conditions that a user would take while using this specific system. While other systems may
|
||||||
|
use a more advanced model for compressing message history or simplify text through a recursive approach - ours is much simpler.
|
||||||
|
|
||||||
|
We "cannonball" the input.
|
||||||
|
Cannonball (verb): To ensure a prompt fits through a model window we blast a hole in the center of any inputs blocking our path to doing so.
|
||||||
|
This starts by dissecting the input into tokens and deleting from the middle out, bi-directionally, until the prompt window is satisfied.
|
||||||
|
You may think: "Doesn't this result in massive data loss?" - yes & no.
|
||||||
|
Under the use cases we expect the tool to be used, which is mostly chatting with documents, we are able to use this approach with minimal blowback
|
||||||
|
on the quality of responses.
|
||||||
|
|
||||||
|
We accomplish this by taking a rate-limit approach that is proportional to the model capacity. Since we support more than openAI models, this needs to
|
||||||
|
be generic and reliance on a "better summary" model just is not a luxury we can afford. The added latency overhead during prompting is also unacceptable.
|
||||||
|
In general:
|
||||||
|
system: at best 15% of token capacity
|
||||||
|
history: at best 15% of token capacity
|
||||||
|
prompt: at best 70% of token capacity.
|
||||||
|
|
||||||
|
we handle overflows by taking an aggressive path for two main cases.
|
||||||
|
|
||||||
|
1. Very large user prompt
|
||||||
|
- Likely uninterested in context, history, or even system prompt. This is a "standalone" prompt that hijacks the whole thread.
|
||||||
|
- We run this prompt on its own since a prompt that is over 70% of context window certainly is standalone.
|
||||||
|
|
||||||
|
2. Context window is exceeded in regular use.
|
||||||
|
- We do not touch prompt since it is very likely to be <70% of window.
|
||||||
|
- We check system prompt is not outrageous - if it is we cannonball it and keep context if present.
|
||||||
|
- We check a sliding window of history, only allowing up to 15% of the history to pass through if it fits, with a
|
||||||
|
preference for recent history if we can cannonball to fit it, otherwise it is omitted.
|
||||||
|
|
||||||
|
We end up with a rather large prompt that fits through a given window with a lot of room for response in most use-cases.
|
||||||
|
We also take the approach that history is the least important and most flexible of the items in this array of responses.
|
||||||
|
|
||||||
|
There is a supplemental version of this function that also returns a formatted string for models like Claude-2
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * Compresses an OpenAI-style message array (system, ...history, user) so the
 * full prompt plus an estimated reply fits inside the model's context window.
 * See the "cannonball" explanation at the top of this file for the strategy.
 *
 * @param {object} llm - LLM connector; must expose `.model`, `.promptWindowLimit()` and `.limits` {system, history, user}.
 * @param {Array<{role: string, content: string}>} messages - Full message array: [system, ...history, user].
 * @param {Array} rawHistory - Raw chat records convertible via convertToPromptHistory. Not mutated.
 * @returns {Promise<Array<{role: string, content: string}>>} A message array that fits the prompt window.
 */
async function messageArrayCompressor(llm, messages = [], rawHistory = []) {
  // Assume the response will need at least 600 tokens. If the total prompt +
  // reply would exceed the window we must proactively compress so the model
  // has room to answer. Realistically most users are never impacted by this.
  const tokenBuffer = 600;
  const tokenManager = new TokenManager(llm.model);

  // If no work needs to be done, just pass through.
  if (tokenManager.statsFrom(messages) + tokenBuffer < llm.promptWindowLimit())
    return messages;

  const system = messages.shift();
  const user = messages.pop();
  const userPromptSize = tokenManager.countFromString(user.content);

  // The user prompt is the main focus here - we prioritize it and allow it to
  // hijack the entire conversation thread. We cannonball the prompt through to
  // ensure the reply still has at least 20% of the token supply to reply with.
  if (userPromptSize > llm.limits.user) {
    return [
      {
        role: "user",
        content: cannonball({
          input: user.content,
          targetTokenSize: llm.promptWindowLimit() * 0.8,
          tiktokenInstance: tokenManager,
        }),
      },
    ];
  }

  // System prompt: pass through when under its budget, otherwise cannonball
  // the instruction portion while preserving any "Context:" section verbatim.
  const compressedSystem = new Promise((resolve) => {
    const count = tokenManager.countFromString(system.content);
    if (count < llm.limits.system) {
      resolve(system);
      return;
    }

    // Split context from system prompt - cannonball since it's over the window.
    // We assume the context + user prompt is enough tokens to fit.
    const [prompt, context = ""] = system.content.split("Context:");
    system.content = `${cannonball({
      input: prompt,
      targetTokenSize: llm.limits.system,
      tiktokenInstance: tokenManager,
    })}${context ? `\nContext: ${context}` : ""}`;
    resolve(system);
  });

  // Prompt is allowed to take up to 70% of the window - we know it's under
  // if we are here, so pass through.
  const compressedPrompt = Promise.resolve(user);

  // We always aggressively compress history because it is the least
  // important data to retain in full fidelity.
  const compressedHistory = new Promise((resolve) => {
    const eligibleHistoryItems = [];
    let historyTokenCount = 0;

    // Iterate newest-first over a copy: Array.prototype.reverse mutates in
    // place, and rawHistory belongs to the caller.
    for (const [i, history] of [...rawHistory].reverse().entries()) {
      const [histUser, histAssistant] = convertToPromptHistory([history]);
      const [userTokens, assistantTokens] = [
        tokenManager.countFromString(histUser.content),
        tokenManager.countFromString(histAssistant.content),
      ];
      const total = userTokens + assistantTokens;

      // If the token cost of adding this history pair is small enough,
      // add it to history and move on to the next.
      if (historyTokenCount + total < llm.limits.history) {
        eligibleHistoryItems.unshift(histUser, histAssistant);
        historyTokenCount += total;
        continue;
      }

      // Adding this history item would exceed the limit. We only keep trying
      // for the 3 most recent message pairs - past those, stop entirely.
      if (i > 2) break;

      // We are over the limit but within the 3 most recent chats, so
      // cannonball the offending side(s) to make the pair fit. Each component
      // may be at most ~50% of the history budget; token math is not exact,
      // so divide by 2.2 instead of 2 as a safety fudge factor.
      const maxTargetSize = Math.floor(llm.limits.history / 2.2);
      if (userTokens > maxTargetSize) {
        histUser.content = cannonball({
          input: histUser.content,
          targetTokenSize: maxTargetSize,
          tiktokenInstance: tokenManager,
        });
      }

      if (assistantTokens > maxTargetSize) {
        histAssistant.content = cannonball({
          input: histAssistant.content,
          targetTokenSize: maxTargetSize,
          tiktokenInstance: tokenManager,
        });
      }

      const newTotal = tokenManager.statsFrom([histUser, histAssistant]);
      if (historyTokenCount + newTotal > llm.limits.history) continue;
      eligibleHistoryItems.unshift(histUser, histAssistant);
      historyTokenCount += newTotal;
    }
    resolve(eligibleHistoryItems);
  });

  const [cSystem, cHistory, cPrompt] = await Promise.all([
    compressedSystem,
    compressedHistory,
    compressedPrompt,
  ]);
  return [cSystem, ...cHistory, cPrompt];
}
|
||||||
|
|
||||||
|
// Implementation of messageArrayCompressor, but for string only completion models
|
||||||
|
/**
 * Implementation of messageArrayCompressor, but for string-only completion
 * models (e.g. Claude-style prompts built via llm.constructPrompt).
 *
 * @param {object} llm - LLM connector; must expose `.model`, `.promptWindowLimit()`, `.limits` and `.constructPrompt()`.
 * @param {object} promptArgs - Arguments for llm.constructPrompt ({systemPrompt, userPrompt, contextTexts, ...}).
 * @param {Array} rawHistory - Raw chat records convertible via convertToPromptHistory. Not mutated.
 * @returns {Promise<string>} A constructed prompt string that fits the prompt window.
 */
async function messageStringCompressor(llm, promptArgs = {}, rawHistory = []) {
  // Reserve ~600 tokens for the reply; only compress when the constructed
  // prompt + reply would overflow the model window.
  const tokenBuffer = 600;
  const tokenManager = new TokenManager(llm.model);
  const initialPrompt = llm.constructPrompt(promptArgs);
  if (
    tokenManager.statsFrom(initialPrompt) + tokenBuffer <
    llm.promptWindowLimit()
  )
    return initialPrompt;

  const system = promptArgs.systemPrompt;
  const user = promptArgs.userPrompt;
  const userPromptSize = tokenManager.countFromString(user);

  // The user prompt is the main focus here - we prioritize it and allow it to
  // hijack the entire conversation thread. We cannonball the prompt through to
  // ensure the reply still has at least 20% of the token supply to reply with.
  if (userPromptSize > llm.limits.user) {
    return llm.constructPrompt({
      userPrompt: cannonball({
        input: user,
        targetTokenSize: llm.promptWindowLimit() * 0.8,
        tiktokenInstance: tokenManager,
      }),
    });
  }

  // System prompt: pass through when under budget, otherwise cannonball it.
  const compressedSystem = new Promise((resolve) => {
    const count = tokenManager.countFromString(system);
    if (count < llm.limits.system) {
      resolve(system);
      return;
    }
    resolve(
      cannonball({
        input: system,
        targetTokenSize: llm.limits.system,
        tiktokenInstance: tokenManager,
      })
    );
  });

  // Prompt is allowed to take up to 70% of the window - we know it's under
  // if we are here, so pass through.
  const compressedPrompt = Promise.resolve(user);

  // We always aggressively compress history because it is the least
  // important data to retain in full fidelity.
  const compressedHistory = new Promise((resolve) => {
    const eligibleHistoryItems = [];
    let historyTokenCount = 0;

    // Iterate newest-first over a copy: Array.prototype.reverse mutates in
    // place, and rawHistory belongs to the caller.
    for (const [i, history] of [...rawHistory].reverse().entries()) {
      const [histUser, histAssistant] = convertToPromptHistory([history]);
      const [userTokens, assistantTokens] = [
        tokenManager.countFromString(histUser.content),
        tokenManager.countFromString(histAssistant.content),
      ];
      const total = userTokens + assistantTokens;

      // If the token cost of adding this history pair is small enough,
      // add it to history and move on to the next.
      if (historyTokenCount + total < llm.limits.history) {
        eligibleHistoryItems.unshift(histUser, histAssistant);
        historyTokenCount += total;
        continue;
      }

      // Adding this history item would exceed the limit. We only keep trying
      // for the 3 most recent message pairs - past those, stop entirely.
      if (i > 2) break;

      // We are over the limit but within the 3 most recent chats, so
      // cannonball the offending side(s) to make the pair fit. Each component
      // may be at most ~50% of the history budget; token math is not exact,
      // so divide by 2.2 instead of 2 as a safety fudge factor.
      const maxTargetSize = Math.floor(llm.limits.history / 2.2);
      if (userTokens > maxTargetSize) {
        histUser.content = cannonball({
          input: histUser.content,
          targetTokenSize: maxTargetSize,
          tiktokenInstance: tokenManager,
        });
      }

      if (assistantTokens > maxTargetSize) {
        histAssistant.content = cannonball({
          input: histAssistant.content,
          targetTokenSize: maxTargetSize,
          tiktokenInstance: tokenManager,
        });
      }

      const newTotal = tokenManager.statsFrom([histUser, histAssistant]);
      if (historyTokenCount + newTotal > llm.limits.history) continue;
      eligibleHistoryItems.unshift(histUser, histAssistant);
      historyTokenCount += newTotal;
    }
    resolve(eligibleHistoryItems);
  });

  const [cSystem, cHistory, cPrompt] = await Promise.all([
    compressedSystem,
    compressedHistory,
    compressedPrompt,
  ]);

  return llm.constructPrompt({
    systemPrompt: cSystem,
    contextTexts: promptArgs?.contextTexts || [],
    chatHistory: cHistory,
    userPrompt: cPrompt,
  });
}
|
||||||
|
|
||||||
|
// Cannonball prompting: aka where we shoot a proportionally big cannonball through a proportional large prompt
|
||||||
|
// Nobody should be sending prompts this big, but there is no reason we shouldn't allow it if results are good even by doing it.
|
||||||
|
/**
 * Cannonball prompting: middle-truncates an input string (measured in tokens)
 * down to roughly `targetTokenSize` by blasting a hole through its center,
 * keeping the head and tail and inserting a truncation marker between them.
 *
 * @param {object} args
 * @param {string} args.input - The text to truncate. Returned unchanged if empty or already under target.
 * @param {number} args.targetTokenSize - Desired token budget; 0 disables truncation.
 * @param {object|null} args.tiktokenInstance - Optional TokenManager to reuse; a default one is built otherwise.
 * @param {string|null} args.ellipsesStr - Optional marker inserted at the cut point.
 * @returns {string} The (possibly) truncated text.
 */
function cannonball({
  input = "",
  targetTokenSize = 0,
  tiktokenInstance = null,
  ellipsesStr = null,
}) {
  if (!input || !targetTokenSize) return input;
  const tokenManager = tiktokenInstance || new TokenManager();
  const truncText = ellipsesStr || "\n\n--prompt truncated for brevity--\n\n";
  const initialInputSize = tokenManager.countFromString(input);
  if (initialInputSize < targetTokenSize) return input;

  // delta is the number of tokens between where the prompt is in size
  // and where we ideally need to land.
  const delta = initialInputSize - targetTokenSize;
  const tokenChunks = tokenManager.tokensFromString(input);
  const middleIdx = Math.floor(tokenChunks.length / 2);
  const removeHalf = Math.round(delta / 2);

  // Middle-truncate going left and right of the midpoint. Clamp both slice
  // bounds: a negative end index in slice(0, end) counts from the array end
  // and would wrongly KEEP almost everything when delta/2 exceeds middleIdx.
  const leftChunks = tokenChunks.slice(0, Math.max(0, middleIdx - removeHalf));
  const rightChunks = tokenChunks.slice(
    Math.min(tokenChunks.length, middleIdx + removeHalf)
  );
  const truncatedText =
    tokenManager.bytesFromTokens(leftChunks) +
    truncText +
    tokenManager.bytesFromTokens(rightChunks);

  console.log(
    `Cannonball results ${initialInputSize} -> ${tokenManager.countFromString(
      truncatedText
    )} tokens.`
  );
  return truncatedText;
}
|
||||||
|
|
||||||
|
// Public API: prompt compression for chat-message-array models and
// string-completion models respectively.
module.exports = {
  messageArrayCompressor,
  messageStringCompressor,
};
|
||||||
57
server/utils/helpers/tiktoken.js
Normal file
57
server/utils/helpers/tiktoken.js
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
const { getEncodingNameForModel, getEncoding } = require("js-tiktoken");
|
||||||
|
|
||||||
|
/**
 * Thin wrapper around js-tiktoken that resolves the correct encoder for a
 * model name and exposes token counting/encoding/decoding helpers.
 */
class TokenManager {
  constructor(model = "gpt-3.5-turbo") {
    this.model = model;
    this.encoderName = this.getEncodingFromModel(model);
    this.encoder = getEncoding(this.encoderName);
    this.buffer = 50;
  }

  // Resolve the tiktoken encoding name for a model; unknown models fall back
  // to the cl100k_base encoding.
  getEncodingFromModel(model) {
    try {
      return getEncodingNameForModel(model);
    } catch {
      return "cl100k_base";
    }
  }

  // Encode a string into its token array.
  tokensFromString(input = "") {
    return this.encoder.encode(input);
  }

  // Decode a token array back into text.
  bytesFromTokens(tokens = []) {
    return this.encoder.decode(tokens);
  }

  // Count the tokens in a string.
  countFromString(input = "") {
    return this.encoder.encode(input).length;
  }

  // Token stats for either a raw string or an OpenAI-style message array.
  statsFrom(input) {
    if (typeof input === "string") return this.countFromString(input);

    // What is going on here?
    // https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb Item 6.
    // Exact counts are not possible for chat arrays, so we estimate: a fixed
    // per-message overhead plus content tokens plus an empirically measured
    // correction factor (repeated testing showed we were consistently 2 off,
    // so as of Nov 1, 2023 the additional factor changed from 3 to 5).
    if (Array.isArray(input)) {
      const diffCoefficient = 5;
      let tokensFromContent = 0;
      for (const message of input) {
        tokensFromContent += this.countFromString(message.content);
      }
      return input.length * 3 + tokensFromContent + diffCoefficient;
    }

    throw new Error("Not a supported tokenized format.");
  }
}
|
||||||
|
|
||||||
|
// Public API: token counting/encoding utilities built on js-tiktoken.
module.exports = {
  TokenManager,
};
|
||||||
@ -17,6 +17,10 @@ const KEY_MAPPING = {
|
|||||||
envKey: "AZURE_OPENAI_ENDPOINT",
|
envKey: "AZURE_OPENAI_ENDPOINT",
|
||||||
checks: [isNotEmpty, validAzureURL],
|
checks: [isNotEmpty, validAzureURL],
|
||||||
},
|
},
|
||||||
|
AzureOpenAiTokenLimit: {
|
||||||
|
envKey: "AZURE_OPENAI_TOKEN_LIMIT",
|
||||||
|
checks: [validOpenAiTokenLimit],
|
||||||
|
},
|
||||||
AzureOpenAiKey: {
|
AzureOpenAiKey: {
|
||||||
envKey: "AZURE_OPENAI_KEY",
|
envKey: "AZURE_OPENAI_KEY",
|
||||||
checks: [isNotEmpty],
|
checks: [isNotEmpty],
|
||||||
@ -137,7 +141,7 @@ function supportedLLM(input = "") {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function validAnthropicModel(input = "") {
|
function validAnthropicModel(input = "") {
|
||||||
const validModels = ["claude-2"];
|
const validModels = ["claude-2", "claude-instant-1"];
|
||||||
return validModels.includes(input)
|
return validModels.includes(input)
|
||||||
? null
|
? null
|
||||||
: `Invalid Model type. Must be one of ${validModels.join(", ")}.`;
|
: `Invalid Model type. Must be one of ${validModels.join(", ")}.`;
|
||||||
@ -174,6 +178,14 @@ function validAzureURL(input = "") {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Validates that an Azure OpenAI token-limit setting is one of the supported
 * context-window sizes.
 *
 * @param {string} input - The raw env value to validate.
 * @returns {string|null} null when valid, otherwise an error message string.
 */
function validOpenAiTokenLimit(input = "") {
  const tokenLimit = Number(input);
  // Number.isNaN avoids the coercing global isNaN (which would also treat
  // non-numeric objects oddly); Number() has already done the conversion.
  if (Number.isNaN(tokenLimit)) return "Token limit is not a number";
  if (![4_096, 8_192, 16_384, 32_768].includes(tokenLimit))
    return "Invalid OpenAI token limit.";
  return null;
}
|
||||||
|
|
||||||
function requiresForceMode(_, forceModeEnabled = false) {
|
function requiresForceMode(_, forceModeEnabled = false) {
|
||||||
return forceModeEnabled === true ? null : "Cannot set this setting.";
|
return forceModeEnabled === true ? null : "Cannot set this setting.";
|
||||||
}
|
}
|
||||||
|
|||||||
@ -3,7 +3,6 @@ const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
|
|||||||
const { storeVectorResult, cachedVectorInformation } = require("../../files");
|
const { storeVectorResult, cachedVectorInformation } = require("../../files");
|
||||||
const { v4: uuidv4 } = require("uuid");
|
const { v4: uuidv4 } = require("uuid");
|
||||||
const { toChunks, getLLMProvider } = require("../../helpers");
|
const { toChunks, getLLMProvider } = require("../../helpers");
|
||||||
const { chatPrompt } = require("../../chats");
|
|
||||||
|
|
||||||
const Chroma = {
|
const Chroma = {
|
||||||
name: "Chroma",
|
name: "Chroma",
|
||||||
@ -253,92 +252,35 @@ const Chroma = {
|
|||||||
await DocumentVectors.deleteIds(indexes);
|
await DocumentVectors.deleteIds(indexes);
|
||||||
return true;
|
return true;
|
||||||
},
|
},
|
||||||
query: async function (reqBody = {}) {
|
performSimilaritySearch: async function ({
|
||||||
const { namespace = null, input, workspace = {} } = reqBody;
|
namespace = null,
|
||||||
if (!namespace || !input) throw new Error("Invalid request body");
|
input = "",
|
||||||
|
LLMConnector = null,
|
||||||
|
}) {
|
||||||
|
if (!namespace || !input || !LLMConnector)
|
||||||
|
throw new Error("Invalid request to performSimilaritySearch.");
|
||||||
|
|
||||||
const { client } = await this.connect();
|
const { client } = await this.connect();
|
||||||
if (!(await this.namespaceExists(client, namespace))) {
|
if (!(await this.namespaceExists(client, namespace))) {
|
||||||
return {
|
return {
|
||||||
response: null,
|
contextTexts: [],
|
||||||
sources: [],
|
sources: [],
|
||||||
message: "Invalid query - no documents found for workspace!",
|
message: "Invalid query - no documents found for workspace!",
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
const LLMConnector = getLLMProvider();
|
|
||||||
const queryVector = await LLMConnector.embedTextInput(input);
|
const queryVector = await LLMConnector.embedTextInput(input);
|
||||||
const { contextTexts, sourceDocuments } = await this.similarityResponse(
|
const { contextTexts, sourceDocuments } = await this.similarityResponse(
|
||||||
client,
|
client,
|
||||||
namespace,
|
namespace,
|
||||||
queryVector
|
queryVector
|
||||||
);
|
);
|
||||||
const memory = LLMConnector.constructPrompt({
|
|
||||||
systemPrompt: chatPrompt(workspace),
|
|
||||||
contextTexts: contextTexts,
|
|
||||||
userPrompt: input,
|
|
||||||
});
|
|
||||||
const responseText = await LLMConnector.getChatCompletion(memory, {
|
|
||||||
temperature: workspace?.openAiTemp ?? 0.7,
|
|
||||||
});
|
|
||||||
|
|
||||||
// When we roll out own response we have separate metadata and texts,
|
|
||||||
// so for source collection we need to combine them.
|
|
||||||
const sources = sourceDocuments.map((metadata, i) => {
|
const sources = sourceDocuments.map((metadata, i) => {
|
||||||
return { metadata: { ...metadata, text: contextTexts[i] } };
|
return { metadata: { ...metadata, text: contextTexts[i] } };
|
||||||
});
|
});
|
||||||
return {
|
return {
|
||||||
response: responseText,
|
contextTexts,
|
||||||
sources: this.curateSources(sources),
|
|
||||||
message: false,
|
|
||||||
};
|
|
||||||
},
|
|
||||||
// This implementation of chat uses the chat history and modifies the system prompt at execution
|
|
||||||
// this is improved over the regular langchain implementation so that chats do not directly modify embeddings
|
|
||||||
// because then multi-user support will have all conversations mutating the base vector collection to which then
|
|
||||||
// the only solution is replicating entire vector databases per user - which will very quickly consume space on VectorDbs
|
|
||||||
chat: async function (reqBody = {}) {
|
|
||||||
const {
|
|
||||||
namespace = null,
|
|
||||||
input,
|
|
||||||
workspace = {},
|
|
||||||
chatHistory = [],
|
|
||||||
} = reqBody;
|
|
||||||
if (!namespace || !input) throw new Error("Invalid request body");
|
|
||||||
|
|
||||||
const { client } = await this.connect();
|
|
||||||
if (!(await this.namespaceExists(client, namespace))) {
|
|
||||||
return {
|
|
||||||
response: null,
|
|
||||||
sources: [],
|
|
||||||
message: "Invalid query - no documents found for workspace!",
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
const LLMConnector = getLLMProvider();
|
|
||||||
const queryVector = await LLMConnector.embedTextInput(input);
|
|
||||||
const { contextTexts, sourceDocuments } = await this.similarityResponse(
|
|
||||||
client,
|
|
||||||
namespace,
|
|
||||||
queryVector
|
|
||||||
);
|
|
||||||
const memory = LLMConnector.constructPrompt({
|
|
||||||
systemPrompt: chatPrompt(workspace),
|
|
||||||
contextTexts: contextTexts,
|
|
||||||
userPrompt: input,
|
|
||||||
chatHistory,
|
|
||||||
});
|
|
||||||
const responseText = await LLMConnector.getChatCompletion(memory, {
|
|
||||||
temperature: workspace?.openAiTemp ?? 0.7,
|
|
||||||
});
|
|
||||||
|
|
||||||
// When we roll out own response we have separate metadata and texts,
|
|
||||||
// so for source collection we need to combine them.
|
|
||||||
const sources = sourceDocuments.map((metadata, i) => {
|
|
||||||
return { metadata: { ...metadata, text: contextTexts[i] } };
|
|
||||||
});
|
|
||||||
return {
|
|
||||||
response: responseText,
|
|
||||||
sources: this.curateSources(sources),
|
sources: this.curateSources(sources),
|
||||||
message: false,
|
message: false,
|
||||||
};
|
};
|
||||||
|
|||||||
@ -4,7 +4,6 @@ const { OpenAIEmbeddings } = require("langchain/embeddings/openai");
|
|||||||
const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
|
const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
|
||||||
const { storeVectorResult, cachedVectorInformation } = require("../../files");
|
const { storeVectorResult, cachedVectorInformation } = require("../../files");
|
||||||
const { v4: uuidv4 } = require("uuid");
|
const { v4: uuidv4 } = require("uuid");
|
||||||
const { chatPrompt } = require("../../chats");
|
|
||||||
|
|
||||||
const LanceDb = {
|
const LanceDb = {
|
||||||
uri: `${
|
uri: `${
|
||||||
@ -226,83 +225,36 @@ const LanceDb = {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
query: async function (reqBody = {}) {
|
performSimilaritySearch: async function ({
|
||||||
const { namespace = null, input, workspace = {} } = reqBody;
|
namespace = null,
|
||||||
if (!namespace || !input) throw new Error("Invalid request body");
|
input = "",
|
||||||
|
LLMConnector = null,
|
||||||
|
}) {
|
||||||
|
if (!namespace || !input || !LLMConnector)
|
||||||
|
throw new Error("Invalid request to performSimilaritySearch.");
|
||||||
|
|
||||||
const { client } = await this.connect();
|
const { client } = await this.connect();
|
||||||
if (!(await this.namespaceExists(client, namespace))) {
|
if (!(await this.namespaceExists(client, namespace))) {
|
||||||
return {
|
return {
|
||||||
response: null,
|
contextTexts: [],
|
||||||
sources: [],
|
sources: [],
|
||||||
message: "Invalid query - no documents found for workspace!",
|
message: "Invalid query - no documents found for workspace!",
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
const LLMConnector = getLLMProvider();
|
|
||||||
const queryVector = await LLMConnector.embedTextInput(input);
|
const queryVector = await LLMConnector.embedTextInput(input);
|
||||||
const { contextTexts, sourceDocuments } = await this.similarityResponse(
|
const { contextTexts, sourceDocuments } = await this.similarityResponse(
|
||||||
client,
|
client,
|
||||||
namespace,
|
namespace,
|
||||||
queryVector
|
queryVector
|
||||||
);
|
);
|
||||||
const memory = LLMConnector.constructPrompt({
|
|
||||||
systemPrompt: chatPrompt(workspace),
|
|
||||||
contextTexts: contextTexts,
|
|
||||||
userPrompt: input,
|
|
||||||
});
|
|
||||||
const responseText = await LLMConnector.getChatCompletion(memory, {
|
|
||||||
temperature: workspace?.openAiTemp ?? 0.7,
|
|
||||||
});
|
|
||||||
|
|
||||||
|
const sources = sourceDocuments.map((metadata, i) => {
|
||||||
|
return { metadata: { ...metadata, text: contextTexts[i] } };
|
||||||
|
});
|
||||||
return {
|
return {
|
||||||
response: responseText,
|
contextTexts,
|
||||||
sources: this.curateSources(sourceDocuments),
|
sources: this.curateSources(sources),
|
||||||
message: false,
|
|
||||||
};
|
|
||||||
},
|
|
||||||
// This implementation of chat uses the chat history and modifies the system prompt at execution
|
|
||||||
// this is improved over the regular langchain implementation so that chats do not directly modify embeddings
|
|
||||||
// because then multi-user support will have all conversations mutating the base vector collection to which then
|
|
||||||
// the only solution is replicating entire vector databases per user - which will very quickly consume space on VectorDbs
|
|
||||||
chat: async function (reqBody = {}) {
|
|
||||||
const {
|
|
||||||
namespace = null,
|
|
||||||
input,
|
|
||||||
workspace = {},
|
|
||||||
chatHistory = [],
|
|
||||||
} = reqBody;
|
|
||||||
if (!namespace || !input) throw new Error("Invalid request body");
|
|
||||||
|
|
||||||
const { client } = await this.connect();
|
|
||||||
if (!(await this.namespaceExists(client, namespace))) {
|
|
||||||
return {
|
|
||||||
response: null,
|
|
||||||
sources: [],
|
|
||||||
message: "Invalid query - no documents found for workspace!",
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
const LLMConnector = getLLMProvider();
|
|
||||||
const queryVector = await LLMConnector.embedTextInput(input);
|
|
||||||
const { contextTexts, sourceDocuments } = await this.similarityResponse(
|
|
||||||
client,
|
|
||||||
namespace,
|
|
||||||
queryVector
|
|
||||||
);
|
|
||||||
const memory = LLMConnector.constructPrompt({
|
|
||||||
systemPrompt: chatPrompt(workspace),
|
|
||||||
contextTexts: contextTexts,
|
|
||||||
userPrompt: input,
|
|
||||||
chatHistory,
|
|
||||||
});
|
|
||||||
const responseText = await LLMConnector.getChatCompletion(memory, {
|
|
||||||
temperature: workspace?.openAiTemp ?? 0.7,
|
|
||||||
});
|
|
||||||
|
|
||||||
return {
|
|
||||||
response: responseText,
|
|
||||||
sources: this.curateSources(sourceDocuments),
|
|
||||||
message: false,
|
message: false,
|
||||||
};
|
};
|
||||||
},
|
},
|
||||||
@ -337,9 +289,13 @@ const LanceDb = {
|
|||||||
curateSources: function (sources = []) {
|
curateSources: function (sources = []) {
|
||||||
const documents = [];
|
const documents = [];
|
||||||
for (const source of sources) {
|
for (const source of sources) {
|
||||||
const { text, vector: _v, score: _s, ...metadata } = source;
|
const { text, vector: _v, score: _s, ...rest } = source;
|
||||||
|
const metadata = rest.hasOwnProperty("metadata") ? rest.metadata : rest;
|
||||||
if (Object.keys(metadata).length > 0) {
|
if (Object.keys(metadata).length > 0) {
|
||||||
documents.push({ ...metadata, text });
|
documents.push({
|
||||||
|
...metadata,
|
||||||
|
...(text ? { text } : {}),
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -3,7 +3,6 @@ const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
|
|||||||
const { storeVectorResult, cachedVectorInformation } = require("../../files");
|
const { storeVectorResult, cachedVectorInformation } = require("../../files");
|
||||||
const { v4: uuidv4 } = require("uuid");
|
const { v4: uuidv4 } = require("uuid");
|
||||||
const { toChunks, getLLMProvider } = require("../../helpers");
|
const { toChunks, getLLMProvider } = require("../../helpers");
|
||||||
const { chatPrompt } = require("../../chats");
|
|
||||||
|
|
||||||
const Pinecone = {
|
const Pinecone = {
|
||||||
name: "Pinecone",
|
name: "Pinecone",
|
||||||
@ -222,80 +221,33 @@ const Pinecone = {
|
|||||||
message: `Namespace ${namespace} was deleted along with ${details.vectorCount} vectors.`,
|
message: `Namespace ${namespace} was deleted along with ${details.vectorCount} vectors.`,
|
||||||
};
|
};
|
||||||
},
|
},
|
||||||
query: async function (reqBody = {}) {
|
performSimilaritySearch: async function ({
|
||||||
const { namespace = null, input, workspace = {} } = reqBody;
|
namespace = null,
|
||||||
if (!namespace || !input) throw new Error("Invalid request body");
|
input = "",
|
||||||
|
LLMConnector = null,
|
||||||
const { pineconeIndex } = await this.connect();
|
}) {
|
||||||
if (!(await this.namespaceExists(pineconeIndex, namespace))) {
|
if (!namespace || !input || !LLMConnector)
|
||||||
return {
|
throw new Error("Invalid request to performSimilaritySearch.");
|
||||||
response: null,
|
|
||||||
sources: [],
|
|
||||||
message: "Invalid query - no documents found for workspace!",
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
const LLMConnector = getLLMProvider();
|
|
||||||
const queryVector = await LLMConnector.embedTextInput(input);
|
|
||||||
const { contextTexts, sourceDocuments } = await this.similarityResponse(
|
|
||||||
pineconeIndex,
|
|
||||||
namespace,
|
|
||||||
queryVector
|
|
||||||
);
|
|
||||||
const memory = LLMConnector.constructPrompt({
|
|
||||||
systemPrompt: chatPrompt(workspace),
|
|
||||||
contextTexts: contextTexts,
|
|
||||||
userPrompt: input,
|
|
||||||
});
|
|
||||||
const responseText = await LLMConnector.getChatCompletion(memory, {
|
|
||||||
temperature: workspace?.openAiTemp ?? 0.7,
|
|
||||||
});
|
|
||||||
|
|
||||||
return {
|
|
||||||
response: responseText,
|
|
||||||
sources: this.curateSources(sourceDocuments),
|
|
||||||
message: false,
|
|
||||||
};
|
|
||||||
},
|
|
||||||
// This implementation of chat uses the chat history and modifies the system prompt at execution
|
|
||||||
// this is improved over the regular langchain implementation so that chats do not directly modify embeddings
|
|
||||||
// because then multi-user support will have all conversations mutating the base vector collection to which then
|
|
||||||
// the only solution is replicating entire vector databases per user - which will very quickly consume space on VectorDbs
|
|
||||||
chat: async function (reqBody = {}) {
|
|
||||||
const {
|
|
||||||
namespace = null,
|
|
||||||
input,
|
|
||||||
workspace = {},
|
|
||||||
chatHistory = [],
|
|
||||||
} = reqBody;
|
|
||||||
if (!namespace || !input) throw new Error("Invalid request body");
|
|
||||||
|
|
||||||
const { pineconeIndex } = await this.connect();
|
const { pineconeIndex } = await this.connect();
|
||||||
if (!(await this.namespaceExists(pineconeIndex, namespace)))
|
if (!(await this.namespaceExists(pineconeIndex, namespace)))
|
||||||
throw new Error(
|
throw new Error(
|
||||||
"Invalid namespace - has it been collected and seeded yet?"
|
"Invalid namespace - has it been collected and populated yet?"
|
||||||
);
|
);
|
||||||
|
|
||||||
const LLMConnector = getLLMProvider();
|
|
||||||
const queryVector = await LLMConnector.embedTextInput(input);
|
const queryVector = await LLMConnector.embedTextInput(input);
|
||||||
const { contextTexts, sourceDocuments } = await this.similarityResponse(
|
const { contextTexts, sourceDocuments } = await this.similarityResponse(
|
||||||
pineconeIndex,
|
pineconeIndex,
|
||||||
namespace,
|
namespace,
|
||||||
queryVector
|
queryVector
|
||||||
);
|
);
|
||||||
const memory = LLMConnector.constructPrompt({
|
|
||||||
systemPrompt: chatPrompt(workspace),
|
|
||||||
contextTexts: contextTexts,
|
|
||||||
userPrompt: input,
|
|
||||||
chatHistory,
|
|
||||||
});
|
|
||||||
const responseText = await LLMConnector.getChatCompletion(memory, {
|
|
||||||
temperature: workspace?.openAiTemp ?? 0.7,
|
|
||||||
});
|
|
||||||
|
|
||||||
|
const sources = sourceDocuments.map((metadata, i) => {
|
||||||
|
return { ...metadata, text: contextTexts[i] };
|
||||||
|
});
|
||||||
return {
|
return {
|
||||||
response: responseText,
|
contextTexts,
|
||||||
sources: this.curateSources(sourceDocuments),
|
sources: this.curateSources(sources),
|
||||||
message: false,
|
message: false,
|
||||||
};
|
};
|
||||||
},
|
},
|
||||||
|
|||||||
@ -3,7 +3,6 @@ const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
|
|||||||
const { storeVectorResult, cachedVectorInformation } = require("../../files");
|
const { storeVectorResult, cachedVectorInformation } = require("../../files");
|
||||||
const { v4: uuidv4 } = require("uuid");
|
const { v4: uuidv4 } = require("uuid");
|
||||||
const { toChunks, getLLMProvider } = require("../../helpers");
|
const { toChunks, getLLMProvider } = require("../../helpers");
|
||||||
const { chatPrompt } = require("../../chats");
|
|
||||||
|
|
||||||
const QDrant = {
|
const QDrant = {
|
||||||
name: "QDrant",
|
name: "QDrant",
|
||||||
@ -262,83 +261,36 @@ const QDrant = {
|
|||||||
await DocumentVectors.deleteIds(indexes);
|
await DocumentVectors.deleteIds(indexes);
|
||||||
return true;
|
return true;
|
||||||
},
|
},
|
||||||
query: async function (reqBody = {}) {
|
performSimilaritySearch: async function ({
|
||||||
const { namespace = null, input, workspace = {} } = reqBody;
|
namespace = null,
|
||||||
if (!namespace || !input) throw new Error("Invalid request body");
|
input = "",
|
||||||
|
LLMConnector = null,
|
||||||
|
}) {
|
||||||
|
if (!namespace || !input || !LLMConnector)
|
||||||
|
throw new Error("Invalid request to performSimilaritySearch.");
|
||||||
|
|
||||||
const { client } = await this.connect();
|
const { client } = await this.connect();
|
||||||
if (!(await this.namespaceExists(client, namespace))) {
|
if (!(await this.namespaceExists(client, namespace))) {
|
||||||
return {
|
return {
|
||||||
response: null,
|
contextTexts: [],
|
||||||
sources: [],
|
sources: [],
|
||||||
message: "Invalid query - no documents found for workspace!",
|
message: "Invalid query - no documents found for workspace!",
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
const LLMConnector = getLLMProvider();
|
|
||||||
const queryVector = await LLMConnector.embedTextInput(input);
|
const queryVector = await LLMConnector.embedTextInput(input);
|
||||||
const { contextTexts, sourceDocuments } = await this.similarityResponse(
|
const { contextTexts, sourceDocuments } = await this.similarityResponse(
|
||||||
client,
|
client,
|
||||||
namespace,
|
namespace,
|
||||||
queryVector
|
queryVector
|
||||||
);
|
);
|
||||||
const memory = LLMConnector.constructPrompt({
|
|
||||||
systemPrompt: chatPrompt(workspace),
|
|
||||||
contextTexts: contextTexts,
|
|
||||||
userPrompt: input,
|
|
||||||
});
|
|
||||||
const responseText = await LLMConnector.getChatCompletion(memory, {
|
|
||||||
temperature: workspace?.openAiTemp ?? 0.7,
|
|
||||||
});
|
|
||||||
|
|
||||||
|
const sources = sourceDocuments.map((metadata, i) => {
|
||||||
|
return { ...metadata, text: contextTexts[i] };
|
||||||
|
});
|
||||||
return {
|
return {
|
||||||
response: responseText,
|
contextTexts,
|
||||||
sources: this.curateSources(sourceDocuments),
|
sources: this.curateSources(sources),
|
||||||
message: false,
|
|
||||||
};
|
|
||||||
},
|
|
||||||
// This implementation of chat uses the chat history and modifies the system prompt at execution
|
|
||||||
// this is improved over the regular langchain implementation so that chats do not directly modify embeddings
|
|
||||||
// because then multi-user support will have all conversations mutating the base vector collection to which then
|
|
||||||
// the only solution is replicating entire vector databases per user - which will very quickly consume space on VectorDbs
|
|
||||||
chat: async function (reqBody = {}) {
|
|
||||||
const {
|
|
||||||
namespace = null,
|
|
||||||
input,
|
|
||||||
workspace = {},
|
|
||||||
chatHistory = [],
|
|
||||||
} = reqBody;
|
|
||||||
if (!namespace || !input) throw new Error("Invalid request body");
|
|
||||||
|
|
||||||
const { client } = await this.connect();
|
|
||||||
if (!(await this.namespaceExists(client, namespace))) {
|
|
||||||
return {
|
|
||||||
response: null,
|
|
||||||
sources: [],
|
|
||||||
message: "Invalid query - no documents found for workspace!",
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
const LLMConnector = getLLMProvider();
|
|
||||||
const queryVector = await LLMConnector.embedTextInput(input);
|
|
||||||
const { contextTexts, sourceDocuments } = await this.similarityResponse(
|
|
||||||
client,
|
|
||||||
namespace,
|
|
||||||
queryVector
|
|
||||||
);
|
|
||||||
const memory = LLMConnector.constructPrompt({
|
|
||||||
systemPrompt: chatPrompt(workspace),
|
|
||||||
contextTexts: contextTexts,
|
|
||||||
userPrompt: input,
|
|
||||||
chatHistory,
|
|
||||||
});
|
|
||||||
const responseText = await LLMConnector.getChatCompletion(memory, {
|
|
||||||
temperature: workspace?.openAiTemp ?? 0.7,
|
|
||||||
});
|
|
||||||
|
|
||||||
return {
|
|
||||||
response: responseText,
|
|
||||||
sources: this.curateSources(sourceDocuments),
|
|
||||||
message: false,
|
message: false,
|
||||||
};
|
};
|
||||||
},
|
},
|
||||||
@ -377,8 +329,11 @@ const QDrant = {
|
|||||||
const documents = [];
|
const documents = [];
|
||||||
for (const source of sources) {
|
for (const source of sources) {
|
||||||
if (Object.keys(source).length > 0) {
|
if (Object.keys(source).length > 0) {
|
||||||
|
const metadata = source.hasOwnProperty("metadata")
|
||||||
|
? source.metadata
|
||||||
|
: source;
|
||||||
documents.push({
|
documents.push({
|
||||||
...source,
|
...metadata,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -3,7 +3,6 @@ const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
|
|||||||
const { storeVectorResult, cachedVectorInformation } = require("../../files");
|
const { storeVectorResult, cachedVectorInformation } = require("../../files");
|
||||||
const { v4: uuidv4 } = require("uuid");
|
const { v4: uuidv4 } = require("uuid");
|
||||||
const { toChunks, getLLMProvider } = require("../../helpers");
|
const { toChunks, getLLMProvider } = require("../../helpers");
|
||||||
const { chatPrompt } = require("../../chats");
|
|
||||||
const { camelCase } = require("../../helpers/camelcase");
|
const { camelCase } = require("../../helpers/camelcase");
|
||||||
|
|
||||||
const Weaviate = {
|
const Weaviate = {
|
||||||
@ -333,83 +332,36 @@ const Weaviate = {
|
|||||||
await DocumentVectors.deleteIds(indexes);
|
await DocumentVectors.deleteIds(indexes);
|
||||||
return true;
|
return true;
|
||||||
},
|
},
|
||||||
query: async function (reqBody = {}) {
|
performSimilaritySearch: async function ({
|
||||||
const { namespace = null, input, workspace = {} } = reqBody;
|
namespace = null,
|
||||||
if (!namespace || !input) throw new Error("Invalid request body");
|
input = "",
|
||||||
|
LLMConnector = null,
|
||||||
|
}) {
|
||||||
|
if (!namespace || !input || !LLMConnector)
|
||||||
|
throw new Error("Invalid request to performSimilaritySearch.");
|
||||||
|
|
||||||
const { client } = await this.connect();
|
const { client } = await this.connect();
|
||||||
if (!(await this.namespaceExists(client, namespace))) {
|
if (!(await this.namespaceExists(client, namespace))) {
|
||||||
return {
|
return {
|
||||||
response: null,
|
contextTexts: [],
|
||||||
sources: [],
|
sources: [],
|
||||||
message: "Invalid query - no documents found for workspace!",
|
message: "Invalid query - no documents found for workspace!",
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
const LLMConnector = getLLMProvider();
|
|
||||||
const queryVector = await LLMConnector.embedTextInput(input);
|
const queryVector = await LLMConnector.embedTextInput(input);
|
||||||
const { contextTexts, sourceDocuments } = await this.similarityResponse(
|
const { contextTexts, sourceDocuments } = await this.similarityResponse(
|
||||||
client,
|
client,
|
||||||
namespace,
|
namespace,
|
||||||
queryVector
|
queryVector
|
||||||
);
|
);
|
||||||
const memory = LLMConnector.constructPrompt({
|
|
||||||
systemPrompt: chatPrompt(workspace),
|
|
||||||
contextTexts: contextTexts,
|
|
||||||
userPrompt: input,
|
|
||||||
});
|
|
||||||
const responseText = await LLMConnector.getChatCompletion(memory, {
|
|
||||||
temperature: workspace?.openAiTemp ?? 0.7,
|
|
||||||
});
|
|
||||||
|
|
||||||
|
const sources = sourceDocuments.map((metadata, i) => {
|
||||||
|
return { ...metadata, text: contextTexts[i] };
|
||||||
|
});
|
||||||
return {
|
return {
|
||||||
response: responseText,
|
contextTexts,
|
||||||
sources: this.curateSources(sourceDocuments),
|
sources: this.curateSources(sources),
|
||||||
message: false,
|
|
||||||
};
|
|
||||||
},
|
|
||||||
// This implementation of chat uses the chat history and modifies the system prompt at execution
|
|
||||||
// this is improved over the regular langchain implementation so that chats do not directly modify embeddings
|
|
||||||
// because then multi-user support will have all conversations mutating the base vector collection to which then
|
|
||||||
// the only solution is replicating entire vector databases per user - which will very quickly consume space on VectorDbs
|
|
||||||
chat: async function (reqBody = {}) {
|
|
||||||
const {
|
|
||||||
namespace = null,
|
|
||||||
input,
|
|
||||||
workspace = {},
|
|
||||||
chatHistory = [],
|
|
||||||
} = reqBody;
|
|
||||||
if (!namespace || !input) throw new Error("Invalid request body");
|
|
||||||
|
|
||||||
const { client } = await this.connect();
|
|
||||||
if (!(await this.namespaceExists(client, namespace))) {
|
|
||||||
return {
|
|
||||||
response: null,
|
|
||||||
sources: [],
|
|
||||||
message: "Invalid query - no documents found for workspace!",
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
const LLMConnector = getLLMProvider();
|
|
||||||
const queryVector = await LLMConnector.embedTextInput(input);
|
|
||||||
const { contextTexts, sourceDocuments } = await this.similarityResponse(
|
|
||||||
client,
|
|
||||||
namespace,
|
|
||||||
queryVector
|
|
||||||
);
|
|
||||||
const memory = LLMConnector.constructPrompt({
|
|
||||||
systemPrompt: chatPrompt(workspace),
|
|
||||||
contextTexts: contextTexts,
|
|
||||||
userPrompt: input,
|
|
||||||
chatHistory,
|
|
||||||
});
|
|
||||||
const responseText = await LLMConnector.getChatCompletion(memory, {
|
|
||||||
temperature: workspace?.openAiTemp ?? 0.7,
|
|
||||||
});
|
|
||||||
|
|
||||||
return {
|
|
||||||
response: responseText,
|
|
||||||
sources: this.curateSources(sourceDocuments),
|
|
||||||
message: false,
|
message: false,
|
||||||
};
|
};
|
||||||
},
|
},
|
||||||
@ -445,7 +397,10 @@ const Weaviate = {
|
|||||||
const documents = [];
|
const documents = [];
|
||||||
for (const source of sources) {
|
for (const source of sources) {
|
||||||
if (Object.keys(source).length > 0) {
|
if (Object.keys(source).length > 0) {
|
||||||
documents.push(source);
|
const metadata = source.hasOwnProperty("metadata")
|
||||||
|
? source.metadata
|
||||||
|
: source;
|
||||||
|
documents.push({ ...metadata });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -1556,7 +1556,7 @@ isomorphic-fetch@^3.0.0:
|
|||||||
node-fetch "^2.6.1"
|
node-fetch "^2.6.1"
|
||||||
whatwg-fetch "^3.4.1"
|
whatwg-fetch "^3.4.1"
|
||||||
|
|
||||||
js-tiktoken@^1.0.6:
|
js-tiktoken@^1.0.6, js-tiktoken@^1.0.7:
|
||||||
version "1.0.7"
|
version "1.0.7"
|
||||||
resolved "https://registry.yarnpkg.com/js-tiktoken/-/js-tiktoken-1.0.7.tgz#56933fcd2093e8304060dfde3071bda91812e6f5"
|
resolved "https://registry.yarnpkg.com/js-tiktoken/-/js-tiktoken-1.0.7.tgz#56933fcd2093e8304060dfde3071bda91812e6f5"
|
||||||
integrity sha512-biba8u/clw7iesNEWLOLwrNGoBP2lA+hTaBLs/D45pJdUPFXyxD6nhcDVtADChghv4GgyAiMKYMiRx7x6h7Biw==
|
integrity sha512-biba8u/clw7iesNEWLOLwrNGoBP2lA+hTaBLs/D45pJdUPFXyxD6nhcDVtADChghv4GgyAiMKYMiRx7x6h7Biw==
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user