From 5ad8a5f2d0c545a913319a641877d70aa7b82b09 Mon Sep 17 00:00:00 2001 From: Sean Hatfield Date: Thu, 16 Nov 2023 15:19:49 -0800 Subject: [PATCH] Allow use of any embedder for any llm/update data handling modal (#386) * allow use of any embedder for any llm/update data handling modal * Apply embedder override and fallback to OpenAI and Azure models --------- Co-authored-by: timothycarambat --- frontend/src/components/Sidebar/index.jsx | 2 +- .../EmbeddingPreference/index.jsx | 306 +++++++++--------- .../Steps/DataHandling/index.jsx | 55 +++- .../Steps/EmbeddingSelection/index.jsx | 13 + .../Steps/LLMSelection/index.jsx | 10 +- .../OnboardingFlow/OnboardingModal/index.jsx | 3 +- server/utils/AiProviders/azureOpenAi/index.js | 19 +- server/utils/AiProviders/openAi/index.js | 19 +- server/utils/helpers/index.js | 9 +- 9 files changed, 256 insertions(+), 180 deletions(-) diff --git a/frontend/src/components/Sidebar/index.jsx b/frontend/src/components/Sidebar/index.jsx index f0c27f09..95f232d7 100644 --- a/frontend/src/components/Sidebar/index.jsx +++ b/frontend/src/components/Sidebar/index.jsx @@ -278,7 +278,7 @@ function SettingsButton() { return ( diff --git a/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx b/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx index f35c167b..cc272edb 100644 --- a/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx +++ b/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx @@ -133,161 +133,161 @@ export default function GeneralEmbeddingPreference() {

- {["openai", "azure"].includes(settings.LLMProvider) ? ( -
-

- Your current LLM preference does not require you to set up - this part of AnythingLLM. -
- Embedding is being automatically managed by AnythingLLM. -

+ <> +
+ Embedding Providers
- ) : ( - <> -
- Embedding Providers -
-
- - - - -
-
- {embeddingChoice === "openai" && ( - <> -
- - -
- - )} - - {embeddingChoice === "azure" && ( - <> -
- - -
- -
- - -
- -
- - -
- - )} - - {embeddingChoice === "localai" && ( - <> -
- - setBasePathValue(e.target.value)} - onBlur={updateBasePath} - required={true} - autoComplete="off" - spellCheck={false} - /> -
- + + + + +
+
+ {embeddingChoice === "openai" && ( + <> +
+ + - - )} -
- - )} +
+
+ + +
+ + )} + + {embeddingChoice === "azure" && ( + <> +
+ + +
+ +
+ + +
+ +
+ + +
+ + )} + + {embeddingChoice === "localai" && ( + <> +
+ + setBasePathValue(e.target.value)} + onBlur={updateBasePath} + required={true} + autoComplete="off" + spellCheck={false} + /> +
+ + + )} +
+ diff --git a/frontend/src/pages/OnboardingFlow/OnboardingModal/Steps/DataHandling/index.jsx b/frontend/src/pages/OnboardingFlow/OnboardingModal/Steps/DataHandling/index.jsx index 2f133ff5..b81a6795 100644 --- a/frontend/src/pages/OnboardingFlow/OnboardingModal/Steps/DataHandling/index.jsx +++ b/frontend/src/pages/OnboardingFlow/OnboardingModal/Steps/DataHandling/index.jsx @@ -97,16 +97,44 @@ const VECTOR_DB_PRIVACY = { }, }; +const EMBEDDING_ENGINE_PRIVACY = { + openai: { + name: "OpenAI", + description: [ + "Your documents are visible to OpenAI", + "Your documents are not used for training", + ], + logo: OpenAiLogo, + }, + azure: { + name: "Azure OpenAI", + description: [ + "Your documents are not visible to OpenAI or Microsoft", + "Your documents not used for training", + ], + logo: AzureOpenAiLogo, + }, + localai: { + name: "LocalAI", + description: [ + "Your documents are only accessible on the server running LocalAI", + ], + logo: LocalAiLogo, + }, +}; + function DataHandling({ nextStep, prevStep, currentStep }) { const [llmChoice, setLLMChoice] = useState("openai"); const [loading, setLoading] = useState(true); const [vectorDb, setVectorDb] = useState("pinecone"); + const [embeddingEngine, setEmbeddingEngine] = useState("openai"); useEffect(() => { async function fetchKeys() { const _settings = await System.keys(); setLLMChoice(_settings?.LLMProvider); setVectorDb(_settings?.VectorDB); + setEmbeddingEngine(_settings?.EmbeddingEngine); setLoading(false); } @@ -124,8 +152,8 @@ function DataHandling({ nextStep, prevStep, currentStep }) { return (
-
-
+
+
LLM Selection
-
+
+
Embedding Engine
+
+ Embedding Engine Logo

+ {EMBEDDING_ENGINE_PRIVACY[embeddingEngine].name} +

+
+
    + {EMBEDDING_ENGINE_PRIVACY[embeddingEngine].description.map( + (desc) => ( +
  • {desc}
  • + ) + )} +
+
+ +
Vector Database
+
+ + +
)} diff --git a/frontend/src/pages/OnboardingFlow/OnboardingModal/Steps/LLMSelection/index.jsx b/frontend/src/pages/OnboardingFlow/OnboardingModal/Steps/LLMSelection/index.jsx index e3582309..73e68c05 100644 --- a/frontend/src/pages/OnboardingFlow/OnboardingModal/Steps/LLMSelection/index.jsx +++ b/frontend/src/pages/OnboardingFlow/OnboardingModal/Steps/LLMSelection/index.jsx @@ -46,15 +46,7 @@ function LLMSelection({ nextStep, prevStep, currentStep }) { alert(`Failed to save LLM settings: ${error}`, "error"); return; } - - switch (data.LLMProvider) { - case "anthropic": - case "lmstudio": - case "localai": - return nextStep("embedding_preferences"); - default: - return nextStep("vector_database"); - } + nextStep("embedding_preferences"); }; if (loading) diff --git a/frontend/src/pages/OnboardingFlow/OnboardingModal/index.jsx b/frontend/src/pages/OnboardingFlow/OnboardingModal/index.jsx index c4dceed8..d00b7e95 100644 --- a/frontend/src/pages/OnboardingFlow/OnboardingModal/index.jsx +++ b/frontend/src/pages/OnboardingFlow/OnboardingModal/index.jsx @@ -65,8 +65,7 @@ const STEPS = { }, embedding_preferences: { title: "Embedding Preference", - description: - "Due to your LLM selection you need to set up a provider for embedding files and text.", + description: "Choose a provider for embedding files and text.", component: EmbeddingSelection, }, }; diff --git a/server/utils/AiProviders/azureOpenAi/index.js b/server/utils/AiProviders/azureOpenAi/index.js index a424902b..82e28204 100644 --- a/server/utils/AiProviders/azureOpenAi/index.js +++ b/server/utils/AiProviders/azureOpenAi/index.js @@ -1,9 +1,8 @@ const { AzureOpenAiEmbedder } = require("../../EmbeddingEngines/azureOpenAi"); const { chatPrompt } = require("../../chats"); -class AzureOpenAiLLM extends AzureOpenAiEmbedder { - constructor() { - super(); +class AzureOpenAiLLM { + constructor(embedder = null) { const { OpenAIClient, AzureKeyCredential } = require("@azure/openai"); if (!process.env.AZURE_OPENAI_ENDPOINT) 
throw new Error("No Azure API endpoint was set."); @@ -20,6 +19,12 @@ class AzureOpenAiLLM extends AzureOpenAiEmbedder { system: this.promptWindowLimit() * 0.15, user: this.promptWindowLimit() * 0.7, }; + + if (!embedder) + console.warn( + "No embedding provider defined for AzureOpenAiLLM - falling back to AzureOpenAiEmbedder for embedding!" + ); + this.embedder = !embedder ? new AzureOpenAiEmbedder() : embedder; } streamingEnabled() { @@ -114,6 +119,14 @@ Context: return data.choices[0].message.content; } + // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations + async embedTextInput(textInput) { + return await this.embedder.embedTextInput(textInput); + } + async embedChunks(textChunks = []) { + return await this.embedder.embedChunks(textChunks); + } + async compressMessages(promptArgs = {}, rawHistory = []) { const { messageArrayCompressor } = require("../../helpers/chat"); const messageArray = this.constructPrompt(promptArgs); diff --git a/server/utils/AiProviders/openAi/index.js b/server/utils/AiProviders/openAi/index.js index 0c5b7116..46464271 100644 --- a/server/utils/AiProviders/openAi/index.js +++ b/server/utils/AiProviders/openAi/index.js @@ -1,9 +1,8 @@ const { OpenAiEmbedder } = require("../../EmbeddingEngines/openAi"); const { chatPrompt } = require("../../chats"); -class OpenAiLLM extends OpenAiEmbedder { - constructor() { - super(); +class OpenAiLLM { + constructor(embedder = null) { const { Configuration, OpenAIApi } = require("openai"); if (!process.env.OPEN_AI_KEY) throw new Error("No OpenAI API key was set."); @@ -17,6 +16,12 @@ class OpenAiLLM extends OpenAiEmbedder { system: this.promptWindowLimit() * 0.15, user: this.promptWindowLimit() * 0.7, }; + + if (!embedder) + console.warn( + "No embedding provider defined for OpenAiLLM - falling back to OpenAiEmbedder for embedding!" + ); + this.embedder = !embedder ? 
new OpenAiEmbedder() : embedder; } streamingEnabled() { @@ -203,6 +208,14 @@ Context: return streamRequest; } + // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations + async embedTextInput(textInput) { + return await this.embedder.embedTextInput(textInput); + } + async embedChunks(textChunks = []) { + return await this.embedder.embedChunks(textChunks); + } + async compressMessages(promptArgs = {}, rawHistory = []) { const { messageArrayCompressor } = require("../../helpers/chat"); const messageArray = this.constructPrompt(promptArgs); diff --git a/server/utils/helpers/index.js b/server/utils/helpers/index.js index c7c61822..74804b90 100644 --- a/server/utils/helpers/index.js +++ b/server/utils/helpers/index.js @@ -23,25 +23,22 @@ function getVectorDbClass() { function getLLMProvider() { const vectorSelection = process.env.LLM_PROVIDER || "openai"; - let embedder = null; + const embedder = getEmbeddingEngineSelection(); switch (vectorSelection) { case "openai": const { OpenAiLLM } = require("../AiProviders/openAi"); - return new OpenAiLLM(); + return new OpenAiLLM(embedder); case "azure": const { AzureOpenAiLLM } = require("../AiProviders/azureOpenAi"); - return new AzureOpenAiLLM(); + return new AzureOpenAiLLM(embedder); case "anthropic": const { AnthropicLLM } = require("../AiProviders/anthropic"); - embedder = getEmbeddingEngineSelection(); return new AnthropicLLM(embedder); case "lmstudio": const { LMStudioLLM } = require("../AiProviders/lmStudio"); - embedder = getEmbeddingEngineSelection(); return new LMStudioLLM(embedder); case "localai": const { LocalAiLLM } = require("../AiProviders/localAi"); - embedder = getEmbeddingEngineSelection(); return new LocalAiLLM(embedder); default: throw new Error("ENV: No LLM_PROVIDER value found in environment!");