Allow use of any embedder for any llm/update data handling modal (#386)

* allow use of any embedder for any llm/update data handling modal

* Apply embedder override and fallback to OpenAI and Azure models

---------

Co-authored-by: timothycarambat <rambat1010@gmail.com>
This commit is contained in:
Sean Hatfield 2023-11-16 15:19:49 -08:00 committed by GitHub
parent 2c2543b4d7
commit 5ad8a5f2d0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 256 additions and 180 deletions

View File

@ -133,16 +133,6 @@ export default function GeneralEmbeddingPreference() {
</p>
</div>
{["openai", "azure"].includes(settings.LLMProvider) ? (
<div className="w-full h-20 items-center justify-center flex">
<p className="text-gray-800 dark:text-slate-400 text-center">
Your current LLM preference does not require you to set up
this part of AnythingLLM.
<br />
Embedding is being automatically managed by AnythingLLM.
</p>
</div>
) : (
<>
<div className="text-white text-sm font-medium py-4">
Embedding Providers
@ -201,6 +191,19 @@ export default function GeneralEmbeddingPreference() {
spellCheck={false}
/>
</div>
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
Model Preference
</label>
<select
disabled={true}
className="cursor-not-allowed bg-zinc-900 border border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
>
<option disabled={true} selected={true}>
text-embedding-ada-002
</option>
</select>
</div>
</>
)}
@ -249,9 +252,7 @@ export default function GeneralEmbeddingPreference() {
name="AzureOpenAiEmbeddingModelPref"
className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="Azure OpenAI embedding model deployment name"
defaultValue={
settings?.AzureOpenAiEmbeddingModelPref
}
defaultValue={settings?.AzureOpenAiEmbeddingModelPref}
required={true}
autoComplete="off"
spellCheck={false}
@ -287,7 +288,6 @@ export default function GeneralEmbeddingPreference() {
)}
</div>
</>
)}
</div>
</form>
</div>

View File

@ -97,16 +97,44 @@ const VECTOR_DB_PRIVACY = {
},
};
// Privacy disclosure copy shown for each supported embedding engine.
// Keys match the `EmbeddingEngine` setting value read from System.keys().
const EMBEDDING_ENGINE_PRIVACY = {
  openai: {
    name: "OpenAI",
    description: [
      "Your documents are visible to OpenAI",
      "Your documents are not used for training",
    ],
    logo: OpenAiLogo,
  },
  azure: {
    name: "Azure OpenAI",
    description: [
      "Your documents are not visible to OpenAI or Microsoft",
      // Fixed grammar: was "Your documents not used for training",
      // now consistent with the parallel `openai` entry above.
      "Your documents are not used for training",
    ],
    logo: AzureOpenAiLogo,
  },
  localai: {
    name: "LocalAI",
    description: [
      "Your documents are only accessible on the server running LocalAI",
    ],
    logo: LocalAiLogo,
  },
};
function DataHandling({ nextStep, prevStep, currentStep }) {
const [llmChoice, setLLMChoice] = useState("openai");
const [loading, setLoading] = useState(true);
const [vectorDb, setVectorDb] = useState("pinecone");
const [embeddingEngine, setEmbeddingEngine] = useState("openai");
useEffect(() => {
async function fetchKeys() {
const _settings = await System.keys();
setLLMChoice(_settings?.LLMProvider);
setVectorDb(_settings?.VectorDB);
setEmbeddingEngine(_settings?.EmbeddingEngine);
setLoading(false);
}
@ -124,8 +152,8 @@ function DataHandling({ nextStep, prevStep, currentStep }) {
return (
<div className="max-w-[750px]">
<div className="p-8 flex gap-x-16">
<div className="w-1/2 flex flex-col gap-y-3.5">
<div className="p-8 flex flex-col gap-8">
<div className="flex flex-col gap-y-3.5 border-b border-zinc-500/50 pb-8">
<div className="text-white text-base font-bold">LLM Selection</div>
<div className="flex items-center gap-2.5">
<img
@ -146,7 +174,28 @@ function DataHandling({ nextStep, prevStep, currentStep }) {
</div>
</div>
<div className="w-1/2 flex flex-col gap-y-3.5">
<div className="flex flex-col gap-y-3.5 border-b border-zinc-500/50 pb-8">
<div className="text-white text-base font-bold">Embedding Engine</div>
<div className="flex items-center gap-2.5">
<img
src={EMBEDDING_ENGINE_PRIVACY[embeddingEngine].logo}
alt="Vector DB Logo"
className="w-8 h-8 rounded"
/>
<p className="text-white text-sm font-bold">
{EMBEDDING_ENGINE_PRIVACY[embeddingEngine].name}
</p>
</div>
<ul className="flex flex-col list-disc">
{EMBEDDING_ENGINE_PRIVACY[embeddingEngine].description.map(
(desc) => (
<li className="text-white/90 text-sm">{desc}</li>
)
)}
</ul>
</div>
<div className="flex flex-col gap-y-3.5 ">
<div className="text-white text-base font-bold">Vector Database</div>
<div className="flex items-center gap-2.5">
<img

View File

@ -113,6 +113,19 @@ function EmbeddingSelection({ nextStep, prevStep, currentStep }) {
spellCheck={false}
/>
</div>
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
Model Preference
</label>
<select
disabled={true}
className="cursor-not-allowed bg-zinc-900 border border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
>
<option disabled={true} selected={true}>
text-embedding-ada-002
</option>
</select>
</div>
</>
)}

View File

@ -46,15 +46,7 @@ function LLMSelection({ nextStep, prevStep, currentStep }) {
alert(`Failed to save LLM settings: ${error}`, "error");
return;
}
switch (data.LLMProvider) {
case "anthropic":
case "lmstudio":
case "localai":
return nextStep("embedding_preferences");
default:
return nextStep("vector_database");
}
nextStep("embedding_preferences");
};
if (loading)

View File

@ -65,8 +65,7 @@ const STEPS = {
},
embedding_preferences: {
title: "Embedding Preference",
description:
"Due to your LLM selection you need to set up a provider for embedding files and text.",
description: "Choose a provider for embedding files and text.",
component: EmbeddingSelection,
},
};

View File

@ -1,9 +1,8 @@
const { AzureOpenAiEmbedder } = require("../../EmbeddingEngines/azureOpenAi");
const { chatPrompt } = require("../../chats");
class AzureOpenAiLLM extends AzureOpenAiEmbedder {
constructor() {
super();
class AzureOpenAiLLM {
constructor(embedder = null) {
const { OpenAIClient, AzureKeyCredential } = require("@azure/openai");
if (!process.env.AZURE_OPENAI_ENDPOINT)
throw new Error("No Azure API endpoint was set.");
@ -20,6 +19,12 @@ class AzureOpenAiLLM extends AzureOpenAiEmbedder {
system: this.promptWindowLimit() * 0.15,
user: this.promptWindowLimit() * 0.7,
};
if (!embedder)
console.warn(
"No embedding provider defined for AzureOpenAiLLM - falling back to AzureOpenAiEmbedder for embedding!"
);
this.embedder = !embedder ? new AzureOpenAiEmbedder() : embedder;
}
streamingEnabled() {
@ -114,6 +119,14 @@ Context:
return data.choices[0].message.content;
}
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations.
// Delegates to whichever embedder instance was injected via the constructor
// (falls back to AzureOpenAiEmbedder when none was provided).
async embedTextInput(textInput) {
return await this.embedder.embedTextInput(textInput);
}
// Batch variant of the wrapper above; defaults to an empty chunk list.
async embedChunks(textChunks = []) {
return await this.embedder.embedChunks(textChunks);
}
async compressMessages(promptArgs = {}, rawHistory = []) {
const { messageArrayCompressor } = require("../../helpers/chat");
const messageArray = this.constructPrompt(promptArgs);

View File

@ -1,9 +1,8 @@
const { OpenAiEmbedder } = require("../../EmbeddingEngines/openAi");
const { chatPrompt } = require("../../chats");
class OpenAiLLM extends OpenAiEmbedder {
constructor() {
super();
class OpenAiLLM {
constructor(embedder = null) {
const { Configuration, OpenAIApi } = require("openai");
if (!process.env.OPEN_AI_KEY) throw new Error("No OpenAI API key was set.");
@ -17,6 +16,12 @@ class OpenAiLLM extends OpenAiEmbedder {
system: this.promptWindowLimit() * 0.15,
user: this.promptWindowLimit() * 0.7,
};
if (!embedder)
console.warn(
"No embedding provider defined for OpenAiLLM - falling back to OpenAiEmbedder for embedding!"
);
this.embedder = !embedder ? new OpenAiEmbedder() : embedder;
}
streamingEnabled() {
@ -203,6 +208,14 @@ Context:
return streamRequest;
}
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations.
// Delegates to whichever embedder instance was injected via the constructor
// (falls back to OpenAiEmbedder when none was provided).
async embedTextInput(textInput) {
return await this.embedder.embedTextInput(textInput);
}
// Batch variant of the wrapper above; defaults to an empty chunk list.
async embedChunks(textChunks = []) {
return await this.embedder.embedChunks(textChunks);
}
async compressMessages(promptArgs = {}, rawHistory = []) {
const { messageArrayCompressor } = require("../../helpers/chat");
const messageArray = this.constructPrompt(promptArgs);

View File

@ -23,25 +23,22 @@ function getVectorDbClass() {
function getLLMProvider() {
const vectorSelection = process.env.LLM_PROVIDER || "openai";
let embedder = null;
const embedder = getEmbeddingEngineSelection();
switch (vectorSelection) {
case "openai":
const { OpenAiLLM } = require("../AiProviders/openAi");
return new OpenAiLLM();
return new OpenAiLLM(embedder);
case "azure":
const { AzureOpenAiLLM } = require("../AiProviders/azureOpenAi");
return new AzureOpenAiLLM();
return new AzureOpenAiLLM(embedder);
case "anthropic":
const { AnthropicLLM } = require("../AiProviders/anthropic");
embedder = getEmbeddingEngineSelection();
return new AnthropicLLM(embedder);
case "lmstudio":
const { LMStudioLLM } = require("../AiProviders/lmStudio");
embedder = getEmbeddingEngineSelection();
return new LMStudioLLM(embedder);
case "localai":
const { LocalAiLLM } = require("../AiProviders/localAi");
embedder = getEmbeddingEngineSelection();
return new LocalAiLLM(embedder);
default:
throw new Error("ENV: No LLM_PROVIDER value found in environment!");