Support external transcription providers (#909)

* Support External Transcription providers

* patch files

* update docs

* fix return data
Timothy Carambat 2024-03-14 15:43:26 -07:00 committed by GitHub
parent 1352b18b5f
commit 0ada882991
19 changed files with 541 additions and 110 deletions

View File

@ -25,7 +25,7 @@ app.use(
 );
 
 app.post("/process", async function (request, response) {
-  const { filename } = reqBody(request);
+  const { filename, options = {} } = reqBody(request);
   try {
     const targetFilename = path
       .normalize(filename)
@ -34,7 +34,7 @@ app.post("/process", async function (request, response) {
       success,
       reason,
       documents = [],
-    } = await processSingleFile(targetFilename);
+    } = await processSingleFile(targetFilename, options);
     response
       .status(200)
       .json({ filename: targetFilename, success, reason, documents });
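A minimal sketch, not part of the commit, of what a caller now posts to this endpoint from an async context. The filename is hypothetical, and the options shape mirrors the #attachOptions() payload that CollectorApi attaches later in this diff:

// Hypothetical manual call to the collector's updated /process endpoint.
const res = await fetch("http://0.0.0.0:8888/process", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({
    filename: "my-recording.mp3", // hypothetical file already in the hotdir
    options: {
      whisperProvider: "openai", // "local" (default) or "openai"
      openAiKey: process.env.OPEN_AI_KEY || null,
    },
  }),
});
const { success, reason, documents } = await res.json();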

View File

@ -33,6 +33,7 @@
     "moment": "^2.29.4",
     "multer": "^1.4.5-lts.1",
     "officeparser": "^4.0.5",
+    "openai": "^3.2.1",
     "pdf-parse": "^1.1.1",
     "puppeteer": "~21.5.2",
     "slugify": "^1.6.6",
@ -46,4 +47,4 @@
     "nodemon": "^2.0.22",
     "prettier": "^2.4.1"
   }
-}
+}

View File

@ -1,5 +1,3 @@
-const fs = require("fs");
-const path = require("path");
 const { v4 } = require("uuid");
 const {
   createdDate,
@ -9,39 +7,35 @@ const {
 const { tokenizeString } = require("../../utils/tokenizer");
 const { default: slugify } = require("slugify");
 const { LocalWhisper } = require("../../utils/WhisperProviders/localWhisper");
+const { OpenAiWhisper } = require("../../utils/WhisperProviders/OpenAiWhisper");
 
-async function asAudio({ fullFilePath = "", filename = "" }) {
-  const whisper = new LocalWhisper();
+const WHISPER_PROVIDERS = {
+  openai: OpenAiWhisper,
+  local: LocalWhisper,
+};
 
+async function asAudio({ fullFilePath = "", filename = "", options = {} }) {
+  const WhisperProvider = WHISPER_PROVIDERS.hasOwnProperty(
+    options?.whisperProvider
+  )
+    ? WHISPER_PROVIDERS[options?.whisperProvider]
+    : WHISPER_PROVIDERS.local;
+
   console.log(`-- Working ${filename} --`);
-  const transcriberPromise = new Promise((resolve) =>
-    whisper.client().then((client) => resolve(client))
-  );
-  const audioDataPromise = new Promise((resolve) =>
-    convertToWavAudioData(fullFilePath).then((audioData) => resolve(audioData))
-  );
-  const [audioData, transcriber] = await Promise.all([
-    audioDataPromise,
-    transcriberPromise,
-  ]);
+  const whisper = new WhisperProvider({ options });
+  const { content, error } = await whisper.processFile(fullFilePath, filename);
 
-  if (!audioData) {
-    console.error(`Failed to parse content from ${filename}.`);
+  if (!!error) {
+    console.error(`Error encountered for parsing of ${filename}.`);
     trashFile(fullFilePath);
     return {
       success: false,
-      reason: `Failed to parse content from ${filename}.`,
+      reason: error,
       documents: [],
     };
   }
 
-  console.log(`[Model Working]: Transcribing audio data to text`);
-  const { text: content } = await transcriber(audioData, {
-    chunk_length_s: 30,
-    stride_length_s: 5,
-  });
-
-  if (!content.length) {
+  if (!content?.length) {
     console.error(`Resulting text content was empty for ${filename}.`);
     trashFile(fullFilePath);
     return {
@ -76,79 +70,4 @@ async function asAudio({ fullFilePath = "", filename = "" }) {
   return { success: true, reason: null, documents: [document] };
 }
 
-async function convertToWavAudioData(sourcePath) {
-  try {
-    let buffer;
-    const wavefile = require("wavefile");
-    const ffmpeg = require("fluent-ffmpeg");
-    const outFolder = path.resolve(__dirname, `../../storage/tmp`);
-    if (!fs.existsSync(outFolder)) fs.mkdirSync(outFolder, { recursive: true });
-
-    const fileExtension = path.extname(sourcePath).toLowerCase();
-    if (fileExtension !== ".wav") {
-      console.log(
-        `[Conversion Required] ${fileExtension} file detected - converting to .wav`
-      );
-      const outputFile = path.resolve(outFolder, `${v4()}.wav`);
-      const convert = new Promise((resolve) => {
-        ffmpeg(sourcePath)
-          .toFormat("wav")
-          .on("error", (error) => {
-            console.error(`[Conversion Error] ${error.message}`);
-            resolve(false);
-          })
-          .on("progress", (progress) =>
-            console.log(
-              `[Conversion Processing]: ${progress.targetSize}KB converted`
-            )
-          )
-          .on("end", () => {
-            console.log("[Conversion Complete]: File converted to .wav!");
-            resolve(true);
-          })
-          .save(outputFile);
-      });
-      const success = await convert;
-      if (!success)
-        throw new Error(
-          "[Conversion Failed]: Could not convert file to .wav format!"
-        );
-
-      const chunks = [];
-      const stream = fs.createReadStream(outputFile);
-      for await (let chunk of stream) chunks.push(chunk);
-      buffer = Buffer.concat(chunks);
-      fs.rmSync(outputFile);
-    } else {
-      const chunks = [];
-      const stream = fs.createReadStream(sourcePath);
-      for await (let chunk of stream) chunks.push(chunk);
-      buffer = Buffer.concat(chunks);
-    }
-
-    const wavFile = new wavefile.WaveFile(buffer);
-    wavFile.toBitDepth("32f");
-    wavFile.toSampleRate(16000);
-
-    let audioData = wavFile.getSamples();
-    if (Array.isArray(audioData)) {
-      if (audioData.length > 1) {
-        const SCALING_FACTOR = Math.sqrt(2);
-        // Merge channels into first channel to save memory
-        for (let i = 0; i < audioData[0].length; ++i) {
-          audioData[0][i] =
-            (SCALING_FACTOR * (audioData[0][i] + audioData[1][i])) / 2;
-        }
-      }
-      audioData = audioData[0];
-    }
-
-    return audioData;
-  } catch (error) {
-    console.error(`convertToWavAudioData`, error);
-    return null;
-  }
-}
-
 module.exports = asAudio;
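The WHISPER_PROVIDERS map above implies a small provider contract: a constructor taking { options } and an async processFile(fullFilePath, filename) that resolves to { content, error }. A hypothetical third provider, sketched under that assumption (EchoWhisper is illustrative only and not part of this commit):

// Sketch of the provider interface asAudio.js now expects.
class EchoWhisper {
  constructor({ options }) {
    this.options = options; // provider-specific settings passed from the server
  }

  async processFile(fullFilePath, filename) {
    // A real provider would transcribe the audio at fullFilePath here.
    return { content: `transcript of ${filename}`, error: null };
  }
}

Registering it would only take one more entry in the map, e.g. { openai: OpenAiWhisper, local: LocalWhisper, echo: EchoWhisper }.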

View File

@ -7,7 +7,7 @@ const {
 const { trashFile, isTextType } = require("../utils/files");
 
 const RESERVED_FILES = ["__HOTDIR__.md"];
-async function processSingleFile(targetFilename) {
+async function processSingleFile(targetFilename, options = {}) {
   const fullFilePath = path.resolve(WATCH_DIRECTORY, targetFilename);
   if (RESERVED_FILES.includes(targetFilename))
     return {
@ -54,6 +54,7 @@ async function processSingleFile(targetFilename) {
   return await FileTypeProcessor({
     fullFilePath,
     filename: targetFilename,
+    options,
   });
 }

View File

@ -0,0 +1,44 @@
+const fs = require("fs");
+
+class OpenAiWhisper {
+  constructor({ options }) {
+    const { Configuration, OpenAIApi } = require("openai");
+    if (!options.openAiKey) throw new Error("No OpenAI API key was set.");
+
+    const config = new Configuration({
+      apiKey: options.openAiKey,
+    });
+    this.openai = new OpenAIApi(config);
+    this.model = "whisper-1";
+    this.temperature = 0;
+    this.#log("Initialized.");
+  }
+
+  #log(text, ...args) {
+    console.log(`\x1b[32m[OpenAiWhisper]\x1b[0m ${text}`, ...args);
+  }
+
+  async processFile(fullFilePath) {
+    return await this.openai
+      .createTranscription(
+        fs.createReadStream(fullFilePath),
+        this.model,
+        undefined,
+        "text",
+        this.temperature
+      )
+      .then((res) => {
+        if (res.hasOwnProperty("data"))
+          return { content: res.data, error: null };
+        return { content: "", error: "No content was able to be transcribed." };
+      })
+      .catch((e) => {
+        this.#log(`Could not get any response from openai whisper`, e.message);
+        return { content: "", error: e.message };
+      });
+  }
+}
+
+module.exports = {
+  OpenAiWhisper,
+};
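A minimal usage sketch for this class, assuming a valid OpenAI API key and the openai@^3.2.1 SDK added in package.json; the audio path is hypothetical:

const { OpenAiWhisper } = require("./OpenAiWhisper");

// Throws if no key is set, per the constructor guard above.
const whisper = new OpenAiWhisper({
  options: { openAiKey: process.env.OPEN_AI_KEY },
});
whisper
  .processFile("/path/to/audio.mp3") // hypothetical path
  .then(({ content, error }) => console.log(error ?? content));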

View File

@ -1,5 +1,6 @@
-const path = require("path");
 const fs = require("fs");
+const path = require("path");
+const { v4 } = require("uuid");
 
 class LocalWhisper {
   constructor() {
@ -16,12 +17,94 @@ class LocalWhisper {
     // Make directory when it does not exist in existing installations
     if (!fs.existsSync(this.cacheDir))
       fs.mkdirSync(this.cacheDir, { recursive: true });
+
+    this.#log("Initialized.");
+  }
+
+  #log(text, ...args) {
+    console.log(`\x1b[32m[LocalWhisper]\x1b[0m ${text}`, ...args);
+  }
+
+  async #convertToWavAudioData(sourcePath) {
+    try {
+      let buffer;
+      const wavefile = require("wavefile");
+      const ffmpeg = require("fluent-ffmpeg");
+      const outFolder = path.resolve(__dirname, `../../storage/tmp`);
+      if (!fs.existsSync(outFolder))
+        fs.mkdirSync(outFolder, { recursive: true });
+
+      const fileExtension = path.extname(sourcePath).toLowerCase();
+      if (fileExtension !== ".wav") {
+        this.#log(
+          `File conversion required! ${fileExtension} file detected - converting to .wav`
+        );
+        const outputFile = path.resolve(outFolder, `${v4()}.wav`);
+        const convert = new Promise((resolve) => {
+          ffmpeg(sourcePath)
+            .toFormat("wav")
+            .on("error", (error) => {
+              this.#log(`Conversion Error! ${error.message}`);
+              resolve(false);
+            })
+            .on("progress", (progress) =>
+              this.#log(
+                `Conversion Processing! ${progress.targetSize}KB converted`
+              )
+            )
+            .on("end", () => {
+              this.#log(`Conversion Complete! File converted to .wav!`);
+              resolve(true);
+            })
+            .save(outputFile);
+        });
+        const success = await convert;
+        if (!success)
+          throw new Error(
+            "[Conversion Failed]: Could not convert file to .wav format!"
+          );
+
+        const chunks = [];
+        const stream = fs.createReadStream(outputFile);
+        for await (let chunk of stream) chunks.push(chunk);
+        buffer = Buffer.concat(chunks);
+        fs.rmSync(outputFile);
+      } else {
+        const chunks = [];
+        const stream = fs.createReadStream(sourcePath);
+        for await (let chunk of stream) chunks.push(chunk);
+        buffer = Buffer.concat(chunks);
+      }
+
+      const wavFile = new wavefile.WaveFile(buffer);
+      wavFile.toBitDepth("32f");
+      wavFile.toSampleRate(16000);
+
+      let audioData = wavFile.getSamples();
+      if (Array.isArray(audioData)) {
+        if (audioData.length > 1) {
+          const SCALING_FACTOR = Math.sqrt(2);
+          // Merge channels into first channel to save memory
+          for (let i = 0; i < audioData[0].length; ++i) {
+            audioData[0][i] =
+              (SCALING_FACTOR * (audioData[0][i] + audioData[1][i])) / 2;
+          }
+        }
+        audioData = audioData[0];
+      }
+
+      return audioData;
+    } catch (error) {
+      console.error(`convertToWavAudioData`, error);
+      return null;
+    }
   }
 
   async client() {
     if (!fs.existsSync(this.modelPath)) {
-      console.log(
-        "\x1b[34m[INFO]\x1b[0m The native whisper model has never been run and will be downloaded right now. Subsequent runs will be faster. (~250MB)\n\n"
+      this.#log(
+        `The native whisper model has never been run and will be downloaded right now. Subsequent runs will be faster. (~250MB)`
       );
     }
@ -48,10 +131,45 @@
           : {}),
       });
     } catch (error) {
-      console.error("Failed to load the native whisper model:", error);
+      this.#log("Failed to load the native whisper model:", error);
       throw error;
     }
   }
+
+  async processFile(fullFilePath, filename) {
+    try {
+      const transcriberPromise = new Promise((resolve) =>
+        this.client().then((client) => resolve(client))
+      );
+      const audioDataPromise = new Promise((resolve) =>
+        this.#convertToWavAudioData(fullFilePath).then((audioData) =>
+          resolve(audioData)
+        )
+      );
+      const [audioData, transcriber] = await Promise.all([
+        audioDataPromise,
+        transcriberPromise,
+      ]);
+
+      if (!audioData) {
+        this.#log(`Failed to parse content from ${filename}.`);
+        return {
+          content: null,
+          error: `Failed to parse content from ${filename}.`,
+        };
+      }
+
+      this.#log(`Transcribing audio data to text...`);
+      const { text } = await transcriber(audioData, {
+        chunk_length_s: 30,
+        stride_length_s: 5,
+      });
+
+      return { content: text, error: null };
    } catch (error) {
+      return { content: null, error: error.message };
+    }
+  }
 }
 
 module.exports = {
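After this refactor LocalWhisper exposes the same { content, error } shape as OpenAiWhisper. A minimal usage sketch under that assumption (the path and filename are hypothetical; the first call also downloads the ~250MB native model):

const { LocalWhisper } = require("./localWhisper");

new LocalWhisper()
  .processFile("/path/to/audio.mp3", "audio.mp3") // hypothetical inputs
  .then(({ content, error }) => console.log(error ?? content));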

View File

@ -372,6 +372,13 @@ asynckit@^0.4.0:
   resolved "https://registry.yarnpkg.com/asynckit/-/asynckit-0.4.0.tgz#c79ed97f7f34cb8f2ba1bc9790bcc366474b4b79"
   integrity sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==
 
+axios@^0.26.0:
+  version "0.26.1"
+  resolved "https://registry.yarnpkg.com/axios/-/axios-0.26.1.tgz#1ede41c51fcf51bbbd6fd43669caaa4f0495aaa9"
+  integrity sha512-fPwcX4EvnSHuInCMItEhAGnaSEXRBjtzh9fOtsE6E1G6p7vl7edEeZe11QHf18+6+9gR5PbKV/sGKNaD8YaMeA==
+  dependencies:
+    follow-redirects "^1.14.8"
+
 b4a@^1.6.4:
   version "1.6.4"
   resolved "https://registry.yarnpkg.com/b4a/-/b4a-1.6.4.tgz#ef1c1422cae5ce6535ec191baeed7567443f36c9"
@ -1203,6 +1210,11 @@ fluent-ffmpeg@^2.1.2:
     async ">=0.2.9"
     which "^1.1.1"
 
+follow-redirects@^1.14.8:
+  version "1.15.6"
+  resolved "https://registry.yarnpkg.com/follow-redirects/-/follow-redirects-1.15.6.tgz#7f815c0cda4249c74ff09e95ef97c23b5fd0399b"
+  integrity sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==
+
 form-data-encoder@1.7.2:
   version "1.7.2"
   resolved "https://registry.yarnpkg.com/form-data-encoder/-/form-data-encoder-1.7.2.tgz#1f1ae3dccf58ed4690b86d87e4f57c654fbab040"
@ -2304,6 +2316,14 @@ onnxruntime-web@1.14.0:
     onnxruntime-common "~1.14.0"
     platform "^1.3.6"
 
+openai@^3.2.1:
+  version "3.3.0"
+  resolved "https://registry.yarnpkg.com/openai/-/openai-3.3.0.tgz#a6408016ad0945738e1febf43f2fccca83a3f532"
+  integrity sha512-uqxI/Au+aPRnsaQRe8CojU0eCR7I0mBiKjD3sNMzY6DaC1ZVrc85u98mtJW6voDug8fgGN+DIZmTDxTthxb7dQ==
+  dependencies:
+    axios "^0.26.0"
+    form-data "^4.0.0"
+
 openai@^4.19.0:
   version "4.20.1"
   resolved "https://registry.yarnpkg.com/openai/-/openai-4.20.1.tgz#afa0d496d125b5a0f6cebcb4b9aeabf71e00214e"

View File

@ -131,6 +131,16 @@ GID='1000'
 # ASTRA_DB_APPLICATION_TOKEN=
 # ASTRA_DB_ENDPOINT=
 
+###########################################
+######## Audio Model Selection ############
+###########################################
+# (default) use built-in whisper-small model.
+# WHISPER_PROVIDER="local"
+
+# use openai hosted whisper model.
+# WHISPER_PROVIDER="openai"
+# OPEN_AI_KEY=sk-xxxxxxxx
+
 # CLOUD DEPLOYMENT VARIRABLES ONLY
 # AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting.
 # DISABLE_TELEMETRY="false"

View File

@ -29,6 +29,9 @@ const GeneralApiKeys = lazy(() => import("@/pages/GeneralSettings/ApiKeys"));
 const GeneralLLMPreference = lazy(
   () => import("@/pages/GeneralSettings/LLMPreference")
 );
+const GeneralTranscriptionPreference = lazy(
+  () => import("@/pages/GeneralSettings/TranscriptionPreference")
+);
 const GeneralEmbeddingPreference = lazy(
   () => import("@/pages/GeneralSettings/EmbeddingPreference")
 );
@ -76,6 +79,12 @@ export default function App() {
               path="/settings/llm-preference"
               element={<AdminRoute Component={GeneralLLMPreference} />}
             />
+            <Route
+              path="/settings/transcription-preference"
+              element={
+                <AdminRoute Component={GeneralTranscriptionPreference} />
+              }
+            />
             <Route
               path="/settings/embedding-preference"
               element={<AdminRoute Component={GeneralEmbeddingPreference} />}

View File

@ -19,6 +19,7 @@ import {
   Notepad,
   CodeBlock,
   Barcode,
+  ClosedCaptioning,
 } from "@phosphor-icons/react";
 import useUser from "@/hooks/useUser";
 import { USER_BACKGROUND_COLOR } from "@/utils/constants";
@ -278,9 +279,17 @@ const SidebarOptions = ({ user = null }) => (
       flex={true}
       allowedRole={["admin"]}
     />
+    <Option
+      href={paths.settings.transcriptionPreference()}
+      btnText="Transcription Model"
+      icon={<ClosedCaptioning className="h-5 w-5 flex-shrink-0" />}
+      user={user}
+      flex={true}
+      allowedRole={["admin"]}
+    />
     <Option
       href={paths.settings.embeddingPreference()}
-      btnText="Embedding Preference"
+      btnText="Embedding Model"
       icon={<FileCode className="h-5 w-5 flex-shrink-0" />}
       user={user}
       flex={true}

View File

@ -0,0 +1,38 @@
+import { Gauge } from "@phosphor-icons/react";
+
+export default function NativeTranscriptionOptions() {
+  return (
+    <div className="w-full flex flex-col gap-y-4">
+      <div className="flex flex-col md:flex-row md:items-center gap-x-2 text-white mb-4 bg-blue-800/30 w-fit rounded-lg px-4 py-2">
+        <div className="gap-x-2 flex items-center">
+          <Gauge size={25} />
+          <p className="text-sm">
+            Using the local whisper model on machines with limited RAM or CPU
+            can stall AnythingLLM when processing media files.
+            <br />
+            We recommend at least 2GB of RAM and upload files &lt;10Mb.
+            <br />
+            <br />
+            <i>
+              The built-in model will automatically download on the first use.
+            </i>
+          </p>
+        </div>
+      </div>
+      <div className="w-full flex items-center gap-4">
+        <div className="flex flex-col w-60">
+          <label className="text-white text-sm font-semibold block mb-4">
+            Model Selection
+          </label>
+          <select
+            disabled={true}
+            className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
+          >
+            <option disabled={true} selected={true}>
+              Xenova/whisper-small
+            </option>
+          </select>
+        </div>
+      </div>
+    </div>
+  );
+}

View File

@ -0,0 +1,41 @@
+import { useState } from "react";
+
+export default function OpenAiWhisperOptions({ settings }) {
+  const [inputValue, setInputValue] = useState(settings?.OpenAiKey);
+  const [_openAIKey, setOpenAIKey] = useState(settings?.OpenAiKey);
+
+  return (
+    <div className="flex gap-x-4">
+      <div className="flex flex-col w-60">
+        <label className="text-white text-sm font-semibold block mb-4">
+          API Key
+        </label>
+        <input
+          type="password"
+          name="OpenAiKey"
+          className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
+          placeholder="OpenAI API Key"
+          defaultValue={settings?.OpenAiKey ? "*".repeat(20) : ""}
+          required={true}
+          autoComplete="off"
+          spellCheck={false}
+          onChange={(e) => setInputValue(e.target.value)}
+          onBlur={() => setOpenAIKey(inputValue)}
+        />
+      </div>
+      <div className="flex flex-col w-60">
+        <label className="text-white text-sm font-semibold block mb-4">
+          Whisper Model
+        </label>
+        <select
+          disabled={true}
+          className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
+        >
+          <option disabled={true} selected={true}>
+            Whisper Large
+          </option>
+        </select>
+      </div>
+    </div>
+  );
+}

View File

@ -0,0 +1,180 @@
+import React, { useEffect, useState } from "react";
+import { isMobile } from "react-device-detect";
+import Sidebar from "@/components/SettingsSidebar";
+import System from "@/models/system";
+import showToast from "@/utils/toast";
+import PreLoader from "@/components/Preloader";
+import OpenAiLogo from "@/media/llmprovider/openai.png";
+import AnythingLLMIcon from "@/media/logo/anything-llm-icon.png";
+import OpenAiWhisperOptions from "@/components/TranscriptionSelection/OpenAiOptions";
+import NativeTranscriptionOptions from "@/components/TranscriptionSelection/NativeTranscriptionOptions";
+import LLMItem from "@/components/LLMSelection/LLMItem";
+import { MagnifyingGlass } from "@phosphor-icons/react";
+
+export default function TranscriptionModelPreference() {
+  const [saving, setSaving] = useState(false);
+  const [hasChanges, setHasChanges] = useState(false);
+  const [settings, setSettings] = useState(null);
+  const [loading, setLoading] = useState(true);
+  const [searchQuery, setSearchQuery] = useState("");
+  const [filteredProviders, setFilteredProviders] = useState([]);
+  const [selectedProvider, setSelectedProvider] = useState(null);
+
+  const handleSubmit = async (e) => {
+    e.preventDefault();
+    const form = e.target;
+    const data = { WhisperProvider: selectedProvider };
+    const formData = new FormData(form);
+
+    for (var [key, value] of formData.entries()) data[key] = value;
+    const { error } = await System.updateSystem(data);
+    setSaving(true);
+
+    if (error) {
+      showToast(`Failed to save preferences: ${error}`, "error");
+    } else {
+      showToast("Transcription preferences saved successfully.", "success");
+    }
+
+    setSaving(false);
+    setHasChanges(!!error);
+  };
+
+  const updateProviderChoice = (selection) => {
+    setSelectedProvider(selection);
+    setHasChanges(true);
+  };
+
+  useEffect(() => {
+    async function fetchKeys() {
+      const _settings = await System.keys();
+      setSettings(_settings);
+      setSelectedProvider(_settings?.WhisperProvider || "local");
+      setLoading(false);
+    }
+    fetchKeys();
+  }, []);
+
+  useEffect(() => {
+    const filtered = PROVIDERS.filter((provider) =>
+      provider.name.toLowerCase().includes(searchQuery.toLowerCase())
+    );
+    setFilteredProviders(filtered);
+  }, [searchQuery, selectedProvider]);
+
+  const PROVIDERS = [
+    {
+      name: "OpenAI",
+      value: "openai",
+      logo: OpenAiLogo,
+      options: <OpenAiWhisperOptions settings={settings} />,
+      description:
+        "Leverage the OpenAI Whisper-large model using your API key.",
+    },
+    {
+      name: "AnythingLLM Built-In",
+      value: "local",
+      logo: AnythingLLMIcon,
+      options: <NativeTranscriptionOptions settings={settings} />,
+      description: "Run a built-in whisper model on this instance privately.",
+    },
+  ];
+
+  return (
+    <div className="w-screen h-screen overflow-hidden bg-sidebar flex">
+      <Sidebar />
+      {loading ? (
+        <div
+          style={{ height: isMobile ? "100%" : "calc(100% - 32px)" }}
+          className="relative md:ml-[2px] md:mr-[16px] md:my-[16px] md:rounded-[16px] bg-main-gradient w-full h-full overflow-y-scroll"
+        >
+          <div className="w-full h-full flex justify-center items-center">
+            <PreLoader />
+          </div>
+        </div>
+      ) : (
+        <div
+          style={{ height: isMobile ? "100%" : "calc(100% - 32px)" }}
+          className="relative md:ml-[2px] md:mr-[16px] md:my-[16px] md:rounded-[16px] bg-main-gradient w-full h-full overflow-y-scroll"
+        >
+          <form onSubmit={handleSubmit} className="flex w-full">
+            <div className="flex flex-col w-full px-1 md:pl-6 md:pr-[86px] md:py-6 py-16">
+              <div className="w-full flex flex-col gap-y-1 pb-6 border-white border-b-2 border-opacity-10">
+                <div className="flex gap-x-4 items-center">
+                  <p className="text-lg leading-6 font-bold text-white">
+                    Transcription Model Preference
+                  </p>
+                  {hasChanges && (
+                    <button
+                      type="submit"
+                      disabled={saving}
+                      className="flex items-center gap-x-2 px-4 py-2 rounded-lg bg-[#2C2F36] text-white text-sm hover:bg-[#3D4147] shadow-md border border-[#3D4147]"
+                    >
+                      {saving ? "Saving..." : "Save changes"}
+                    </button>
+                  )}
+                </div>
+                <p className="text-xs leading-[18px] font-base text-white text-opacity-60">
+                  These are the credentials and settings for your preferred
+                  transcription model provider. Its important these keys are
+                  current and correct or else media files and audio will not
+                  transcribe.
+                </p>
+              </div>
+              <div className="text-sm font-medium text-white mt-6 mb-4">
+                Transcription Providers
+              </div>
+              <div className="w-full">
+                <div className="w-full relative border-slate-300/20 shadow border-4 rounded-xl text-white">
+                  <div className="w-full p-4 absolute top-0 rounded-t-lg backdrop-blur-sm">
+                    <div className="w-full flex items-center sticky top-0">
+                      <MagnifyingGlass
+                        size={16}
+                        weight="bold"
+                        className="absolute left-4 z-30 text-white"
+                      />
+                      <input
+                        type="text"
+                        placeholder="Search audio transcription providers"
+                        className="bg-zinc-600 z-20 pl-10 h-[38px] rounded-full w-full px-4 py-1 text-sm border-2 border-slate-300/40 outline-none focus:border-white text-white"
+                        onChange={(e) => setSearchQuery(e.target.value)}
+                        autoComplete="off"
+                        onKeyDown={(e) => {
+                          if (e.key === "Enter") e.preventDefault();
+                        }}
+                      />
+                    </div>
+                  </div>
+                  <div className="px-4 pt-[70px] flex flex-col gap-y-1 max-h-[390px] overflow-y-auto no-scroll pb-4">
+                    {filteredProviders.map((provider) => {
+                      return (
+                        <LLMItem
+                          key={provider.name}
+                          name={provider.name}
+                          value={provider.value}
+                          image={provider.logo}
+                          description={provider.description}
+                          checked={selectedProvider === provider.value}
+                          onClick={() => updateProviderChoice(provider.value)}
+                        />
+                      );
+                    })}
+                  </div>
+                </div>
+                <div
+                  onChange={() => setHasChanges(true)}
+                  className="mt-4 flex flex-col gap-y-1"
+                >
+                  {selectedProvider &&
+                    PROVIDERS.find(
+                      (provider) => provider.value === selectedProvider
+                    )?.options}
+                </div>
+              </div>
+            </div>
+          </form>
+        </div>
+      )}
+    </div>
+  );
+}

View File

@ -92,6 +92,9 @@ export default {
   llmPreference: () => {
     return "/settings/llm-preference";
   },
+  transcriptionPreference: () => {
+    return "/settings/transcription-preference";
+  },
   embeddingPreference: () => {
     return "/settings/embedding-preference";
   },

View File

@ -128,6 +128,16 @@ VECTOR_DB="lancedb"
 # ZILLIZ_ENDPOINT="https://sample.api.gcp-us-west1.zillizcloud.com"
 # ZILLIZ_API_TOKEN=api-token-here
 
+###########################################
+######## Audio Model Selection ############
+###########################################
+# (default) use built-in whisper-small model.
+WHISPER_PROVIDER="local"
+
+# use openai hosted whisper model.
+# WHISPER_PROVIDER="openai"
+# OPEN_AI_KEY=sk-xxxxxxxx
+
 # CLOUD DEPLOYMENT VARIRABLES ONLY
 # AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting.
 # STORAGE_DIR= # absolute filesystem path with no trailing slash

View File

@ -258,6 +258,7 @@ const SystemSettings = {
             AzureOpenAiEmbeddingModelPref: process.env.EMBEDDING_MODEL_PREF,
           }
         : {}),
+      WhisperProvider: process.env.WHISPER_PROVIDER || "local",
     };
   },
}, },

View File

@ -14,6 +14,9 @@ AnythingLLM allows you to upload various audio and video formats as source documents.
 
 Once transcribed you can embed these transcriptions into your workspace like you would any other file!
 
+**Other external model/transcription providers are also live.**
+
+- [OpenAI Whisper via API key.](https://openai.com/research/whisper)
 
 ## Text generation (LLM selection)
 
 > [!IMPORTANT]
 > Use of a locally running LLM model is **experimental** and may behave unexpectedly, crash, or not function at all.

View File

@ -5,13 +5,20 @@
 
 class CollectorApi {
   constructor() {
-    this.endpoint = "http://0.0.0.0:8888";
+    this.endpoint = `http://0.0.0.0:${process.env.COLLECTOR_PORT || 8888}`;
   }
 
   log(text, ...args) {
     console.log(`\x1b[36m[CollectorApi]\x1b[0m ${text}`, ...args);
   }
 
+  #attachOptions() {
+    return {
+      whisperProvider: process.env.WHISPER_PROVIDER || "local",
+      openAiKey: process.env.OPEN_AI_KEY || null,
+    };
+  }
+
   async online() {
     return await fetch(this.endpoint)
       .then((res) => res.ok)
@ -38,7 +45,10 @@ class CollectorApi {
       headers: {
         "Content-Type": "application/json",
       },
-      body: JSON.stringify({ filename }),
+      body: JSON.stringify({
+        filename,
+        options: this.#attachOptions(),
+      }),
     })
       .then((res) => {
         if (!res.ok) throw new Error("Response could not be completed");

View File

@ -269,6 +269,13 @@ const KEY_MAPPING = {
     checks: [isNotEmpty],
   },
 
+  // Whisper (transcription) providers
+  WhisperProvider: {
+    envKey: "WHISPER_PROVIDER",
+    checks: [isNotEmpty, supportedTranscriptionProvider],
+    postUpdate: [],
+  },
+
   // System Settings
   AuthToken: {
     envKey: "AUTH_TOKEN",
@ -351,6 +358,13 @@ function supportedLLM(input = "") {
   return validSelection ? null : `${input} is not a valid LLM provider.`;
 }
 
+function supportedTranscriptionProvider(input = "") {
+  const validSelection = ["openai", "local"].includes(input);
+  return validSelection
+    ? null
+    : `${input} is not a valid transcription model provider.`;
+}
+
 function validGeminiModel(input = "") {
   const validModels = ["gemini-pro"];
   return validModels.includes(input)
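For illustration, how the new validator behaves on a valid and an invalid provider name (the values below are hypothetical):

supportedTranscriptionProvider("local"); // => null, i.e. passes the check
supportedTranscriptionProvider("whisper-x"); // => "whisper-x is not a valid transcription model provider."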