Support external transcription providers (#909)

* Support External Transcription providers

* patch files

* update docs

* fix return data
Timothy Carambat 2024-03-14 15:43:26 -07:00 committed by GitHub
parent 1352b18b5f
commit 0ada882991
19 changed files with 541 additions and 110 deletions

View File

@ -25,7 +25,7 @@ app.use(
 );
 
 app.post("/process", async function (request, response) {
-  const { filename } = reqBody(request);
+  const { filename, options = {} } = reqBody(request);
   try {
     const targetFilename = path
       .normalize(filename)
@ -34,7 +34,7 @@ app.post("/process", async function (request, response) {
       success,
       reason,
       documents = [],
-    } = await processSingleFile(targetFilename);
+    } = await processSingleFile(targetFilename, options);
     response
       .status(200)
       .json({ filename: targetFilename, success, reason, documents });
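A minimal sketch, not part of the commit, of what a caller now posts to this endpoint from an async context. The filename is hypothetical, and the options shape mirrors the #attachOptions() payload that CollectorApi attaches later in this diff:

// Hypothetical manual call to the collector's updated /process endpoint.
const res = await fetch("http://0.0.0.0:8888/process", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({
    filename: "my-recording.mp3", // hypothetical file already in the hotdir
    options: {
      whisperProvider: "openai", // "local" (default) or "openai"
      openAiKey: process.env.OPEN_AI_KEY || null,
    },
  }),
});
const { success, reason, documents } = await res.json();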

View File

@ -33,6 +33,7 @@
     "moment": "^2.29.4",
     "multer": "^1.4.5-lts.1",
     "officeparser": "^4.0.5",
+    "openai": "^3.2.1",
     "pdf-parse": "^1.1.1",
     "puppeteer": "~21.5.2",
     "slugify": "^1.6.6",
@ -46,4 +47,4 @@
     "nodemon": "^2.0.22",
     "prettier": "^2.4.1"
   }
-}
+}

View File

@ -1,5 +1,3 @@
-const fs = require("fs");
-const path = require("path");
 const { v4 } = require("uuid");
 const {
   createdDate,
@ -9,39 +7,35 @@ const {
 const { tokenizeString } = require("../../utils/tokenizer");
 const { default: slugify } = require("slugify");
 const { LocalWhisper } = require("../../utils/WhisperProviders/localWhisper");
+const { OpenAiWhisper } = require("../../utils/WhisperProviders/OpenAiWhisper");
 
-async function asAudio({ fullFilePath = "", filename = "" }) {
-  const whisper = new LocalWhisper();
+const WHISPER_PROVIDERS = {
+  openai: OpenAiWhisper,
+  local: LocalWhisper,
+};
 
+async function asAudio({ fullFilePath = "", filename = "", options = {} }) {
+  const WhisperProvider = WHISPER_PROVIDERS.hasOwnProperty(
+    options?.whisperProvider
+  )
+    ? WHISPER_PROVIDERS[options?.whisperProvider]
+    : WHISPER_PROVIDERS.local;
+
   console.log(`-- Working ${filename} --`);
-  const transcriberPromise = new Promise((resolve) =>
-    whisper.client().then((client) => resolve(client))
-  );
-  const audioDataPromise = new Promise((resolve) =>
-    convertToWavAudioData(fullFilePath).then((audioData) => resolve(audioData))
-  );
-  const [audioData, transcriber] = await Promise.all([
-    audioDataPromise,
-    transcriberPromise,
-  ]);
+  const whisper = new WhisperProvider({ options });
+  const { content, error } = await whisper.processFile(fullFilePath, filename);
 
-  if (!audioData) {
-    console.error(`Failed to parse content from ${filename}.`);
+  if (!!error) {
+    console.error(`Error encountered for parsing of ${filename}.`);
     trashFile(fullFilePath);
     return {
       success: false,
-      reason: `Failed to parse content from ${filename}.`,
+      reason: error,
       documents: [],
     };
   }
 
-  console.log(`[Model Working]: Transcribing audio data to text`);
-  const { text: content } = await transcriber(audioData, {
-    chunk_length_s: 30,
-    stride_length_s: 5,
-  });
-
-  if (!content.length) {
+  if (!content?.length) {
     console.error(`Resulting text content was empty for ${filename}.`);
     trashFile(fullFilePath);
     return {
@ -76,79 +70,4 @@ async function asAudio({ fullFilePath = "", filename = "" }) {
   return { success: true, reason: null, documents: [document] };
 }
 
-async function convertToWavAudioData(sourcePath) {
-  try {
-    let buffer;
-    const wavefile = require("wavefile");
-    const ffmpeg = require("fluent-ffmpeg");
-    const outFolder = path.resolve(__dirname, `../../storage/tmp`);
-    if (!fs.existsSync(outFolder)) fs.mkdirSync(outFolder, { recursive: true });
-
-    const fileExtension = path.extname(sourcePath).toLowerCase();
-    if (fileExtension !== ".wav") {
-      console.log(
-        `[Conversion Required] ${fileExtension} file detected - converting to .wav`
-      );
-      const outputFile = path.resolve(outFolder, `${v4()}.wav`);
-      const convert = new Promise((resolve) => {
-        ffmpeg(sourcePath)
-          .toFormat("wav")
-          .on("error", (error) => {
-            console.error(`[Conversion Error] ${error.message}`);
-            resolve(false);
-          })
-          .on("progress", (progress) =>
-            console.log(
-              `[Conversion Processing]: ${progress.targetSize}KB converted`
-            )
-          )
-          .on("end", () => {
-            console.log("[Conversion Complete]: File converted to .wav!");
-            resolve(true);
-          })
-          .save(outputFile);
-      });
-      const success = await convert;
-      if (!success)
-        throw new Error(
-          "[Conversion Failed]: Could not convert file to .wav format!"
-        );
-
-      const chunks = [];
-      const stream = fs.createReadStream(outputFile);
-      for await (let chunk of stream) chunks.push(chunk);
-      buffer = Buffer.concat(chunks);
-      fs.rmSync(outputFile);
-    } else {
-      const chunks = [];
-      const stream = fs.createReadStream(sourcePath);
-      for await (let chunk of stream) chunks.push(chunk);
-      buffer = Buffer.concat(chunks);
-    }
-
-    const wavFile = new wavefile.WaveFile(buffer);
-    wavFile.toBitDepth("32f");
-    wavFile.toSampleRate(16000);
-
-    let audioData = wavFile.getSamples();
-    if (Array.isArray(audioData)) {
-      if (audioData.length > 1) {
-        const SCALING_FACTOR = Math.sqrt(2);
-        // Merge channels into first channel to save memory
-        for (let i = 0; i < audioData[0].length; ++i) {
-          audioData[0][i] =
-            (SCALING_FACTOR * (audioData[0][i] + audioData[1][i])) / 2;
-        }
-      }
-      audioData = audioData[0];
-    }
-
-    return audioData;
-  } catch (error) {
-    console.error(`convertToWavAudioData`, error);
-    return null;
-  }
-}
-
 module.exports = asAudio;
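The WHISPER_PROVIDERS map above implies a small provider contract: a constructor taking { options } and an async processFile(fullFilePath, filename) that resolves to { content, error }. A hypothetical third provider, sketched under that assumption (EchoWhisper is illustrative only and not part of this commit):

// Sketch of the provider interface asAudio.js now expects.
class EchoWhisper {
  constructor({ options }) {
    this.options = options; // provider-specific settings passed from the server
  }

  async processFile(fullFilePath, filename) {
    // A real provider would transcribe the audio at fullFilePath here.
    return { content: `transcript of ${filename}`, error: null };
  }
}

Registering it would only take one more entry in the map, e.g. { openai: OpenAiWhisper, local: LocalWhisper, echo: EchoWhisper }.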

View File

@ -7,7 +7,7 @@ const {
 const { trashFile, isTextType } = require("../utils/files");
 
 const RESERVED_FILES = ["__HOTDIR__.md"];
-async function processSingleFile(targetFilename) {
+async function processSingleFile(targetFilename, options = {}) {
   const fullFilePath = path.resolve(WATCH_DIRECTORY, targetFilename);
   if (RESERVED_FILES.includes(targetFilename))
     return {
@ -54,6 +54,7 @@ async function processSingleFile(targetFilename) {
   return await FileTypeProcessor({
     fullFilePath,
     filename: targetFilename,
+    options,
   });
 }

View File

@ -0,0 +1,44 @@
+const fs = require("fs");
+
+class OpenAiWhisper {
+  constructor({ options }) {
+    const { Configuration, OpenAIApi } = require("openai");
+    if (!options.openAiKey) throw new Error("No OpenAI API key was set.");
+
+    const config = new Configuration({
+      apiKey: options.openAiKey,
+    });
+    this.openai = new OpenAIApi(config);
+    this.model = "whisper-1";
+    this.temperature = 0;
+    this.#log("Initialized.");
+  }
+
+  #log(text, ...args) {
+    console.log(`\x1b[32m[OpenAiWhisper]\x1b[0m ${text}`, ...args);
+  }
+
+  async processFile(fullFilePath) {
+    return await this.openai
+      .createTranscription(
+        fs.createReadStream(fullFilePath),
+        this.model,
+        undefined,
+        "text",
+        this.temperature
+      )
+      .then((res) => {
+        if (res.hasOwnProperty("data"))
+          return { content: res.data, error: null };
+        return { content: "", error: "No content was able to be transcribed." };
+      })
+      .catch((e) => {
+        this.#log(`Could not get any response from openai whisper`, e.message);
+        return { content: "", error: e.message };
+      });
+  }
+}
+
+module.exports = {
+  OpenAiWhisper,
+};
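A minimal usage sketch for this class, assuming a valid OpenAI API key and the openai@^3.2.1 SDK added in package.json; the audio path is hypothetical:

const { OpenAiWhisper } = require("./OpenAiWhisper");

// Throws if no key is set, per the constructor guard above.
const whisper = new OpenAiWhisper({
  options: { openAiKey: process.env.OPEN_AI_KEY },
});
whisper
  .processFile("/path/to/audio.mp3") // hypothetical path
  .then(({ content, error }) => console.log(error ?? content));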

View File

@ -1,5 +1,6 @@
-const path = require("path");
 const fs = require("fs");
+const path = require("path");
+const { v4 } = require("uuid");
 
 class LocalWhisper {
   constructor() {
@ -16,12 +17,94 @@ class LocalWhisper {
     // Make directory when it does not exist in existing installations
     if (!fs.existsSync(this.cacheDir))
       fs.mkdirSync(this.cacheDir, { recursive: true });
+
+    this.#log("Initialized.");
+  }
+
+  #log(text, ...args) {
+    console.log(`\x1b[32m[LocalWhisper]\x1b[0m ${text}`, ...args);
+  }
+
+  async #convertToWavAudioData(sourcePath) {
+    try {
+      let buffer;
+      const wavefile = require("wavefile");
+      const ffmpeg = require("fluent-ffmpeg");
+      const outFolder = path.resolve(__dirname, `../../storage/tmp`);
+      if (!fs.existsSync(outFolder))
+        fs.mkdirSync(outFolder, { recursive: true });
+
+      const fileExtension = path.extname(sourcePath).toLowerCase();
+      if (fileExtension !== ".wav") {
+        this.#log(
+          `File conversion required! ${fileExtension} file detected - converting to .wav`
+        );
+        const outputFile = path.resolve(outFolder, `${v4()}.wav`);
+        const convert = new Promise((resolve) => {
+          ffmpeg(sourcePath)
+            .toFormat("wav")
+            .on("error", (error) => {
+              this.#log(`Conversion Error! ${error.message}`);
+              resolve(false);
+            })
+            .on("progress", (progress) =>
+              this.#log(
+                `Conversion Processing! ${progress.targetSize}KB converted`
+              )
+            )
+            .on("end", () => {
+              this.#log(`Conversion Complete! File converted to .wav!`);
+              resolve(true);
+            })
+            .save(outputFile);
+        });
+        const success = await convert;
+        if (!success)
+          throw new Error(
+            "[Conversion Failed]: Could not convert file to .wav format!"
+          );
+
+        const chunks = [];
+        const stream = fs.createReadStream(outputFile);
+        for await (let chunk of stream) chunks.push(chunk);
+        buffer = Buffer.concat(chunks);
+        fs.rmSync(outputFile);
+      } else {
+        const chunks = [];
+        const stream = fs.createReadStream(sourcePath);
+        for await (let chunk of stream) chunks.push(chunk);
+        buffer = Buffer.concat(chunks);
+      }
+
+      const wavFile = new wavefile.WaveFile(buffer);
+      wavFile.toBitDepth("32f");
+      wavFile.toSampleRate(16000);
+
+      let audioData = wavFile.getSamples();
+      if (Array.isArray(audioData)) {
+        if (audioData.length > 1) {
+          const SCALING_FACTOR = Math.sqrt(2);
+          // Merge channels into first channel to save memory
+          for (let i = 0; i < audioData[0].length; ++i) {
+            audioData[0][i] =
+              (SCALING_FACTOR * (audioData[0][i] + audioData[1][i])) / 2;
+          }
+        }
+        audioData = audioData[0];
+      }
+
+      return audioData;
+    } catch (error) {
+      console.error(`convertToWavAudioData`, error);
+      return null;
+    }
   }
 
   async client() {
     if (!fs.existsSync(this.modelPath)) {
-      console.log(
-        "\x1b[34m[INFO]\x1b[0m The native whisper model has never been run and will be downloaded right now. Subsequent runs will be faster. (~250MB)\n\n"
+      this.#log(
+        `The native whisper model has never been run and will be downloaded right now. Subsequent runs will be faster. (~250MB)`
       );
     }
@ -48,10 +131,45 @@
           : {}),
       });
     } catch (error) {
-      console.error("Failed to load the native whisper model:", error);
+      this.#log("Failed to load the native whisper model:", error);
       throw error;
     }
   }
+
+  async processFile(fullFilePath, filename) {
+    try {
+      const transcriberPromise = new Promise((resolve) =>
+        this.client().then((client) => resolve(client))
+      );
+      const audioDataPromise = new Promise((resolve) =>
+        this.#convertToWavAudioData(fullFilePath).then((audioData) =>
+          resolve(audioData)
+        )
+      );
+      const [audioData, transcriber] = await Promise.all([
+        audioDataPromise,
+        transcriberPromise,
+      ]);
+
+      if (!audioData) {
+        this.#log(`Failed to parse content from ${filename}.`);
+        return {
+          content: null,
+          error: `Failed to parse content from ${filename}.`,
+        };
+      }
+
+      this.#log(`Transcribing audio data to text...`);
+      const { text } = await transcriber(audioData, {
+        chunk_length_s: 30,
+        stride_length_s: 5,
+      });
+
+      return { content: text, error: null };
    } catch (error) {
+      return { content: null, error: error.message };
+    }
+  }
 }
 
 module.exports = {
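After this refactor LocalWhisper exposes the same { content, error } shape as OpenAiWhisper. A minimal usage sketch under that assumption (the path and filename are hypothetical; the first call also downloads the ~250MB native model):

const { LocalWhisper } = require("./localWhisper");

new LocalWhisper()
  .processFile("/path/to/audio.mp3", "audio.mp3") // hypothetical inputs
  .then(({ content, error }) => console.log(error ?? content));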

View File

@ -372,6 +372,13 @@ asynckit@^0.4.0:
   resolved "https://registry.yarnpkg.com/asynckit/-/asynckit-0.4.0.tgz#c79ed97f7f34cb8f2ba1bc9790bcc366474b4b79"
   integrity sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==
 
+axios@^0.26.0:
+  version "0.26.1"
+  resolved "https://registry.yarnpkg.com/axios/-/axios-0.26.1.tgz#1ede41c51fcf51bbbd6fd43669caaa4f0495aaa9"
+  integrity sha512-fPwcX4EvnSHuInCMItEhAGnaSEXRBjtzh9fOtsE6E1G6p7vl7edEeZe11QHf18+6+9gR5PbKV/sGKNaD8YaMeA==
+  dependencies:
+    follow-redirects "^1.14.8"
+
 b4a@^1.6.4:
   version "1.6.4"
   resolved "https://registry.yarnpkg.com/b4a/-/b4a-1.6.4.tgz#ef1c1422cae5ce6535ec191baeed7567443f36c9"
@ -1203,6 +1210,11 @@ fluent-ffmpeg@^2.1.2:
     async ">=0.2.9"
     which "^1.1.1"
 
+follow-redirects@^1.14.8:
+  version "1.15.6"
+  resolved "https://registry.yarnpkg.com/follow-redirects/-/follow-redirects-1.15.6.tgz#7f815c0cda4249c74ff09e95ef97c23b5fd0399b"
+  integrity sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==
+
 form-data-encoder@1.7.2:
   version "1.7.2"
   resolved "https://registry.yarnpkg.com/form-data-encoder/-/form-data-encoder-1.7.2.tgz#1f1ae3dccf58ed4690b86d87e4f57c654fbab040"
@ -2304,6 +2316,14 @@ onnxruntime-web@1.14.0:
     onnxruntime-common "~1.14.0"
     platform "^1.3.6"
 
+openai@^3.2.1:
+  version "3.3.0"
+  resolved "https://registry.yarnpkg.com/openai/-/openai-3.3.0.tgz#a6408016ad0945738e1febf43f2fccca83a3f532"
+  integrity sha512-uqxI/Au+aPRnsaQRe8CojU0eCR7I0mBiKjD3sNMzY6DaC1ZVrc85u98mtJW6voDug8fgGN+DIZmTDxTthxb7dQ==
+  dependencies:
+    axios "^0.26.0"
+    form-data "^4.0.0"
+
 openai@^4.19.0:
   version "4.20.1"
   resolved "https://registry.yarnpkg.com/openai/-/openai-4.20.1.tgz#afa0d496d125b5a0f6cebcb4b9aeabf71e00214e"

View File

@ -131,6 +131,16 @@ GID='1000'
 # ASTRA_DB_APPLICATION_TOKEN=
 # ASTRA_DB_ENDPOINT=
 
+###########################################
+######## Audio Model Selection ############
+###########################################
+# (default) use built-in whisper-small model.
+# WHISPER_PROVIDER="local"
+
+# use openai hosted whisper model.
+# WHISPER_PROVIDER="openai"
+# OPEN_AI_KEY=sk-xxxxxxxx
+
 # CLOUD DEPLOYMENT VARIRABLES ONLY
 # AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting.
 # DISABLE_TELEMETRY="false"

View File

@ -29,6 +29,9 @@ const GeneralApiKeys = lazy(() => import("@/pages/GeneralSettings/ApiKeys"));
 const GeneralLLMPreference = lazy(
   () => import("@/pages/GeneralSettings/LLMPreference")
 );
+const GeneralTranscriptionPreference = lazy(
+  () => import("@/pages/GeneralSettings/TranscriptionPreference")
+);
 const GeneralEmbeddingPreference = lazy(
   () => import("@/pages/GeneralSettings/EmbeddingPreference")
 );
@ -76,6 +79,12 @@ export default function App() {
               path="/settings/llm-preference"
               element={<AdminRoute Component={GeneralLLMPreference} />}
             />
+            <Route
+              path="/settings/transcription-preference"
+              element={
+                <AdminRoute Component={GeneralTranscriptionPreference} />
+              }
+            />
             <Route
               path="/settings/embedding-preference"
               element={<AdminRoute Component={GeneralEmbeddingPreference} />}

View File

@ -19,6 +19,7 @@ import {
   Notepad,
   CodeBlock,
   Barcode,
+  ClosedCaptioning,
 } from "@phosphor-icons/react";
 import useUser from "@/hooks/useUser";
 import { USER_BACKGROUND_COLOR } from "@/utils/constants";
@ -278,9 +279,17 @@ const SidebarOptions = ({ user = null }) => (
       flex={true}
       allowedRole={["admin"]}
     />
+    <Option
+      href={paths.settings.transcriptionPreference()}
+      btnText="Transcription Model"
+      icon={<ClosedCaptioning className="h-5 w-5 flex-shrink-0" />}
+      user={user}
+      flex={true}
+      allowedRole={["admin"]}
+    />
     <Option
       href={paths.settings.embeddingPreference()}
-      btnText="Embedding Preference"
+      btnText="Embedding Model"
       icon={<FileCode className="h-5 w-5 flex-shrink-0" />}
       user={user}
       flex={true}

View File

@ -0,0 +1,38 @@
+import { Gauge } from "@phosphor-icons/react";
+
+export default function NativeTranscriptionOptions() {
+  return (
+    <div className="w-full flex flex-col gap-y-4">
+      <div className="flex flex-col md:flex-row md:items-center gap-x-2 text-white mb-4 bg-blue-800/30 w-fit rounded-lg px-4 py-2">
+        <div className="gap-x-2 flex items-center">
+          <Gauge size={25} />
+          <p className="text-sm">
+            Using the local whisper model on machines with limited RAM or CPU
+            can stall AnythingLLM when processing media files.
+            <br />
+            We recommend at least 2GB of RAM and upload files &lt;10Mb.
+            <br />
+            <br />
+            <i>
+              The built-in model will automatically download on the first use.
+            </i>
+          </p>
+        </div>
+      </div>
+      <div className="w-full flex items-center gap-4">
+        <div className="flex flex-col w-60">
+          <label className="text-white text-sm font-semibold block mb-4">
+            Model Selection
+          </label>
+          <select
+            disabled={true}
+            className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
+          >
+            <option disabled={true} selected={true}>
+              Xenova/whisper-small
+            </option>
+          </select>
+        </div>
+      </div>
+    </div>
+  );
+}

View File

@ -0,0 +1,41 @@
+import { useState } from "react";
+
+export default function OpenAiWhisperOptions({ settings }) {
+  const [inputValue, setInputValue] = useState(settings?.OpenAiKey);
+  const [_openAIKey, setOpenAIKey] = useState(settings?.OpenAiKey);
+
+  return (
+    <div className="flex gap-x-4">
+      <div className="flex flex-col w-60">
+        <label className="text-white text-sm font-semibold block mb-4">
+          API Key
+        </label>
+        <input
+          type="password"
+          name="OpenAiKey"
+          className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
+          placeholder="OpenAI API Key"
+          defaultValue={settings?.OpenAiKey ? "*".repeat(20) : ""}
+          required={true}
+          autoComplete="off"
+          spellCheck={false}
+          onChange={(e) => setInputValue(e.target.value)}
+          onBlur={() => setOpenAIKey(inputValue)}
+        />
+      </div>
+      <div className="flex flex-col w-60">
+        <label className="text-white text-sm font-semibold block mb-4">
+          Whisper Model
+        </label>
+        <select
+          disabled={true}
+          className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
+        >
+          <option disabled={true} selected={true}>
+            Whisper Large
+          </option>
+        </select>
+      </div>
+    </div>
+  );
+}

View File

@ -0,0 +1,180 @@
+import React, { useEffect, useState } from "react";
+import { isMobile } from "react-device-detect";
+import Sidebar from "@/components/SettingsSidebar";
+import System from "@/models/system";
+import showToast from "@/utils/toast";
+import PreLoader from "@/components/Preloader";
+import OpenAiLogo from "@/media/llmprovider/openai.png";
+import AnythingLLMIcon from "@/media/logo/anything-llm-icon.png";
+import OpenAiWhisperOptions from "@/components/TranscriptionSelection/OpenAiOptions";
+import NativeTranscriptionOptions from "@/components/TranscriptionSelection/NativeTranscriptionOptions";
+import LLMItem from "@/components/LLMSelection/LLMItem";
+import { MagnifyingGlass } from "@phosphor-icons/react";
+
+export default function TranscriptionModelPreference() {
+  const [saving, setSaving] = useState(false);
+  const [hasChanges, setHasChanges] = useState(false);
+  const [settings, setSettings] = useState(null);
+  const [loading, setLoading] = useState(true);
+  const [searchQuery, setSearchQuery] = useState("");
+  const [filteredProviders, setFilteredProviders] = useState([]);
+  const [selectedProvider, setSelectedProvider] = useState(null);
+
+  const handleSubmit = async (e) => {
+    e.preventDefault();
+    const form = e.target;
+    const data = { WhisperProvider: selectedProvider };
+    const formData = new FormData(form);
+
+    for (var [key, value] of formData.entries()) data[key] = value;
+    const { error } = await System.updateSystem(data);
+    setSaving(true);
+
+    if (error) {
+      showToast(`Failed to save preferences: ${error}`, "error");
+    } else {
+      showToast("Transcription preferences saved successfully.", "success");
+    }
+
+    setSaving(false);
+    setHasChanges(!!error);
+  };
+
+  const updateProviderChoice = (selection) => {
+    setSelectedProvider(selection);
+    setHasChanges(true);
+  };
+
+  useEffect(() => {
+    async function fetchKeys() {
+      const _settings = await System.keys();
+      setSettings(_settings);
+      setSelectedProvider(_settings?.WhisperProvider || "local");
+      setLoading(false);
+    }
+    fetchKeys();
+  }, []);
+
+  useEffect(() => {
+    const filtered = PROVIDERS.filter((provider) =>
+      provider.name.toLowerCase().includes(searchQuery.toLowerCase())
+    );
+    setFilteredProviders(filtered);
+  }, [searchQuery, selectedProvider]);
+
+  const PROVIDERS = [
+    {
+      name: "OpenAI",
+      value: "openai",
+      logo: OpenAiLogo,
+      options: <OpenAiWhisperOptions settings={settings} />,
+      description:
+        "Leverage the OpenAI Whisper-large model using your API key.",
+    },
+    {
+      name: "AnythingLLM Built-In",
+      value: "local",
+      logo: AnythingLLMIcon,
+      options: <NativeTranscriptionOptions settings={settings} />,
+      description: "Run a built-in whisper model on this instance privately.",
+    },
+  ];
+
+  return (
+    <div className="w-screen h-screen overflow-hidden bg-sidebar flex">
+      <Sidebar />
+      {loading ? (
+        <div
+          style={{ height: isMobile ? "100%" : "calc(100% - 32px)" }}
+          className="relative md:ml-[2px] md:mr-[16px] md:my-[16px] md:rounded-[16px] bg-main-gradient w-full h-full overflow-y-scroll"
+        >
+          <div className="w-full h-full flex justify-center items-center">
+            <PreLoader />
+          </div>
+        </div>
+      ) : (
+        <div
+          style={{ height: isMobile ? "100%" : "calc(100% - 32px)" }}
+          className="relative md:ml-[2px] md:mr-[16px] md:my-[16px] md:rounded-[16px] bg-main-gradient w-full h-full overflow-y-scroll"
+        >
+          <form onSubmit={handleSubmit} className="flex w-full">
+            <div className="flex flex-col w-full px-1 md:pl-6 md:pr-[86px] md:py-6 py-16">
+              <div className="w-full flex flex-col gap-y-1 pb-6 border-white border-b-2 border-opacity-10">
+                <div className="flex gap-x-4 items-center">
+                  <p className="text-lg leading-6 font-bold text-white">
+                    Transcription Model Preference
+                  </p>
+                  {hasChanges && (
+                    <button
+                      type="submit"
+                      disabled={saving}
+                      className="flex items-center gap-x-2 px-4 py-2 rounded-lg bg-[#2C2F36] text-white text-sm hover:bg-[#3D4147] shadow-md border border-[#3D4147]"
+                    >
+                      {saving ? "Saving..." : "Save changes"}
+                    </button>
+                  )}
+                </div>
+                <p className="text-xs leading-[18px] font-base text-white text-opacity-60">
+                  These are the credentials and settings for your preferred
+                  transcription model provider. Its important these keys are
+                  current and correct or else media files and audio will not
+                  transcribe.
+                </p>
+              </div>
+              <div className="text-sm font-medium text-white mt-6 mb-4">
+                Transcription Providers
+              </div>
+              <div className="w-full">
+                <div className="w-full relative border-slate-300/20 shadow border-4 rounded-xl text-white">
+                  <div className="w-full p-4 absolute top-0 rounded-t-lg backdrop-blur-sm">
+                    <div className="w-full flex items-center sticky top-0">
+                      <MagnifyingGlass
+                        size={16}
+                        weight="bold"
+                        className="absolute left-4 z-30 text-white"
+                      />
+                      <input
+                        type="text"
+                        placeholder="Search audio transcription providers"
+                        className="bg-zinc-600 z-20 pl-10 h-[38px] rounded-full w-full px-4 py-1 text-sm border-2 border-slate-300/40 outline-none focus:border-white text-white"
+                        onChange={(e) => setSearchQuery(e.target.value)}
+                        autoComplete="off"
+                        onKeyDown={(e) => {
+                          if (e.key === "Enter") e.preventDefault();
+                        }}
+                      />
+                    </div>
+                  </div>
+                  <div className="px-4 pt-[70px] flex flex-col gap-y-1 max-h-[390px] overflow-y-auto no-scroll pb-4">
+                    {filteredProviders.map((provider) => {
+                      return (
+                        <LLMItem
+                          key={provider.name}
+                          name={provider.name}
+                          value={provider.value}
+                          image={provider.logo}
+                          description={provider.description}
+                          checked={selectedProvider === provider.value}
+                          onClick={() => updateProviderChoice(provider.value)}
+                        />
+                      );
+                    })}
+                  </div>
+                </div>
+                <div
+                  onChange={() => setHasChanges(true)}
+                  className="mt-4 flex flex-col gap-y-1"
+                >
+                  {selectedProvider &&
+                    PROVIDERS.find(
+                      (provider) => provider.value === selectedProvider
+                    )?.options}
+                </div>
+              </div>
+            </div>
+          </form>
+        </div>
+      )}
+    </div>
+  );
+}

View File

@ -92,6 +92,9 @@ export default {
   llmPreference: () => {
     return "/settings/llm-preference";
   },
+  transcriptionPreference: () => {
+    return "/settings/transcription-preference";
+  },
   embeddingPreference: () => {
     return "/settings/embedding-preference";
   },

View File

@ -128,6 +128,16 @@ VECTOR_DB="lancedb"
 # ZILLIZ_ENDPOINT="https://sample.api.gcp-us-west1.zillizcloud.com"
 # ZILLIZ_API_TOKEN=api-token-here
 
+###########################################
+######## Audio Model Selection ############
+###########################################
+# (default) use built-in whisper-small model.
+WHISPER_PROVIDER="local"
+
+# use openai hosted whisper model.
+# WHISPER_PROVIDER="openai"
+# OPEN_AI_KEY=sk-xxxxxxxx
+
 # CLOUD DEPLOYMENT VARIRABLES ONLY
 # AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting.
 # STORAGE_DIR= # absolute filesystem path with no trailing slash

View File

@ -258,6 +258,7 @@ const SystemSettings = {
             AzureOpenAiEmbeddingModelPref: process.env.EMBEDDING_MODEL_PREF,
           }
         : {}),
+      WhisperProvider: process.env.WHISPER_PROVIDER || "local",
     };
   },
}, },

View File

@ -14,6 +14,9 @@ AnythingLLM allows you to upload various audio and video formats as source documents.
 
 Once transcribed you can embed these transcriptions into your workspace like you would any other file!
 
+**Other external model/transcription providers are also live.**
+
+- [OpenAI Whisper via API key.](https://openai.com/research/whisper)
 
 ## Text generation (LLM selection)
 
 > [!IMPORTANT]
 > Use of a locally running LLM model is **experimental** and may behave unexpectedly, crash, or not function at all.

View File

@ -5,13 +5,20 @@
 
 class CollectorApi {
   constructor() {
-    this.endpoint = "http://0.0.0.0:8888";
+    this.endpoint = `http://0.0.0.0:${process.env.COLLECTOR_PORT || 8888}`;
   }
 
   log(text, ...args) {
     console.log(`\x1b[36m[CollectorApi]\x1b[0m ${text}`, ...args);
   }
 
+  #attachOptions() {
+    return {
+      whisperProvider: process.env.WHISPER_PROVIDER || "local",
+      openAiKey: process.env.OPEN_AI_KEY || null,
+    };
+  }
+
   async online() {
     return await fetch(this.endpoint)
       .then((res) => res.ok)
@ -38,7 +45,10 @@ class CollectorApi {
       headers: {
         "Content-Type": "application/json",
       },
-      body: JSON.stringify({ filename }),
+      body: JSON.stringify({
+        filename,
+        options: this.#attachOptions(),
+      }),
     })
       .then((res) => {
         if (!res.ok) throw new Error("Response could not be completed");

View File

@ -269,6 +269,13 @@ const KEY_MAPPING = {
     checks: [isNotEmpty],
   },
 
+  // Whisper (transcription) providers
+  WhisperProvider: {
+    envKey: "WHISPER_PROVIDER",
+    checks: [isNotEmpty, supportedTranscriptionProvider],
+    postUpdate: [],
+  },
+
   // System Settings
   AuthToken: {
     envKey: "AUTH_TOKEN",
@ -351,6 +358,13 @@ function supportedLLM(input = "") {
   return validSelection ? null : `${input} is not a valid LLM provider.`;
 }
 
+function supportedTranscriptionProvider(input = "") {
+  const validSelection = ["openai", "local"].includes(input);
+  return validSelection
+    ? null
+    : `${input} is not a valid transcription model provider.`;
+}
+
 function validGeminiModel(input = "") {
   const validModels = ["gemini-pro"];
   return validModels.includes(input)
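For illustration, how the new validator behaves on a valid and an invalid provider name (the values below are hypothetical):

supportedTranscriptionProvider("local"); // => null, i.e. passes the check
supportedTranscriptionProvider("whisper-x"); // => "whisper-x is not a valid transcription model provider."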