Obsidian data connector (#3798)

* add obsidian vault data connector

* lint

* add english translations

* normalize translations

* improve file parser and reader

---------

Co-authored-by: timothycarambat <rambat1010@gmail.com>
This commit is contained in:
Sean Hatfield 2025-05-12 13:45:27 -07:00 committed by GitHub
parent 492570dfed
commit 2b274c62b7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
27 changed files with 551 additions and 27 deletions

View File

@ -4,6 +4,7 @@ const { resolveRepoLoader, resolveRepoLoaderFunction } = require("../utils/exten
const { reqBody } = require("../utils/http");
const { validURL } = require("../utils/url");
const RESYNC_METHODS = require("./resync");
const { loadObsidianVault } = require("../utils/extensions/ObsidianVault");
function extensions(app) {
if (!app) return;
@ -180,6 +181,27 @@ function extensions(app) {
return;
}
);
// Obsidian vault import: takes the `files` array from the request body and
// hands it to loadObsidianVault, replying with whatever that returns.
// Any thrown error is logged and reported to the caller as a 400.
app.post(
  "/ext/obsidian/vault",
  [verifyPayloadIntegrity, setDataSigner],
  async (request, response) => {
    try {
      const { files } = reqBody(request);
      const importResult = await loadObsidianVault({ files });
      response.status(200).json(importResult);
    } catch (error) {
      console.error(error);
      const failure = { success: false, reason: error.message, data: null };
      response.status(400).json(failure);
    }
  }
);
}
module.exports = extensions;

View File

@ -0,0 +1,91 @@
const { v4 } = require("uuid");
const { default: slugify } = require("slugify");
const path = require("path");
const fs = require("fs");
const {
writeToServerDocuments,
sanitizeFileName,
documentsFolder,
} = require("../../files");
/**
 * Works out the vault folder name from the uploaded files' relative paths.
 * The first path segment of every file is collected; a name is only returned
 * when all files agree on exactly one top-level folder.
 *
 * @param {Array<{path?: string}>} files - uploaded file descriptors
 * @returns {string|null} the shared top-level folder, or null when there is
 *   none (no usable paths) or more than one candidate.
 */
function parseObsidianVaultPath(files = []) {
  const rootFolders = new Set();
  files.forEach((entry) => {
    // Entries without a path cannot contribute a folder name.
    if (!entry?.path) return;
    const [topLevel] = entry.path.split("/");
    rootFolders.add(topLevel);
  });

  // Zero candidates or an ambiguous set both mean "no vault name".
  if (rootFolders.size !== 1) return null;

  const [onlyRoot] = rootFolders;
  return onlyRoot;
}
/**
 * Converts the files of an uploaded Obsidian vault into collector documents.
 * Every non-empty file is written as its own document into a freshly named
 * folder inside `documentsFolder`; failures are recorded per file and do not
 * abort the rest of the import.
 *
 * @param {Object} params
 * @param {Array<{name: string, path: string, content: string}>} params.files -
 *   files to import (name, vault-relative path and text content).
 * @returns {Promise<Object>} On invalid input:
 *   `{ success: false, reason, error, data: null }`. Otherwise
 *   `{ success: true, data: { processed, failed, skipped, total, results, destination } }`
 *   where `results` holds one `{ file, status, reason? }` entry per input file.
 */
async function loadObsidianVault({ files = [] }) {
  if (!Array.isArray(files) || files.length === 0) {
    const reason = "No files provided";
    // `reason` is the key read by the HTTP layer and the frontend;
    // `error` is kept so existing consumers of the old key still work.
    return { success: false, reason, error: reason, data: null };
  }

  const vaultName = parseObsidianVaultPath(files);
  // Short random suffix so repeated imports of the same vault do not collide.
  const folderUUId = v4().slice(0, 4);
  const outFolder = slugify(
    vaultName
      ? `obsidian-vault-${vaultName}-${folderUUId}`
      : `obsidian-${folderUUId}`
  ).toLowerCase();
  const outFolderPath = path.resolve(documentsFolder, outFolder);
  // `recursive: true` makes this a no-op when the folder already exists.
  fs.mkdirSync(outFolderPath, { recursive: true });

  console.log(
    `Processing ${files.length} files from Obsidian Vault ${
      vaultName ? `"${vaultName}"` : ""
    }`
  );

  const results = [];
  for (const file of files) {
    try {
      const fullPageContent = file?.content;

      // Files with no content (or only whitespace) are not imported, but they
      // are still reported so the counts always add up to `total`.
      if (!fullPageContent || fullPageContent.trim() === "") {
        results.push({
          file: file?.path,
          status: "skipped",
          reason: "File is empty",
        });
        continue;
      }

      const data = {
        id: v4(),
        url: `obsidian://${file.path}`,
        title: file.name,
        docAuthor: "Obsidian Vault",
        description: file.name,
        docSource: "Obsidian Vault",
        chunkSource: `obsidian://${file.path}`,
        published: new Date().toLocaleString(),
        // Split on any whitespace so words separated by newlines or tabs in
        // multi-line notes are counted individually.
        wordCount: fullPageContent.split(/\s+/).filter(Boolean).length,
        pageContent: fullPageContent,
        token_count_estimate: fullPageContent.length / 4, // rough estimate
      };

      const targetFileName = sanitizeFileName(
        `${slugify(file.name)}-${data.id}`
      );
      writeToServerDocuments(data, targetFileName, outFolderPath);
      results.push({ file: file.path, status: "success" });
    } catch (e) {
      console.error(`Failed to process ${file?.path}:`, e);
      results.push({ file: file?.path, status: "failed", reason: e.message });
    }
  }

  const countByStatus = (status) =>
    results.filter((r) => r.status === status).length;

  return {
    success: true,
    data: {
      processed: countByStatus("success"),
      failed: countByStatus("failed"),
      skipped: countByStatus("skipped"),
      total: files.length,
      results,
      destination: path.basename(outFolderPath),
    },
  };
}
module.exports = {
loadObsidianVault,
};

View File

@ -2,6 +2,15 @@ const fs = require("fs");
const path = require("path");
const { MimeDetector } = require("./mime");
/**
 * Absolute path of the folder where documents are stored once they have
 * been processed by the collector.
 *
 * When NODE_ENV is "development" the path is resolved relative to this file
 * (`../../../server/storage/documents`); otherwise it is the `documents`
 * folder inside the directory named by the STORAGE_DIR environment variable.
 */
const documentsFolder =
process.env.NODE_ENV === "development"
? path.resolve(__dirname, `../../../server/storage/documents`)
: path.resolve(process.env.STORAGE_DIR, `documents`);
/**
* Checks if a file is text by checking the mime type and then falling back to buffer inspection.
* This way we can capture all the cases where the mime type is not known but still parseable as text
@ -189,4 +198,5 @@ module.exports = {
normalizePath,
isWithin,
sanitizeFileName,
documentsFolder,
};

View File

@ -4,6 +4,7 @@ import YouTube from "./youtube.svg";
import Link from "./link.svg";
import Confluence from "./confluence.jpeg";
import DrupalWiki from "./drupalwiki.jpg";
import Obsidian from "./obsidian.png";
const ConnectorImages = {
github: GitHub,
@ -12,6 +13,7 @@ const ConnectorImages = {
websiteDepth: Link,
confluence: Confluence,
drupalwiki: DrupalWiki,
obsidian: Obsidian,
};
export default ConnectorImages;

Binary file not shown.

After

Width:  |  Height:  |  Size: 125 KiB

View File

@ -0,0 +1,175 @@
import { useState } from "react";
import { useTranslation } from "react-i18next";
import { FolderOpen, Info } from "@phosphor-icons/react";
import System from "@/models/system";
import showToast from "@/utils/toast";
export default function ObsidianOptions() {
const { t } = useTranslation();
const [loading, setLoading] = useState(false);
const [vaultPath, setVaultPath] = useState("");
const [selectedFiles, setSelectedFiles] = useState([]);
const handleFolderPick = async (e) => {
const files = Array.from(e.target.files);
if (files.length === 0) return;
// Filter for .md files only
const markdownFiles = files.filter((file) => file.name.endsWith(".md"));
setSelectedFiles(markdownFiles);
// Set the folder path from the first file
if (markdownFiles.length > 0) {
const path = markdownFiles[0].webkitRelativePath.split("/")[0];
setVaultPath(path);
}
};
const handleSubmit = async (e) => {
e.preventDefault();
if (selectedFiles.length === 0) return;
try {
setLoading(true);
showToast("Importing Obsidian vault - this may take a while.", "info", {
clear: true,
autoClose: false,
});
// Read all files and prepare them for submission
const fileContents = await Promise.all(
selectedFiles.map(async (file) => {
const content = await file.text();
return {
name: file.name,
path: file.webkitRelativePath,
content: content,
};
})
);
const { data, error } = await System.dataConnectors.obsidian.collect({
files: fileContents,
});
if (!!error) {
showToast(error, "error", { clear: true });
setLoading(false);
setSelectedFiles([]);
setVaultPath("");
return;
}
// Show results
const successCount = data.processed;
const failCount = data.failed;
const totalCount = data.total;
if (successCount === totalCount) {
showToast(
`Successfully imported ${successCount} files from your vault!`,
"success",
{ clear: true }
);
} else {
showToast(
`Imported ${successCount} files, ${failCount} failed`,
"warning",
{ clear: true }
);
}
setLoading(false);
} catch (e) {
console.error(e);
showToast(e.message, "error", { clear: true });
setLoading(false);
}
};
return (
<div className="flex w-full">
<div className="flex flex-col w-full px-1 md:pb-6 pb-16">
<form className="w-full" onSubmit={handleSubmit}>
<div className="w-full flex flex-col py-2">
<div className="w-full flex flex-col gap-4">
<div className="flex flex-col md:flex-row md:items-center gap-x-2 text-white mb-4 bg-blue-800/30 w-fit rounded-lg px-4 py-2">
<div className="gap-x-2 flex items-center">
<Info className="shrink-0" size={25} />
<p className="text-sm">
{t("connectors.obsidian.vault_warning")}
</p>
</div>
</div>
<div className="flex flex-col">
<div className="flex flex-col gap-y-1 mb-4">
<label className="text-white text-sm font-bold">
{t("connectors.obsidian.vault_location")}
</label>
<p className="text-xs font-normal text-theme-text-secondary">
{t("connectors.obsidian.vault_description")}
</p>
</div>
<div className="flex gap-x-2">
<input
type="text"
value={vaultPath}
onChange={(e) => setVaultPath(e.target.value)}
placeholder="/path/to/your/vault"
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
required={true}
autoComplete="off"
spellCheck={false}
readOnly
/>
<label className="px-3 py-2 bg-theme-settings-input-bg border border-none rounded-lg text-white hover:bg-theme-settings-input-bg/80 cursor-pointer">
<FolderOpen size={20} />
<input
type="file"
webkitdirectory=""
onChange={handleFolderPick}
className="hidden"
/>
</label>
</div>
{selectedFiles.length > 0 && (
<>
<p className="text-xs text-white mt-2 font-bold">
{t("connectors.obsidian.selected_files", {
count: selectedFiles.length,
})}
</p>
{selectedFiles.map((file, i) => (
<p key={i} className="text-xs text-white mt-2">
{file.webkitRelativePath}
</p>
))}
</>
)}
</div>
</div>
</div>
<div className="flex flex-col gap-y-2 w-full pr-10">
<button
type="submit"
disabled={loading || selectedFiles.length === 0}
className="border-none mt-2 w-full justify-center px-4 py-2 rounded-lg text-dark-text light:text-white text-sm font-bold items-center flex gap-x-2 bg-theme-home-button-primary hover:bg-theme-home-button-primary-hover disabled:bg-theme-home-button-primary-hover disabled:cursor-not-allowed"
>
{loading
? t("connectors.obsidian.importing")
: t("connectors.obsidian.import_vault")}
</button>
{loading && (
<p className="text-xs text-white/50">
{t("connectors.obsidian.processing_time")}
</p>
)}
</div>
</form>
</div>
</div>
);
}

View File

@ -9,6 +9,7 @@ import DrupalWikiOptions from "./Connectors/DrupalWiki";
import { useState } from "react";
import ConnectorOption from "./ConnectorOption";
import WebsiteDepthOptions from "./Connectors/WebsiteDepth";
import ObsidianOptions from "./Connectors/Obsidian";
export const getDataConnectors = (t) => ({
github: {
@ -47,6 +48,12 @@ export const getDataConnectors = (t) => ({
description: "Import Drupal Wiki spaces in a single click.",
options: <DrupalWikiOptions />,
},
obsidian: {
name: "Obsidian",
image: ConnectorImages.obsidian,
description: "Import Obsidian vault in a single click.",
options: <ObsidianOptions />,
},
});
export default function DataConnectors() {

View File

@ -641,6 +641,17 @@ const TRANSLATIONS = {
watch_explained_block3_end: null,
accept: null,
},
obsidian: {
name: null,
description: null,
vault_location: null,
vault_description: null,
selected_files: null,
importing: null,
import_vault: null,
processing_time: null,
vault_warning: null,
},
},
chat_window: {
welcome: null,

View File

@ -679,6 +679,17 @@ const TRANSLATIONS = {
watch_explained_block3_end: " adminvisning.",
accept: "Okay, jeg har forstået",
},
obsidian: {
name: null,
description: null,
vault_location: null,
vault_description: null,
selected_files: null,
importing: null,
import_vault: null,
processing_time: null,
vault_warning: null,
},
},
chat_window: {
welcome: "Velkommen til dit nye arbejdsområde.",

View File

@ -677,6 +677,17 @@ const TRANSLATIONS = {
" entscheiden, welche Dokumente du beobachten möchtest.",
accept: "Alles klar, ich habe es verstanden.",
},
obsidian: {
name: null,
description: null,
vault_location: null,
vault_description: null,
selected_files: null,
importing: null,
import_vault: null,
processing_time: null,
vault_warning: null,
},
},
chat_window: {
welcome: "Willkommen zu deinem Arbeitsbereich.",

View File

@ -721,6 +721,20 @@ const TRANSLATIONS = {
connectors: {
"search-placeholder": "Search data connectors",
"no-connectors": "No data connectors found.",
obsidian: {
name: "Obsidian",
description: "Import Obsidian vault in a single click.",
vault_location: "Vault Location",
vault_description:
"Select your Obsidian vault folder to import all notes and their connections.",
selected_files: "Found {{count}} markdown files",
importing: "Importing vault...",
import_vault: "Import Vault",
processing_time:
"This may take a while depending on the size of your vault.",
vault_warning:
"To avoid any conflicts, make sure your Obsidian vault is not currently open.",
},
github: {
name: "GitHub Repo",
description:

View File

@ -640,6 +640,17 @@ const TRANSLATIONS = {
watch_explained_block3_end: null,
accept: null,
},
obsidian: {
name: null,
description: null,
vault_location: null,
vault_description: null,
selected_files: null,
importing: null,
import_vault: null,
processing_time: null,
vault_warning: null,
},
},
chat_window: {
welcome: null,

View File

@ -633,6 +633,17 @@ const TRANSLATIONS = {
watch_explained_block3_end: null,
accept: null,
},
obsidian: {
name: null,
description: null,
vault_location: null,
vault_description: null,
selected_files: null,
importing: null,
import_vault: null,
processing_time: null,
vault_warning: null,
},
},
chat_window: {
welcome: null,

View File

@ -641,6 +641,17 @@ const TRANSLATIONS = {
watch_explained_block3_end: null,
accept: null,
},
obsidian: {
name: null,
description: null,
vault_location: null,
vault_description: null,
selected_files: null,
importing: null,
import_vault: null,
processing_time: null,
vault_warning: null,
},
},
chat_window: {
welcome: null,

View File

@ -626,6 +626,17 @@ const TRANSLATIONS = {
watch_explained_block3_end: null,
accept: null,
},
obsidian: {
name: null,
description: null,
vault_location: null,
vault_description: null,
selected_files: null,
importing: null,
import_vault: null,
processing_time: null,
vault_warning: null,
},
},
chat_window: {
welcome: null,

View File

@ -639,6 +639,17 @@ const TRANSLATIONS = {
watch_explained_block3_end: null,
accept: null,
},
obsidian: {
name: null,
description: null,
vault_location: null,
vault_description: null,
selected_files: null,
importing: null,
import_vault: null,
processing_time: null,
vault_warning: null,
},
},
chat_window: {
welcome: null,

View File

@ -671,6 +671,17 @@ const TRANSLATIONS = {
watch_explained_block3_end: "管理画面から行えます。",
accept: "わかりました",
},
obsidian: {
name: null,
description: null,
vault_location: null,
vault_description: null,
selected_files: null,
importing: null,
import_vault: null,
processing_time: null,
vault_warning: null,
},
},
chat_window: {
welcome: "新しいワークスペースへようこそ。",

View File

@ -626,6 +626,17 @@ const TRANSLATIONS = {
watch_explained_block3_end: null,
accept: null,
},
obsidian: {
name: null,
description: null,
vault_location: null,
vault_description: null,
selected_files: null,
importing: null,
import_vault: null,
processing_time: null,
vault_warning: null,
},
},
chat_window: {
welcome: null,

View File

@ -636,6 +636,17 @@ const TRANSLATIONS = {
watch_explained_block3_end: null,
accept: null,
},
obsidian: {
name: null,
description: null,
vault_location: null,
vault_description: null,
selected_files: null,
importing: null,
import_vault: null,
processing_time: null,
vault_warning: null,
},
},
chat_window: {
welcome: null,

View File

@ -645,6 +645,17 @@ const TRANSLATIONS = {
watch_explained_block3_end: null,
accept: null,
},
obsidian: {
name: null,
description: null,
vault_location: null,
vault_description: null,
selected_files: null,
importing: null,
import_vault: null,
processing_time: null,
vault_warning: null,
},
},
chat_window: {
welcome: null,

View File

@ -680,6 +680,17 @@ const TRANSLATIONS = {
watch_explained_block3_end: " в режиме администратора.",
accept: "Хорошо, понял",
},
obsidian: {
name: null,
description: null,
vault_location: null,
vault_description: null,
selected_files: null,
importing: null,
import_vault: null,
processing_time: null,
vault_warning: null,
},
},
chat_window: {
welcome: "Добро пожаловать в ваше новое рабочее пространство.",

View File

@ -636,6 +636,17 @@ const TRANSLATIONS = {
watch_explained_block3_end: null,
accept: null,
},
obsidian: {
name: null,
description: null,
vault_location: null,
vault_description: null,
selected_files: null,
importing: null,
import_vault: null,
processing_time: null,
vault_warning: null,
},
},
chat_window: {
welcome: null,

View File

@ -635,6 +635,17 @@ const TRANSLATIONS = {
watch_explained_block3_end: null,
accept: null,
},
obsidian: {
name: null,
description: null,
vault_location: null,
vault_description: null,
selected_files: null,
importing: null,
import_vault: null,
processing_time: null,
vault_warning: null,
},
},
chat_window: {
welcome: null,

View File

@ -66,7 +66,6 @@ const TRANSLATIONS = {
yes: "是",
no: "否",
},
settings: {
title: "设置",
system: "系统",
@ -221,12 +220,10 @@ const TRANSLATIONS = {
},
},
},
"new-workspace": {
title: "新工作区",
placeholder: "我的工作区",
},
"workspaces—settings": {
general: "通用设置",
chat: "聊天设置",
@ -234,7 +231,6 @@ const TRANSLATIONS = {
members: "成员",
agent: "代理配置",
},
general: {
vector: {
title: "向量数量",
@ -267,7 +263,6 @@ const TRANSLATIONS = {
"工作区。这将删除矢量数据库中的所有矢量嵌入。\n\n原始源文件将保持不变。此操作是不可逆转的。",
},
},
chat: {
llm: {
title: "工作区 LLM 提供者",
@ -332,7 +327,6 @@ const TRANSLATIONS = {
hint: "大多数 LLM 都有各种可接受的有效值范围。请咨询你的LLM提供商以获取该信息。",
},
},
"vector-workspace": {
identifier: "向量数据库标识符",
snippets: {
@ -359,7 +353,6 @@ const TRANSLATIONS = {
error: "无法重置工作区向量数据库!",
},
},
agent: {
"performance-warning":
"不明确支持工具调用的 LLMs 的性能高度依赖于模型的功能和准确性。有些能力可能受到限制或不起作用。",
@ -376,7 +369,6 @@ const TRANSLATIONS = {
description: "将用于此工作区的 @agent 代理的特定 LLM 模型。",
wait: "-- 等待模型 --",
},
skill: {
title: "默认代理技能",
description:
@ -411,7 +403,6 @@ const TRANSLATIONS = {
},
},
},
recorded: {
title: "工作区聊天历史记录",
description: "这些是用户发送的所有聊天记录和消息,按创建日期排序。",
@ -425,7 +416,6 @@ const TRANSLATIONS = {
at: "发送时间",
},
},
customization: {
interface: {
title: "界面偏好设置",
@ -515,7 +505,6 @@ const TRANSLATIONS = {
},
},
},
api: {
title: "API 密钥",
description: "API 密钥允许持有者以编程方式访问和管理此 AnythingLLM 实例。",
@ -527,14 +516,12 @@ const TRANSLATIONS = {
created: "创建时间",
},
},
llm: {
title: "LLM 首选项",
description:
"这些是你首选的 LLM 聊天和嵌入提供商的凭据和设置。重要的是,确保这些密钥是最新的和正确的,否则 AnythingLLM 将无法正常运行。",
provider: "LLM 提供商",
},
transcription: {
title: "转录模型首选项",
description:
@ -545,7 +532,6 @@ const TRANSLATIONS = {
"warn-recommend": "我们建议至少 2GB RAM 并上传 <10Mb 的文件。",
"warn-end": "内置模型将在首次使用时自动下载。",
},
embedding: {
title: "嵌入首选项",
"desc-start":
@ -557,7 +543,6 @@ const TRANSLATIONS = {
description: "使用 AnythingLLM 的本机嵌入引擎时不需要设置。",
},
},
text: {
title: "文本拆分和分块首选项",
"desc-start":
@ -571,13 +556,11 @@ const TRANSLATIONS = {
description: "这是单个向量中可以存在的字符的最大长度。",
recommend: "嵌入模型的最大长度为",
},
overlap: {
title: "文本块重叠",
description: "这是在两个相邻文本块之间分块期间发生的最大字符重叠。",
},
},
vector: {
title: "向量数据库",
description:
@ -587,7 +570,6 @@ const TRANSLATIONS = {
description: "LanceDB 不需要任何配置。",
},
},
embeddable: {
title: "可嵌入的聊天小部件",
description:
@ -599,7 +581,6 @@ const TRANSLATIONS = {
Active: "活动域",
},
},
"embed-chats": {
title: "嵌入的聊天历史纪录",
export: "导出",
@ -612,7 +593,6 @@ const TRANSLATIONS = {
at: "发送时间",
},
},
multi: {
title: "多用户模式",
description: "通过激活多用户模式来设置你的实例以支持你的团队。",
@ -636,7 +616,6 @@ const TRANSLATIONS = {
password: "实例密码",
},
},
event: {
title: "事件日志",
description: "查看此实例上发生的所有操作和事件以进行监控。",
@ -647,7 +626,6 @@ const TRANSLATIONS = {
occurred: "发生时间",
},
},
privacy: {
title: "隐私和数据处理",
description:
@ -657,7 +635,6 @@ const TRANSLATIONS = {
vector: "向量数据库",
anonymous: "启用匿名遥测",
},
connectors: {
"search-placeholder": "搜索数据连接器",
"no-connectors": "未找到数据连接器。",
@ -764,7 +741,6 @@ const TRANSLATIONS = {
pat_token_explained: "您的 Confluence 个人访问令牌。",
task_explained: "完成后,页面内容将可用于在文档选择器中嵌入至工作区。",
},
manage: {
documents: "文档",
"data-connectors": "数据连接器",
@ -828,8 +804,18 @@ const TRANSLATIONS = {
watch_explained_block3_end: " 管理视图中管理被监控的文档。",
accept: "好的,知道了",
},
obsidian: {
name: null,
description: null,
vault_location: null,
vault_description: null,
selected_files: null,
importing: null,
import_vault: null,
processing_time: null,
vault_warning: null,
},
},
chat_window: {
welcome: "欢迎来到你的新工作区。",
get_started: "开始使用,请先",
@ -845,7 +831,6 @@ const TRANSLATIONS = {
microphone: "语音输入你的提示。",
send: "将提示消息发送到工作区",
},
profile_settings: {
edit_account: "编辑帐户",
profile_picture: "头像",

View File

@ -638,6 +638,17 @@ const TRANSLATIONS = {
watch_explained_block3_end: " 管理觀看的文件。",
accept: "好的,明白了",
},
obsidian: {
name: null,
description: null,
vault_location: null,
vault_description: null,
selected_files: null,
importing: null,
import_vault: null,
processing_time: null,
vault_warning: null,
},
},
chat_window: {
welcome: "歡迎使用您的新工作區。",

View File

@ -187,6 +187,26 @@ const DataConnector = {
});
},
},
obsidian: {
collect: async function ({ files }) {
return await fetch(`${API_BASE}/ext/obsidian/vault`, {
method: "POST",
headers: baseHeaders(),
body: JSON.stringify({
files,
}),
})
.then((res) => res.json())
.then((res) => {
if (!res.success) throw new Error(res.reason);
return { data: res.data, error: null };
})
.catch((e) => {
console.error(e);
return { data: null, error: e.message };
});
},
},
};
export default DataConnector;

View File

@ -148,6 +148,28 @@ function extensionEndpoints(app) {
}
}
);
// Obsidian vault import: forwards the request body to the collector's
// /ext/obsidian/vault endpoint, records an "extension_invoked" telemetry
// event, and relays the collector's response. Restricted to admin and
// manager roles. Any thrown error is logged and answered with a 500.
app.post(
  "/ext/obsidian/vault",
  [validatedRequest, flexUserRoleValid([ROLES.admin, ROLES.manager])],
  async (request, response) => {
    try {
      const collector = new CollectorApi();
      const collectorResponse = await collector.forwardExtensionRequest({
        endpoint: "/ext/obsidian/vault",
        method: "POST",
        body: request.body,
      });

      const telemetryPayload = { type: "obsidian_vault" };
      await Telemetry.sendTelemetry("extension_invoked", telemetryPayload);

      response.status(200).json(collectorResponse);
    } catch (error) {
      console.error(error);
      response.sendStatus(500).end();
    }
  }
);
}
module.exports = { extensionEndpoints };