RSA-Signing on server<->collector communication via API (#1005)

* WIP integrity check between processes

* Implement integrity checking on document processor payloads
This commit is contained in:
Timothy Carambat 2024-04-01 13:56:35 -07:00 committed by GitHub
parent 200bd7f061
commit f4088d9348
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 278 additions and 72 deletions

View File

@ -2,6 +2,7 @@
"cSpell.words": [ "cSpell.words": [
"anythingllm", "anythingllm",
"Astra", "Astra",
"comkey",
"Dockerized", "Dockerized",
"Embeddable", "Embeddable",
"GROQ", "GROQ",
@ -20,4 +21,4 @@
], ],
"eslint.experimental.useFlatConfig": true, "eslint.experimental.useFlatConfig": true,
"docker.languageserver.formatter.ignoreMultilineInstructions": true "docker.languageserver.formatter.ignoreMultilineInstructions": true
} }

View File

@ -1,9 +1,10 @@
const { verifyPayloadIntegrity } = require("../middleware/verifyIntegrity");
const { reqBody } = require("../utils/http"); const { reqBody } = require("../utils/http");
function extensions(app) { function extensions(app) {
if (!app) return; if (!app) return;
app.post("/ext/github-repo", async function (request, response) { app.post("/ext/github-repo", [verifyPayloadIntegrity], async function (request, response) {
try { try {
const loadGithubRepo = require("../utils/extensions/GithubRepo"); const loadGithubRepo = require("../utils/extensions/GithubRepo");
const { success, reason, data } = await loadGithubRepo(reqBody(request)); const { success, reason, data } = await loadGithubRepo(reqBody(request));
@ -24,7 +25,7 @@ function extensions(app) {
}); });
// gets all branches for a specific repo // gets all branches for a specific repo
app.post("/ext/github-repo/branches", async function (request, response) { app.post("/ext/github-repo/branches", [verifyPayloadIntegrity], async function (request, response) {
try { try {
const GithubRepoLoader = require("../utils/extensions/GithubRepo/RepoLoader"); const GithubRepoLoader = require("../utils/extensions/GithubRepo/RepoLoader");
const allBranches = await (new GithubRepoLoader(reqBody(request))).getRepoBranches() const allBranches = await (new GithubRepoLoader(reqBody(request))).getRepoBranches()
@ -48,7 +49,7 @@ function extensions(app) {
return; return;
}); });
app.post("/ext/youtube-transcript", async function (request, response) { app.post("/ext/youtube-transcript", [verifyPayloadIntegrity], async function (request, response) {
try { try {
const loadYouTubeTranscript = require("../utils/extensions/YoutubeTranscript"); const loadYouTubeTranscript = require("../utils/extensions/YoutubeTranscript");
const { success, reason, data } = await loadYouTubeTranscript(reqBody(request)); const { success, reason, data } = await loadYouTubeTranscript(reqBody(request));

View File

@ -13,6 +13,7 @@ const { processLink } = require("./processLink");
const { wipeCollectorStorage } = require("./utils/files"); const { wipeCollectorStorage } = require("./utils/files");
const extensions = require("./extensions"); const extensions = require("./extensions");
const { processRawText } = require("./processRawText"); const { processRawText } = require("./processRawText");
const { verifyPayloadIntegrity } = require("./middleware/verifyIntegrity");
const app = express(); const app = express();
app.use(cors({ origin: true })); app.use(cors({ origin: true }));
@ -24,71 +25,83 @@ app.use(
}) })
); );
app.post("/process", async function (request, response) { app.post(
const { filename, options = {} } = reqBody(request); "/process",
try { [verifyPayloadIntegrity],
const targetFilename = path async function (request, response) {
.normalize(filename) const { filename, options = {} } = reqBody(request);
.replace(/^(\.\.(\/|\\|$))+/, ""); try {
const { const targetFilename = path
success, .normalize(filename)
reason, .replace(/^(\.\.(\/|\\|$))+/, "");
documents = [], const {
} = await processSingleFile(targetFilename, options); success,
response reason,
.status(200) documents = [],
.json({ filename: targetFilename, success, reason, documents }); } = await processSingleFile(targetFilename, options);
} catch (e) { response
console.error(e); .status(200)
response.status(200).json({ .json({ filename: targetFilename, success, reason, documents });
filename: filename, } catch (e) {
success: false, console.error(e);
reason: "A processing error occurred.", response.status(200).json({
documents: [], filename: filename,
}); success: false,
reason: "A processing error occurred.",
documents: [],
});
}
return;
} }
return; );
});
app.post("/process-link", async function (request, response) { app.post(
const { link } = reqBody(request); "/process-link",
try { [verifyPayloadIntegrity],
const { success, reason, documents = [] } = await processLink(link); async function (request, response) {
response.status(200).json({ url: link, success, reason, documents }); const { link } = reqBody(request);
} catch (e) { try {
console.error(e); const { success, reason, documents = [] } = await processLink(link);
response.status(200).json({ response.status(200).json({ url: link, success, reason, documents });
url: link, } catch (e) {
success: false, console.error(e);
reason: "A processing error occurred.", response.status(200).json({
documents: [], url: link,
}); success: false,
reason: "A processing error occurred.",
documents: [],
});
}
return;
} }
return; );
});
app.post("/process-raw-text", async function (request, response) { app.post(
const { textContent, metadata } = reqBody(request); "/process-raw-text",
try { [verifyPayloadIntegrity],
const { async function (request, response) {
success, const { textContent, metadata } = reqBody(request);
reason, try {
documents = [], const {
} = await processRawText(textContent, metadata); success,
response reason,
.status(200) documents = [],
.json({ filename: metadata.title, success, reason, documents }); } = await processRawText(textContent, metadata);
} catch (e) { response
console.error(e); .status(200)
response.status(200).json({ .json({ filename: metadata.title, success, reason, documents });
filename: metadata?.title || "Unknown-doc.txt", } catch (e) {
success: false, console.error(e);
reason: "A processing error occurred.", response.status(200).json({
documents: [], filename: metadata?.title || "Unknown-doc.txt",
}); success: false,
reason: "A processing error occurred.",
documents: [],
});
}
return;
} }
return; );
});
extensions(app); extensions(app);

View File

@ -0,0 +1,21 @@
const { CommunicationKey } = require("../utils/comKey");
function verifyPayloadIntegrity(request, response, next) {
const comKey = new CommunicationKey();
if (process.env.NODE_ENV === "development") {
comKey.log('verifyPayloadIntegrity is skipped in development.')
next();
return;
}
const signature = request.header("X-Integrity");
if (!signature) return response.status(400).json({ msg: 'Failed integrity signature check.' })
const validSignedPayload = comKey.verify(signature, request.body);
if (!validSignedPayload) return response.status(400).json({ msg: 'Failed integrity signature check.' })
next();
}
module.exports = {
verifyPayloadIntegrity
}

View File

@ -4,11 +4,26 @@ const {
WATCH_DIRECTORY, WATCH_DIRECTORY,
SUPPORTED_FILETYPE_CONVERTERS, SUPPORTED_FILETYPE_CONVERTERS,
} = require("../utils/constants"); } = require("../utils/constants");
const { trashFile, isTextType } = require("../utils/files"); const {
trashFile,
isTextType,
normalizePath,
isWithin,
} = require("../utils/files");
const RESERVED_FILES = ["__HOTDIR__.md"]; const RESERVED_FILES = ["__HOTDIR__.md"];
async function processSingleFile(targetFilename, options = {}) { async function processSingleFile(targetFilename, options = {}) {
const fullFilePath = path.resolve(WATCH_DIRECTORY, targetFilename); const fullFilePath = path.resolve(
WATCH_DIRECTORY,
normalizePath(targetFilename)
);
if (!isWithin(path.resolve(WATCH_DIRECTORY), fullFilePath))
return {
success: false,
reason: "Filename is a not a valid path to process.",
documents: [],
};
if (RESERVED_FILES.includes(targetFilename)) if (RESERVED_FILES.includes(targetFilename))
return { return {
success: false, success: false,

View File

@ -0,0 +1,42 @@
const crypto = require("crypto");
const fs = require("fs");
const path = require("path");
const keyPath =
process.env.NODE_ENV === "development"
? path.resolve(__dirname, `../../../server/storage/comkey`)
: path.resolve(process.env.STORAGE_DIR, `comkey`);
class CommunicationKey {
  #pubKeyName = "ipc-pub.pem";
  #storageLoc = keyPath;

  constructor() {}

  log(text, ...args) {
    console.log(`\x1b[36m[CommunicationKeyVerify]\x1b[0m ${text}`, ...args);
  }

  // Loads the public half of the RSA key pair the server wrote to shared storage.
  #readPublicKey() {
    const pemLocation = path.resolve(this.#storageLoc, this.#pubKeyName);
    return fs.readFileSync(pemLocation);
  }

  // Verifies that `signature` (hex string) was produced by the server's
  // private key over `textData`. Non-string payloads (eg: a parsed JSON
  // request body) are stringified before verification. Any failure — missing
  // key file, malformed signature, mismatch — yields `false` rather than
  // throwing, so this class only ever gates, never crashes, the collector.
  verify(signature = "", textData = "") {
    try {
      const payload =
        typeof textData === "string" ? textData : JSON.stringify(textData);
      return crypto.verify(
        "RSA-SHA256",
        Buffer.from(payload),
        this.#readPublicKey(),
        Buffer.from(signature, "hex")
      );
    } catch {}
    return false;
  }
}
module.exports = { CommunicationKey };

View File

@ -108,10 +108,33 @@ async function wipeCollectorStorage() {
return; return;
} }
/**
 * Checks if a given path is within another path.
 * Uses the platform separator (`path.sep`) so the containment check also
 * holds on Windows — the previous check only matched "../" prefixes and
 * would let a relative path like "..\\evil" (Windows traversal) through.
 * @param {string} outer - The outer path (should be resolved).
 * @param {string} inner - The inner path (should be resolved).
 * @returns {boolean} - Returns true if the inner path is within the outer path, false otherwise.
 */
function isWithin(outer, inner) {
  if (outer === inner) return false;
  const rel = path.relative(outer, inner);
  // Inner escapes outer when the relative path climbs out (".." or "../x"
  // with the platform separator) or lands on a different root/drive
  // (path.relative returns an absolute path on Windows in that case).
  return (
    rel !== ".." && !rel.startsWith(`..${path.sep}`) && !path.isAbsolute(rel)
  );
}
// Strips leading traversal sequences ("../" or "..\") from a user-supplied
// file path after normalizing it, and rejects values that reduce to a bare
// traversal token. Throws Error("Invalid path.") when the cleaned result is
// "..", "." or "/" (note: an empty input normalizes to "." and is rejected).
function normalizePath(filepath = "") {
  const normalized = path.normalize(filepath.trim());
  const cleaned = normalized.replace(/^(\.\.(\/|\\|$))+/, "").trim();
  if (cleaned === ".." || cleaned === "." || cleaned === "/")
    throw new Error("Invalid path.");
  return cleaned;
}
module.exports = { module.exports = {
trashFile, trashFile,
isTextType, isTextType,
createdDate, createdDate,
writeToServerDocuments, writeToServerDocuments,
wipeCollectorStorage, wipeCollectorStorage,
normalizePath,
isWithin,
}; };

1
server/.gitignore vendored
View File

@ -3,6 +3,7 @@
storage/assets/* storage/assets/*
!storage/assets/anything-llm.png !storage/assets/anything-llm.png
storage/documents/* storage/documents/*
storage/comkey/*
storage/tmp/* storage/tmp/*
storage/vector-cache/*.json storage/vector-cache/*.json
storage/exports storage/exports

View File

@ -1,4 +1,5 @@
const { Telemetry } = require("../../models/telemetry"); const { Telemetry } = require("../../models/telemetry");
const { CommunicationKey } = require("../comKey");
const setupTelemetry = require("../telemetry"); const setupTelemetry = require("../telemetry");
function bootSSL(app, port = 3001) { function bootSSL(app, port = 3001) {
@ -16,6 +17,7 @@ function bootSSL(app, port = 3001) {
.createServer(credentials, app) .createServer(credentials, app)
.listen(port, async () => { .listen(port, async () => {
await setupTelemetry(); await setupTelemetry();
new CommunicationKey(true);
console.log(`Primary server in HTTPS mode listening on port ${port}`); console.log(`Primary server in HTTPS mode listening on port ${port}`);
}) })
.on("error", catchSigTerms); .on("error", catchSigTerms);
@ -40,6 +42,7 @@ function bootHTTP(app, port = 3001) {
app app
.listen(port, async () => { .listen(port, async () => {
await setupTelemetry(); await setupTelemetry();
new CommunicationKey(true);
console.log(`Primary server in HTTP mode listening on port ${port}`); console.log(`Primary server in HTTP mode listening on port ${port}`);
}) })
.on("error", catchSigTerms); .on("error", catchSigTerms);

View File

@ -5,6 +5,8 @@
class CollectorApi { class CollectorApi {
constructor() { constructor() {
const { CommunicationKey } = require("../comKey");
this.comkey = new CommunicationKey();
this.endpoint = `http://0.0.0.0:${process.env.COLLECTOR_PORT || 8888}`; this.endpoint = `http://0.0.0.0:${process.env.COLLECTOR_PORT || 8888}`;
} }
@ -40,15 +42,19 @@ class CollectorApi {
async processDocument(filename = "") { async processDocument(filename = "") {
if (!filename) return false; if (!filename) return false;
const data = JSON.stringify({
filename,
options: this.#attachOptions(),
});
return await fetch(`${this.endpoint}/process`, { return await fetch(`${this.endpoint}/process`, {
method: "POST", method: "POST",
headers: { headers: {
"Content-Type": "application/json", "Content-Type": "application/json",
"X-Integrity": this.comkey.sign(data),
}, },
body: JSON.stringify({ body: data,
filename,
options: this.#attachOptions(),
}),
}) })
.then((res) => { .then((res) => {
if (!res.ok) throw new Error("Response could not be completed"); if (!res.ok) throw new Error("Response could not be completed");
@ -64,12 +70,14 @@ class CollectorApi {
async processLink(link = "") { async processLink(link = "") {
if (!link) return false; if (!link) return false;
const data = JSON.stringify({ link });
return await fetch(`${this.endpoint}/process-link`, { return await fetch(`${this.endpoint}/process-link`, {
method: "POST", method: "POST",
headers: { headers: {
"Content-Type": "application/json", "Content-Type": "application/json",
"X-Integrity": this.comkey.sign(data),
}, },
body: JSON.stringify({ link }), body: data,
}) })
.then((res) => { .then((res) => {
if (!res.ok) throw new Error("Response could not be completed"); if (!res.ok) throw new Error("Response could not be completed");
@ -83,12 +91,14 @@ class CollectorApi {
} }
async processRawText(textContent = "", metadata = {}) { async processRawText(textContent = "", metadata = {}) {
const data = JSON.stringify({ textContent, metadata });
return await fetch(`${this.endpoint}/process-raw-text`, { return await fetch(`${this.endpoint}/process-raw-text`, {
method: "POST", method: "POST",
headers: { headers: {
"Content-Type": "application/json", "Content-Type": "application/json",
"X-Integrity": this.comkey.sign(data),
}, },
body: JSON.stringify({ textContent, metadata }), body: data,
}) })
.then((res) => { .then((res) => {
if (!res.ok) throw new Error("Response could not be completed"); if (!res.ok) throw new Error("Response could not be completed");
@ -110,6 +120,7 @@ class CollectorApi {
body, // Stringified JSON! body, // Stringified JSON!
headers: { headers: {
"Content-Type": "application/json", "Content-Type": "application/json",
"X-Integrity": this.comkey.sign(body),
}, },
}) })
.then((res) => { .then((res) => {

View File

@ -0,0 +1,75 @@
const crypto = require("crypto");
const fs = require("fs");
const path = require("path");
const keyPath =
process.env.NODE_ENV === "development"
? path.resolve(__dirname, `../../storage/comkey`)
: path.resolve(process.env.STORAGE_DIR, `comkey`);
// What does this class do?
// This class generates a hashed version of some text (typically a JSON payload) using a rolling RSA key
// that can then be appended as a header value to do integrity checking on a payload. Given the
// nature of this class and that keys are rolled constantly, this protects the request
// integrity of requests sent to the collector as only the server can sign these requests.
// This keeps accidental misconfigurations of AnythingLLM that leave port 8888 open from
// being abused or SSRF'd by users scraping malicious sites who have a loopback embedded in a <script>, for example.
// Since each request to the collector must be signed to be valid, unsigned requests directly to the collector
// will be dropped and must go through the /server endpoint directly.
class CommunicationKey {
  #privKeyName = "ipc-priv.pem";
  #pubKeyName = "ipc-pub.pem";
  #storageLoc = keyPath;

  // Pass `generate = true` (done on server boot) to roll a fresh key pair so
  // signatures from a previous run cannot be replayed.
  constructor(generate = false) {
    if (generate) this.#generate();
  }

  log(text, ...args) {
    console.log(`\x1b[36m[CommunicationKey]\x1b[0m ${text}`, ...args);
  }

  #readPrivateKey() {
    return fs.readFileSync(path.resolve(this.#storageLoc, this.#privKeyName));
  }

  // Creates a 2048-bit RSA key pair (PKCS#1 PEM encoded) and persists both
  // halves to the shared storage location the collector reads its public key
  // from.
  #generate() {
    const { privateKey, publicKey } = crypto.generateKeyPairSync("rsa", {
      modulusLength: 2048,
      publicKeyEncoding: { type: "pkcs1", format: "pem" },
      privateKeyEncoding: { type: "pkcs1", format: "pem" },
    });

    if (!fs.existsSync(this.#storageLoc))
      fs.mkdirSync(this.#storageLoc, { recursive: true });

    fs.writeFileSync(
      path.resolve(this.#storageLoc, this.#privKeyName),
      privateKey
    );
    fs.writeFileSync(
      path.resolve(this.#storageLoc, this.#pubKeyName),
      publicKey
    );
    this.log(
      "RSA key pair generated for signed payloads within AnythingLLM services."
    );
  }

  // Returns a hex-encoded RSA-SHA256 signature over textData (typically the
  // stringified JSON body sent to the collector). The collector-side
  // CommunicationKey class verifies it with the matching public key; this
  // server-side class only signs.
  sign(textData = "") {
    const signature = crypto.sign(
      "RSA-SHA256",
      Buffer.from(textData),
      this.#readPrivateKey()
    );
    return signature.toString("hex");
  }
}
module.exports = { CommunicationKey };