const fs = require("fs/promises"); const path = require("path"); const os = require("os"); const { randomBytes } = require("crypto"); const { createTwoFilesPatch } = require("diff"); const { humanFileSize } = require("../../../../helpers");

/**
 * Manages filesystem operations with security constraints.
 * Ensures all file operations stay within allowed directories.
 *
 * Exported as a module-level singleton (see `module.exports` at the bottom),
 * so allowed-directory state is shared by all importers of this module.
 */
class FilesystemManager {
  // Buffer size (bytes) used by the raw head/tail readers below.
  static FILE_READ_CHUNK_SIZE = 1024;
  // Fraction of the model context window reserved for the model's response
  // when truncating file content (see truncateContentForContext).
  static CONTEXT_RESERVE_RATIO = 0.25;
  // Extensions treated as images by isImageFile / readImageAsAttachment.
  static IMAGE_EXTENSIONS = [ ".png", ".jpg", ".jpeg", ".gif", ".webp", ".svg", ".bmp", ];
  // Extension -> MIME type map; must stay in sync with IMAGE_EXTENSIONS.
  static IMAGE_MIME_TYPES = { ".png": "image/png", ".jpg": "image/jpeg", ".jpeg": "image/jpeg", ".gif": "image/gif", ".webp": "image/webp", ".svg": "image/svg+xml", ".bmp": "image/bmp", };

  /**
   * Checks if the filesystem tool is available.
   * The filesystem tool is only available when running in a docker container
   * or in development mode.
   * @returns {boolean} True if the tool is available
   */
  isToolAvailable() {
    if (process.env.NODE_ENV === "development") return true;
    return process.env.ANYTHING_LLM_RUNTIME === "docker";
  }

  // Absolute paths the manager is allowed to touch; populated lazily by
  // #initializeFilesystem via ensureInitialized().
  #allowedDirectories = [];
  #isInitialized = false;

  /**
   * Gets the default filesystem root path.
   * @returns {string} The default filesystem root path
   */
  #getDefaultFilesystemRoot() {
    const storageRoot =
      process.env.STORAGE_DIR ||
      path.resolve(__dirname, "../../../../../storage");
    return path.join(storageRoot, "anythingllm-fs");
  }

  /**
   * Initializes the filesystem with default or configured directories.
   * Directory-creation failures are logged but not fatal: a directory may
   * already exist or be created externally.
   * @param {string[]} [directories] - Optional array of directories to allow
   * @returns {Promise<string[]>} The initialized allowed directories
   */
  async #initializeFilesystem(directories = null) {
    if (directories && directories.length > 0) {
      this.#allowedDirectories = directories.map((dir) =>
        path.resolve(this.#expandHome(dir))
      );
    } else {
      const defaultRoot = this.#getDefaultFilesystemRoot();
      this.#allowedDirectories = [defaultRoot];
    }
    for (const dir of this.#allowedDirectories) {
      try {
        await fs.mkdir(dir, { recursive: true });
      } catch (error) {
        // Best-effort: keep going even if one directory cannot be created.
        console.error(
          `Warning: Could not create directory ${dir}: ${error.message}`
        );
      }
    }
    this.#isInitialized = true;
    return this.#allowedDirectories;
  }

  /**
   * Expands home directory tildes in paths.
   * @param {string} filepath - The path to expand
   * @returns {string} Expanded path
   */
  #expandHome(filepath) {
    // Only "~" or "~/..." are expanded; "~user/..." is deliberately untouched.
    if (filepath.startsWith("~/") || filepath === "~") {
      return path.join(os.homedir(), filepath.slice(1));
    }
    return filepath;
  }

  /**
   * Normalizes a path by standardizing format.
   * @param {string} p - The path to normalize
   * @returns {string} Normalized path
   */
  #normalizePath(p) {
    // Strip surrounding quotes LLM tool-calls sometimes wrap paths in.
    p = p.trim().replace(/^["']|["']$/g, "");
    // NOTE(review): the remainder of this method is truncated in this copy of
    // the file. The regex beginning `/(?` below is cut off mid-token, and the
    // code between it and the `.some((dir) => {` callback — apparently the
    // rest of #normalizePath, the whole #normalizeAndValidatePath helper, and
    // the #isPathWithinAllowedDirectories signature — is missing, most likely
    // eaten by something stripping `<...>` spans (a lookbehind like `(?<!...)`
    // would start exactly this way). Restore this region from version control
    // before relying on this file; callers below invoke
    // this.#isPathWithinAllowedDirectories(normalizedPath, allowedDirs) and
    // this.#normalizeAndValidatePath(dir).
    if (p.startsWith("/")) return p.replace(/\/+/g, "/").replace(/(? {
      const normalizedDir = this.#normalizeAndValidatePath(dir);
      if (!normalizedDir) return false;
      // Exact match, or strictly inside the allowed dir (path.sep guard
      // prevents "/allowed-evil" matching "/allowed").
      if (normalizedPath === normalizedDir) return true;
      return normalizedPath.startsWith(normalizedDir + path.sep);
    });
  }

  /**
   * Resolves a relative path against allowed directories.
   * Tries each allowed directory in order and returns the first candidate
   * that stays inside the sandbox; otherwise falls back to resolving against
   * the first allowed directory (which will then fail validation upstream).
   * @param {string} relativePath - The relative path to resolve
   * @returns {string} The resolved absolute path
   */
  #resolveRelativePathAgainstAllowedDirectories(relativePath) {
    if (this.#allowedDirectories.length === 0) {
      return path.resolve(process.cwd(), relativePath);
    }
    for (const allowedDir of this.#allowedDirectories) {
      const candidate = path.resolve(allowedDir, relativePath);
      const normalizedCandidate = this.#normalizePath(candidate);
      if (
        this.#isPathWithinAllowedDirectories(
          normalizedCandidate,
          this.#allowedDirectories
        )
      ) {
        return candidate;
      }
    }
    return path.resolve(this.#allowedDirectories[0], relativePath);
  }

  /**
   * Normalizes line endings to Unix-style.
   * @param {string} text - Text to normalize
   * @returns {string} Text with normalized line endings
   */
  #normalizeLineEndings(text) {
    return text.replace(/\r\n/g, "\n");
  }

  /**
   * Writes content to a file atomically using a temp file and rename.
   * The random temp name avoids collisions between concurrent writers; the
   * rename makes the update all-or-nothing on the same filesystem.
   * @param {string} filePath - Path to the file
   * @param {string} content - Content to write
   * @returns {Promise<void>}
   */
  async #atomicWrite(filePath, content) {
    const tempPath = `${filePath}.${randomBytes(16).toString("hex")}.tmp`;
    try {
      await fs.writeFile(tempPath, content, "utf-8");
      await fs.rename(tempPath, filePath);
    } catch (error) {
      // Best-effort cleanup of the orphaned temp file; original error wins.
      try {
        await fs.unlink(tempPath);
      } catch {}
      throw error;
    }
  }

  /**
   * Reads file content with collector API fallback.
   * Prefers the collector's parsed text (useful for binary formats like PDF);
   * falls back to the raw reader when the collector is offline or fails.
   * @param {string} filePath - Path to the file
   * @param {Function} processContent - Function to process parsed content
   * @param {Function} rawFallback - Function to call if collector fails
   * @returns {Promise<string>} Processed content
   */
  async #withCollectorFallback(filePath, processContent, rawFallback) {
    const parseResult = await this.#parseFileWithCollector(filePath);
    if (parseResult.parsed && parseResult.content) {
      return processContent(parseResult.content);
    }
    return rawFallback();
  }

  /**
   * Creates a unified diff between two strings.
   * @param {string} originalContent - Original content
   * @param {string} newContent - New content
   * @param {string} filepath - File path for diff header
   * @returns {string} Unified diff string
   */
  #createUnifiedDiff(originalContent, newContent, filepath = "file") {
    const normalizedOriginal = this.#normalizeLineEndings(originalContent);
    const normalizedNew = this.#normalizeLineEndings(newContent);
    return createTwoFilesPatch(
      filepath,
      filepath,
      normalizedOriginal,
      normalizedNew,
      "original",
      "modified"
    );
  }

  /**
   * Reads file content as text using direct file read.
   * @param {string} filePath - Path to the file
   * @param {string} encoding - File encoding
   * @returns {Promise<string>} File content
   */
  async #readFileContentRaw(filePath, encoding = "utf-8") {
    return await fs.readFile(filePath, encoding);
  }

  /**
   * Parses a file using the collector API to extract text content.
   * Never throws: all failure modes are reported as `{ parsed: false, error }`.
   * @param {string} filePath - Absolute path to the file
   * @returns {Promise<{content: string, parsed: boolean}>} Parsed content
   */
  async #parseFileWithCollector(filePath) {
    try {
      // Lazy require avoids a circular-dependency/boot-order issue at module
      // load time — NOTE(review): presumed; confirm before hoisting to top.
      const { CollectorApi } = require("../../../../collectorApi");
      const collectorApi = new CollectorApi();
      const isOnline = await collectorApi.online();
      if (!isOnline) {
        return {
          content: null,
          parsed: false,
          error: "Collector service offline",
        };
      }
      const filename = path.basename(filePath);
      const result = await collectorApi.parseDocument(filename, {
        absolutePath: filePath,
      });
      if (!result || !result.success) {
        return {
          content: null,
          parsed: false,
          error: result?.reason || "Failed to parse document",
        };
      }
      // The collector may return a single content string or a document list.
      if (result.content) return { content: result.content, parsed: true };
      if (result.documents && result.documents.length > 0) {
        const content = result.documents
          .map((doc) => doc.pageContent || doc.content || "")
          .filter(Boolean)
          .join("\n\n");
        if (content) return { content, parsed: true };
      }
      return { content: null, parsed: false, error: "No content in response" };
    } catch (error) {
      return { content: null, parsed: false, error: error.message };
    }
  }

  /**
   * Gets the last N lines of a file using raw file operations.
   * Reads fixed-size chunks backwards from EOF so large files are not loaded
   * wholly into memory.
   * @param {string} filePath - Path to the file
   * @param {number} numLines - Number of lines to return
   * @returns {Promise<string>} Last N lines of the file
   */
  async #tailFileRaw(filePath, numLines) {
    const stats = await fs.stat(filePath);
    const fileSize = stats.size;
    if (fileSize === 0) return "";
    const fileHandle = await fs.open(filePath, "r");
    try {
      const lines = [];
      let position = fileSize;
      const chunk = Buffer.alloc(FilesystemManager.FILE_READ_CHUNK_SIZE);
      let linesFound = 0;
      // Text carried over from the previous (later-in-file) chunk whose first
      // line may continue into the earlier chunk we read next.
      let remainingText = "";
      while (position > 0 && linesFound < numLines) {
        const size = Math.min(FilesystemManager.FILE_READ_CHUNK_SIZE, position);
        position -= size;
        const { bytesRead } = await fileHandle.read(chunk, 0, size, position);
        if (!bytesRead) break;
        // NOTE(review): decoding each chunk independently can split a
        // multi-byte UTF-8 sequence at a chunk boundary — confirm acceptable.
        const readData = chunk.slice(0, bytesRead).toString("utf-8");
        const chunkText = readData + remainingText;
        const chunkLines = this.#normalizeLineEndings(chunkText).split("\n");
        if (position > 0) {
          // First piece may be a partial line; defer it to the next iteration.
          remainingText = chunkLines[0];
          chunkLines.shift();
        }
        // Collect lines from the end of the chunk, newest-first into `lines`.
        for (
          let i = chunkLines.length - 1;
          i >= 0 && linesFound < numLines;
          i--
        ) {
          lines.unshift(chunkLines[i]);
          linesFound++;
        }
      }
      return lines.join("\n");
    } finally {
      await fileHandle.close();
    }
  }

  /**
   * Gets the first N lines of a file using raw file operations.
   * Streams forward in fixed-size chunks, stopping as soon as N lines exist.
   * @param {string} filePath - Path to the file
   * @param {number} numLines - Number of lines to return
   * @returns {Promise<string>} First N lines of the file
   */
  async #headFileRaw(filePath, numLines) {
    const fileHandle = await fs.open(filePath, "r");
    try {
      const lines = [];
      let buffer = "";
      // Doubles as total-bytes-consumed and the next read offset.
      let bytesRead = 0;
      const chunk = Buffer.alloc(FilesystemManager.FILE_READ_CHUNK_SIZE);
      while (lines.length < numLines) {
        const result = await fileHandle.read(chunk, 0, chunk.length, bytesRead);
        if (result.bytesRead === 0) break; // EOF
        bytesRead += result.bytesRead;
        buffer += chunk.slice(0, result.bytesRead).toString("utf-8");
        const newLineIndex = buffer.lastIndexOf("\n");
        if (newLineIndex !== -1) {
          // Everything before the last newline is complete lines; the tail
          // stays buffered for the next read.
          const completeLines = buffer.slice(0, newLineIndex).split("\n");
          buffer = buffer.slice(newLineIndex + 1);
          for (const line of completeLines) {
            lines.push(line);
            if (lines.length >= numLines) break;
          }
        }
      }
      // Final unterminated line (file without trailing newline).
      if (buffer.length > 0 && lines.length < numLines) {
        lines.push(buffer);
      }
      return lines.join("\n");
    } finally {
      await fileHandle.close();
    }
  }

  /**
   * Gets the current allowed directories.
   * @returns {string[]} Array of allowed directory paths (defensive copy)
   */
  getAllowedDirectories() {
    return [...this.#allowedDirectories];
  }

  /**
   * Ensures the filesystem is initialized before use.
   * @returns {Promise<void>}
   */
  async ensureInitialized() {
    if (!this.#isInitialized) await this.#initializeFilesystem();
  }

  /**
   * Validates a path for security, ensuring it's within allowed directories.
   * Also resolves symlinks (via realpath) and re-checks the resolved target,
   * so a symlink inside the sandbox cannot point outside it.
   * @param {string} requestedPath - The path to validate
   * @returns {Promise<string>} The validated absolute path
   * @throws {Error} If path is outside allowed directories
   */
  async validatePath(requestedPath) {
    await this.ensureInitialized();
    const expandedPath = this.#expandHome(requestedPath);
    const absolute = path.isAbsolute(expandedPath)
      ? path.resolve(expandedPath)
      : this.#resolveRelativePathAgainstAllowedDirectories(expandedPath);
    const normalizedRequested = this.#normalizePath(absolute);
    const isAllowed = this.#isPathWithinAllowedDirectories(
      normalizedRequested,
      this.#allowedDirectories
    );
    if (!isAllowed) {
      console.log(
        `[validatePath] Access denied - path outside allowed directories: ${absolute} not in ${this.#allowedDirectories.join(", ")}`
      );
      throw new Error(`Access denied - path outside allowed directories.`);
    }
    try {
      // Resolve symlinks and re-validate the real target.
      const realPath = await fs.realpath(absolute);
      const normalizedReal = this.#normalizePath(realPath);
      if (
        !this.#isPathWithinAllowedDirectories(
          normalizedReal,
          this.#allowedDirectories
        )
      ) {
        console.log(
          `[validatePath] Access denied - symlink target outside allowed directories: ${realPath} not in ${this.#allowedDirectories.join(", ")}`
        );
        throw new Error(
          `Access denied - symlink target outside allowed directories.`
        );
      }
      return realPath;
    } catch (error) {
      if (error.code === "ENOENT") {
        // Path does not exist yet (e.g. a file about to be created):
        // validate the parent directory instead.
        const parentDir = path.dirname(absolute);
        try {
          const realParentPath = await fs.realpath(parentDir);
          const normalizedParent = this.#normalizePath(realParentPath);
          if (
            !this.#isPathWithinAllowedDirectories(
              normalizedParent,
              this.#allowedDirectories
            )
          ) {
            console.log(
              `[validatePath] Access denied - parent directory outside allowed directories: ${realParentPath} not in ${this.#allowedDirectories.join(", ")}`
            );
            throw new Error(
              `Access denied - parent directory outside allowed directories.`
            );
          }
          return absolute;
        } catch {
          // NOTE(review): this bare catch also swallows the "Access denied -
          // parent directory..." error thrown just above and re-labels it as
          // "Parent directory does not exist" — the deny is still enforced,
          // but the message is misleading. Consider rethrowing access-denied
          // errors untouched.
          throw new Error(`Parent directory does not exist: ${parentDir}`);
        }
      }
      // Non-ENOENT errors (including the symlink access-denied throw above)
      // propagate unchanged.
      throw error;
    }
  }

  /**
   * Gets detailed file statistics.
   * @param {string} filePath - Path to the file
   * @returns {Promise<object>} File statistics (size, timestamps, type flags,
   *   and octal permission string, e.g. "644")
   */
  async getFileStats(filePath) {
    const stats = await fs.stat(filePath);
    return {
      size: stats.size,
      sizeFormatted: humanFileSize(stats.size, true, 2),
      created: stats.birthtime.toISOString(),
      modified: stats.mtime.toISOString(),
      accessed: stats.atime.toISOString(),
      isDirectory: stats.isDirectory(),
      isFile: stats.isFile(),
      // Last three octal digits of the mode = rwx bits for user/group/other.
      permissions: stats.mode.toString(8).slice(-3),
    };
  }

  /**
   * Reads file content, using the collector API to parse binary files.
   * @param {string} filePath - Path to the file
   * @param {string} encoding - File encoding (default: utf-8)
   * @returns {Promise<string>} File content
   */
  async readFileContent(filePath, encoding = "utf-8") {
    return this.#withCollectorFallback(
      filePath,
      (content) => content,
      () => this.#readFileContentRaw(filePath, encoding)
    );
  }

  /**
   * Writes content to a file securely.
   * Creation of a new file uses the exclusive "wx" flag; overwriting an
   * existing file goes through the atomic temp-file-and-rename path.
   * @param {string} filePath - Path to the file
   * @param {string} content - Content to write
   * @returns {Promise<void>}
   */
  async writeFileContent(filePath, content) {
    try {
      // "wx" fails with EEXIST rather than clobbering an existing file.
      await fs.writeFile(filePath, content, { encoding: "utf-8", flag: "wx" });
    } catch (error) {
      if (error.code === "EEXIST") {
        await this.#atomicWrite(filePath, content);
      } else {
        throw error;
      }
    }
  }

  /**
   * Applies edits to a file.
   * Each edit is first tried as an exact substring replacement; failing that,
   * a line-by-line whitespace-insensitive match is attempted and the original
   * indentation is re-applied to the replacement lines.
   * @param {string} filePath - Path to the file
   * @param {Array<{oldText: string, newText: string}>} edits - Array of edits
   * @param {boolean} dryRun - If true, only preview changes
   * @returns {Promise<string>} Diff of changes, fenced as a ```diff block
   * @throws {Error} If an edit's oldText cannot be located in the file
   */
  async applyFileEdits(filePath, edits, dryRun = false) {
    const content = this.#normalizeLineEndings(
      await fs.readFile(filePath, "utf-8")
    );
    let modifiedContent = content;
    for (const edit of edits) {
      const normalizedOld = this.#normalizeLineEndings(edit.oldText);
      const normalizedNew = this.#normalizeLineEndings(edit.newText);
      // Fast path: exact match, replace first occurrence only.
      if (modifiedContent.includes(normalizedOld)) {
        modifiedContent = modifiedContent.replace(normalizedOld, normalizedNew);
        continue;
      }
      // Fallback: match line-by-line ignoring leading/trailing whitespace.
      const oldLines = normalizedOld.split("\n");
      const contentLines = modifiedContent.split("\n");
      let matchFound = false;
      for (let i = 0; i <= contentLines.length - oldLines.length; i++) {
        const potentialMatch = contentLines.slice(i, i + oldLines.length);
        const isMatch = oldLines.every((oldLine, j) => {
          const contentLine = potentialMatch[j];
          return oldLine.trim() === contentLine.trim();
        });
        if (isMatch) {
          // Preserve the file's indentation at the match site: first new line
          // takes the matched line's indent; later lines keep their indent
          // relative to the corresponding old line.
          const originalIndent = contentLines[i].match(/^\s*/)?.[0] || "";
          const newLines = normalizedNew.split("\n").map((line, j) => {
            if (j === 0) return originalIndent + line.trimStart();
            const oldIndent = oldLines[j]?.match(/^\s*/)?.[0] || "";
            const newIndent = line.match(/^\s*/)?.[0] || "";
            if (oldIndent && newIndent) {
              const relativeIndent = newIndent.length - oldIndent.length;
              return (
                originalIndent +
                " ".repeat(Math.max(0, relativeIndent)) +
                line.trimStart()
              );
            }
            return line;
          });
          contentLines.splice(i, oldLines.length, ...newLines);
          modifiedContent = contentLines.join("\n");
          matchFound = true;
          break;
        }
      }
      if (!matchFound) {
        throw new Error(
          `Could not find exact match for edit:\n${edit.oldText}`
        );
      }
    }
    const diffResult = this.#createUnifiedDiff(
      content,
      modifiedContent,
      filePath
    );
    // Grow the fence until it cannot collide with backtick runs in the diff.
    let numBackticks = 3;
    while (diffResult.includes("`".repeat(numBackticks))) {
      numBackticks++;
    }
    const formattedDiff =
      `${"`".repeat(numBackticks)}diff\n${diffResult}${"`".repeat(numBackticks)}\n\n`;
    if (!dryRun) {
      await this.#atomicWrite(filePath, modifiedContent);
    }
    return formattedDiff;
  }

  /**
   * Gets the last N lines of a file.
   * @param {string} filePath - Path to the file
   * @param {number} numLines - Number of lines to return
   * @returns {Promise<string>} Last N lines of the file
   */
  async tailFile(filePath, numLines) {
    return this.#withCollectorFallback(
      filePath,
      (content) => {
        const lines = this.#normalizeLineEndings(content).split("\n");
        return lines.slice(-numLines).join("\n");
      },
      () => this.#tailFileRaw(filePath, numLines)
    );
  }

  /**
   * Gets the first N lines of a file.
   * @param {string} filePath - Path to the file
   * @param {number} numLines - Number of lines to return
   * @returns {Promise<string>} First N lines of the file
   */
  async headFile(filePath, numLines) {
    return this.#withCollectorFallback(
      filePath,
      (content) => {
        const lines = this.#normalizeLineEndings(content).split("\n");
        return lines.slice(0, numLines).join("\n");
      },
      () => this.#headFileRaw(filePath, numLines)
    );
  }

  /**
   * Searches for files matching a glob pattern.
   * Recursive depth-first walk; entries that fail validatePath (outside the
   * sandbox, unreadable, etc.) are silently skipped. A pattern matches if it
   * matches either the path relative to rootPath or the bare entry name.
   * @param {string} rootPath - Root path to search from
   * @param {string} pattern - Glob pattern to match
   * @param {Object} options - Search options
   * @param {string[]} options.excludePatterns - Patterns to exclude
   * @returns {Promise<string[]>} Array of matching file paths
   */
  async searchFilesWithGlob(rootPath, pattern, options = {}) {
    const minimatch = require("minimatch");
    const { excludePatterns = [] } = options;
    const results = [];
    // dot: match hidden files; nocase: case-insensitive matching.
    const matchOptions = { dot: true, nocase: true };
    const search = async (currentPath) => {
      const entries = await fs.readdir(currentPath, { withFileTypes: true });
      for (const entry of entries) {
        const fullPath = path.join(currentPath, entry.name);
        try {
          await this.validatePath(fullPath);
          const relativePath = path.relative(rootPath, fullPath);
          const shouldExclude = excludePatterns.some(
            (excludePattern) =>
              minimatch(relativePath, excludePattern, matchOptions) ||
              minimatch(entry.name, excludePattern, matchOptions)
          );
          if (shouldExclude) continue;
          const matchesPath = minimatch(relativePath, pattern, matchOptions);
          const matchesName = minimatch(entry.name, pattern, matchOptions);
          if (matchesPath || matchesName) {
            results.push(fullPath);
          }
          if (entry.isDirectory()) {
            await search(fullPath);
          }
        } catch {
          // Inaccessible or out-of-sandbox entry: skip, keep walking siblings.
          continue;
        }
      }
    };
    await search(rootPath);
    return results;
  }

  /**
   * Truncates content if it exceeds the model's context limit.
   * Uses the measured average chars-per-token of this content to estimate a
   * character budget, then prefers cutting at the last newline if that loses
   * less than ~20% of the budget.
   * @param {string} content - The content to potentially truncate
   * @param {object} aibitat - The aibitat instance with model/provider info
   * @param {string} [truncationMessage] - Optional custom message
   * @returns {{content: string, wasTruncated: boolean}} The content and truncation status
   */
  truncateContentForContext(content, aibitat, truncationMessage = null) {
    const { TokenManager } = require("../../../../helpers/tiktoken");
    const Provider = require("../../providers/ai-provider");
    const contextLimit = Provider.contextLimit(aibitat.provider, aibitat.model);
    const reserveForResponse = Math.floor(
      contextLimit * FilesystemManager.CONTEXT_RESERVE_RATIO
    );
    const maxTokens = contextLimit - reserveForResponse;
    const tokenManager = new TokenManager(aibitat.model);
    const tokenCount = tokenManager.countFromString(content);
    if (tokenCount <= maxTokens) {
      return { content, wasTruncated: false };
    }
    const avgCharsPerToken = content.length / tokenCount;
    let targetChars = Math.floor(maxTokens * avgCharsPerToken);
    let truncated = content.slice(0, targetChars);
    const lastNewline = truncated.lastIndexOf("\n");
    if (lastNewline > targetChars * 0.8) {
      truncated = truncated.slice(0, lastNewline);
    }
    const defaultMessage =
      "[Content truncated - exceeds context limit. Consider reading smaller portions.]";
    const message = truncationMessage || defaultMessage;
    return {
      content: truncated + "\n\n" + message,
      wasTruncated: true,
    };
  }

  /**
   * Check if a file path points to an image file.
   * @param {string} filePath - Path to the file
   * @returns {boolean} True if the file is an image
   */
  isImageFile(filePath) {
    const ext = path.extname(filePath).toLowerCase();
    return FilesystemManager.IMAGE_EXTENSIONS.includes(ext);
  }

  /**
   * Get the MIME type for an image file.
   * @param {string} filePath - Path to the file
   * @returns {string|null} MIME type or null if not an image
   */
  getImageMimeType(filePath) {
    const ext = path.extname(filePath).toLowerCase();
    return FilesystemManager.IMAGE_MIME_TYPES[ext] || null;
  }

  /**
   * Read an image file and return it as an attachment object.
   * Never throws: any read error is logged and null is returned.
   * @param {string} filePath - Validated absolute path to the image file
   * @returns {Promise<{name: string, mime: string, contentString: string}|null>} Attachment object or null on error
   */
  async readImageAsAttachment(filePath) {
    try {
      const mime = this.getImageMimeType(filePath);
      if (!mime) return null;
      const buffer = await fs.readFile(filePath);
      const base64 = buffer.toString("base64");
      const filename = path.basename(filePath);
      return {
        name: filename,
        mime,
        contentString: `data:${mime};base64,${base64}`,
      };
    } catch (error) {
      console.error(`Error reading image file ${filePath}:`, error.message);
      return null;
    }
  }
}

module.exports = new FilesystemManager();