const path = require("path");
const filesystem = require("./lib.js");
const { safeJsonParse } = require("../../../../http/index.js");

/** Maximum number of bytes of ripgrep stdout captured per invocation (10MB). */
const RIPGREP_MAX_BUFFER = 10 * 1024 * 1024;

/**
 * Agent tool that searches the allowed directories either by file name
 * (glob mode) or by file contents (content mode), optionally returning the
 * contents of the matching files.
 */
module.exports.FilesystemSearchFiles = {
  name: "filesystem-search-files",
  plugin: function () {
    return {
      name: "filesystem-search-files",
      setup(aibitat) {
        aibitat.function({
          super: aibitat,
          name: this.name,
          description:
            "Search for files by name or content. USE THIS FIRST when you need to find a file " +
            "but don't know its exact location. " +
            "Two modes: 'glob' matches file paths/names (e.g., '*.csv', 'config'), " +
            "'content' searches inside files using regex (like grep). " +
            "Set 'includeFileContents: true' to also read and return the full contents of matching files " +
            "in a single operation (useful when you need to find AND read files). " +
            "Simple patterns like 'sales.csv' automatically match files containing that string anywhere.",
          examples: [
            {
              prompt: "Find all JavaScript files",
              call: JSON.stringify({
                pattern: "**/*.js",
                mode: "glob",
                includeFileContents: false,
              }),
            },
            {
              prompt: "Find all CSV files",
              call: JSON.stringify({
                pattern: "*.csv",
                mode: "glob",
              }),
            },
            {
              prompt: "Search for error handling code",
              call: JSON.stringify({
                pattern: "catch.*error",
                mode: "content",
                filePattern: "*.js",
                includeFileContents: true,
                maxFilesToRead: 3,
              }),
            },
            {
              prompt: "Find the config file and show its contents",
              call: JSON.stringify({
                pattern: "config",
                mode: "glob",
                includeFileContents: true,
              }),
            },
          ],
          parameters: {
            $schema: "http://json-schema.org/draft-07/schema#",
            type: "object",
            properties: {
              pattern: {
                type: "string",
                description:
                  "For glob mode: a glob pattern to match file paths. " +
                  "For content mode: the text or regex pattern to search for in file contents.",
              },
              mode: {
                type: "string",
                enum: ["glob", "content"],
                default: "glob",
                description:
                  "Search mode: 'glob' for matching file paths, 'content' for searching file contents.",
              },
              filePattern: {
                type: "string",
                description:
                  "For content mode only: glob pattern to filter which files to search (e.g., '*.js', '*.{ts,tsx}').",
              },
              excludePatterns: {
                type: "array",
                items: { type: "string" },
                default: [],
                description:
                  "Patterns to exclude from search (e.g., 'node_modules', '*.log').",
              },
              caseSensitive: {
                type: "boolean",
                default: true,
                description:
                  "For content mode: whether the search should be case-sensitive.",
              },
              maxResults: {
                type: "number",
                default: 100,
                description: "Maximum number of results to return.",
              },
              includeFileContents: {
                type: "boolean",
                default: false,
                description:
                  "If true, read and return the full contents of matching files (limited by maxFilesToRead). " +
                  "Useful when you need to analyze files, not just find them.",
              },
              maxFilesToRead: {
                type: "number",
                default: 5,
                description:
                  "When includeFileContents is true, maximum number of files to read contents from.",
              },
            },
            required: ["pattern"],
            additionalProperties: false,
          },
          handler: async function ({
            pattern = "",
            mode = "glob",
            filePattern = "",
            excludePatterns = [],
            caseSensitive = true,
            maxResults = 100,
            includeFileContents = false,
            maxFilesToRead = 5,
          }) {
            try {
              this.super.handlerProps.log(
                `Using the filesystem-search-files tool.`
              );

              await filesystem.ensureInitialized();
              const allowedDirs = filesystem.getAllowedDirectories();
              if (allowedDirs.length === 0) {
                return "Error: No allowed directories configured";
              }

              // Tool arguments come from a model, so coerce them into shapes
              // that are safe to hand to ripgrep and Array#slice.
              const options = {
                pattern,
                allowedDirs,
                excludePatterns: Array.isArray(excludePatterns)
                  ? excludePatterns
                  : [],
                maxResults: normalizeLimit(maxResults, 100),
                includeFileContents,
                maxFilesToRead: normalizeLimit(maxFilesToRead, 5),
              };

              if (mode === "glob") {
                return await runGlobSearch.call(this, options);
              }
              return await runContentSearch.call(this, {
                ...options,
                filePattern,
                caseSensitive,
              });
            } catch (e) {
              this.super.handlerProps.log(
                `filesystem-search-files error: ${e.message}`
              );
              this.super.introspect(`Error: ${e.message}`);
              return `Error searching files: ${e.message}`;
            }
          },
        });
      },
    };
  },
};

/**
 * Coerce a caller-supplied limit into a non-negative integer.
 * @param {unknown} value - Raw limit received from the tool call
 * @param {number} fallback - Value used when `value` is not a usable number
 * @returns {number} A non-negative integer
 */
function normalizeLimit(value, fallback) {
  const parsed = Math.floor(Number(value));
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback;
}

/**
 * Glob mode: list files whose path matches `pattern` in every allowed
 * directory. Must be invoked with the tool handler's `this`.
 * @returns {Promise<string>} Newline-separated paths, file contents, or "No matches found"
 * @throws {Error} When every searched directory failed (e.g. ripgrep missing)
 */
async function runGlobSearch({
  pattern,
  allowedDirs,
  excludePatterns,
  maxResults,
  includeFileContents,
  maxFilesToRead,
}) {
  // If pattern has no glob characters, convert to wildcard patterns
  // e.g., "sales" matches files containing "sales" anywhere in the name
  const hasGlobChars = /[*?[\]{}]/.test(pattern);
  const effectivePatterns = hasGlobChars
    ? [pattern]
    : [`*${pattern}*`, `**/*${pattern}*`];
  const patternNote = hasGlobChars
    ? ""
    : ` (using pattern: ${effectivePatterns.join(" or ")})`;

  this.super.introspect(
    `${this.caller}: Searching for "${pattern}"${patternNote} in ${allowedDirs.length} allowed director${allowedDirs.length === 1 ? "y" : "ies"}`
  );

  const allResults = [];
  const seenPaths = new Set();
  let failureCount = 0;
  let lastError = null;

  for (const dir of allowedDirs) {
    // Stop once the limit is reached instead of asking for <= 0 more results.
    if (allResults.length >= maxResults) break;
    try {
      const { files } = searchFilesWithRipgrepGlob({
        searchPath: dir,
        patterns: effectivePatterns,
        excludePatterns,
        maxResults: maxResults - allResults.length,
      });
      for (const filePath of files) {
        if (seenPaths.has(filePath)) continue;
        seenPaths.add(filePath);
        allResults.push(filePath);
      }
    } catch (error) {
      // Best effort: one bad directory must not abort the whole search,
      // but the failure is recorded rather than silently dropped.
      failureCount += 1;
      lastError = error;
      this.super.handlerProps.log(
        `filesystem-search-files: skipping ${dir}: ${error.message}`
      );
    }
  }

  // If nothing could be searched, "No matches found" would be misleading.
  if (lastError && failureCount === allowedDirs.length) throw lastError;

  const limitedResults = allResults.slice(0, maxResults);
  this.super.introspect(
    `Found ${allResults.length} matching files${allResults.length > maxResults ? ` (showing first ${maxResults})` : ""}`
  );

  if (limitedResults.length === 0) return "No matches found";
  if (includeFileContents) {
    return await readMatchingFileContents.call(
      this,
      limitedResults,
      maxFilesToRead
    );
  }
  return limitedResults.join("\n");
}

/**
 * Content mode: regex-search file contents in every allowed directory using
 * ripgrep. Must be invoked with the tool handler's `this`.
 * @returns {Promise<string>} Formatted matches, file contents, or "No matches found"
 * @throws {Error} When every searched directory failed (e.g. invalid regex)
 */
async function runContentSearch({
  pattern,
  allowedDirs,
  filePattern,
  excludePatterns,
  caseSensitive,
  maxResults,
  includeFileContents,
  maxFilesToRead,
}) {
  this.super.introspect(
    `${this.caller}: Searching for "${pattern}" in file contents across ${allowedDirs.length} allowed director${allowedDirs.length === 1 ? "y" : "ies"}`
  );

  const allResults = [];
  const seenKeys = new Set();
  let failureCount = 0;
  let lastError = null;

  for (const dir of allowedDirs) {
    if (allResults.length >= maxResults) break;
    try {
      const results = searchWithRipgrep({
        searchPath: dir,
        pattern,
        filePattern,
        excludePatterns,
        caseSensitive,
        maxResults: maxResults - allResults.length,
      });
      for (const result of results) {
        const key = `${result.file}:${result.line}`;
        if (seenKeys.has(key)) continue;
        seenKeys.add(key);
        allResults.push(result);
      }
    } catch (error) {
      // Best effort per directory; see runGlobSearch.
      failureCount += 1;
      lastError = error;
      this.super.handlerProps.log(
        `filesystem-search-files: skipping ${dir}: ${error.message}`
      );
    }
  }

  if (lastError && failureCount === allowedDirs.length) throw lastError;

  this.super.introspect(
    `Found ${allResults.length} matches${allResults.length > maxResults ? ` (showing first ${maxResults})` : ""}`
  );

  // Same answer as glob mode when nothing matched, even if file contents
  // were requested (otherwise an empty string would be returned).
  if (allResults.length === 0) return "No matches found";

  if (includeFileContents) {
    const uniqueFiles = [...new Set(allResults.map((r) => r.file))];
    return await readMatchingFileContents.call(
      this,
      uniqueFiles,
      maxFilesToRead
    );
  }
  return formatSearchResults(allResults, maxResults);
}

/**
 * Run the bundled ripgrep binary synchronously and return its stdout.
 * @param {string[]} args - Command line arguments (no shell is involved)
 * @returns {string} Captured stdout; empty string when there was no output
 * @throws {Error} When ripgrep is not installed, cannot be spawned, or exits
 *   with a code above 1 (exit code 1 only means "no matches")
 */
function runRipgrep(args) {
  const { spawnSync } = require("child_process");
  let rgPath;
  try {
    ({ rgPath } = require("@vscode/ripgrep"));
  } catch {
    throw new Error("@vscode/ripgrep not installed");
  }

  const result = spawnSync(rgPath, args, {
    encoding: "utf-8",
    maxBuffer: RIPGREP_MAX_BUFFER,
  });

  // A spawn failure leaves status === null, which the exit-code check below
  // would not catch.
  if (result.error && !result.stdout) throw result.error;

  // Exit code 1 means no matches (not an error)
  if (result.status > 1) {
    throw new Error(
      result.stderr || `ripgrep exited with code ${result.status}`
    );
  }

  const stdout = result.stdout || "";
  if (!result.error) return stdout;
  // Output was cut short (e.g. maxBuffer exceeded): keep what was captured
  // but drop the trailing, possibly incomplete, line.
  return stdout.slice(0, stdout.lastIndexOf("\n") + 1);
}

/**
 * Search for files by glob pattern using ripgrep (fast file listing).
 * @param {Object} options
 * @param {string} options.searchPath - Directory to list
 * @param {string[]} options.patterns - Include globs; a file matching any of them is listed
 * @param {string[]} [options.excludePatterns] - Globs to exclude
 * @param {number} [options.maxResults] - Maximum number of files to return
 * @returns {{ files: string[], method: string }}
 * @throws {Error} When ripgrep is unavailable or reports an error
 */
function searchFilesWithRipgrepGlob({
  searchPath,
  patterns,
  excludePatterns = [],
  maxResults = 100,
}) {
  if (maxResults <= 0) return { files: [], method: "ripgrep" };

  // Build ripgrep arguments for file listing
  const args = [
    "--files", // List files instead of searching content
    "--no-ignore", // Search all files, even those in .gitignore
  ];

  // Add glob patterns (ripgrep uses --glob for filtering --files output)
  for (const pattern of patterns) args.push("--glob", pattern);
  for (const exclude of excludePatterns) args.push("--glob", `!${exclude}`);
  // "--" ends option parsing so the path can never be read as a flag.
  args.push("--", searchPath);

  const stdout = runRipgrep(args);

  // unique files
  const files = new Set();
  for (const line of stdout.trim().split(/\r?\n/)) {
    if (!line) continue;
    files.add(line);
    if (files.size >= maxResults) break;
  }

  return { files: Array.from(files), method: "ripgrep" };
}

/**
 * Search file contents using @vscode/ripgrep binary directly via spawnSync.
 * @param {Object} options
 * @param {string} options.searchPath - Directory to search
 * @param {string} options.pattern - Regex to search for
 * @param {string} [options.filePattern] - Glob restricting which files are searched
 * @param {string[]} [options.excludePatterns] - Globs to exclude
 * @param {boolean} [options.caseSensitive] - When false, matching ignores case
 * @param {number} [options.maxResults] - Passed to ripgrep's --max-count, which
 *   limits matches per file, so more than this many results may be returned
 * @returns {{ file: string, line: number, content: string }[]}
 * @throws {Error} When ripgrep is unavailable or reports an error
 */
function searchWithRipgrep({
  searchPath,
  pattern,
  filePattern,
  excludePatterns = [],
  caseSensitive = true,
  maxResults = 100,
}) {
  if (maxResults <= 0) return [];

  // Build ripgrep arguments
  const args = [
    "--json", // JSON output for structured parsing
    "--line-number", // Include line numbers
    "--no-ignore", // Search all files, even those in .gitignore
    "--max-count",
    String(maxResults),
  ];

  if (!caseSensitive) args.push("--ignore-case");
  if (filePattern) args.push("--glob", filePattern);
  for (const exclude of excludePatterns) args.push("--glob", `!${exclude}`);

  // The pattern is model-supplied: pass it via --regexp and end option
  // parsing with "--" so a pattern starting with "-" cannot be interpreted
  // as a ripgrep flag.
  args.push("--regexp", pattern, "--", searchPath);

  const stdout = runRipgrep(args);

  // --json emits one JSON message per line (begin/match/end/summary), so
  // each line is parsed on its own; only "match" messages are kept.
  const results = [];
  for (const line of stdout.split("\n")) {
    if (!line.trim()) continue;
    const message = safeJsonParse(line, null);
    if (message?.type !== "match" || !message.data) continue;

    const { path: matchedPath, line_number: lineNumber, lines } = message.data;
    results.push({
      file: matchedPath?.text || matchedPath,
      line: lineNumber,
      content: (lines?.text || "").trim(),
    });
  }

  return results;
}

/**
 * Format search results for display.
 * @param {{ file: string, line: number, content: string }[]} results - Matches to render
 * @param {number} maxResults - Maximum number of matches to print
 * @returns {string} One `file:line: content` row per match, plus a note when
 *   some matches were omitted, or "No matches found"
 */
function formatSearchResults(results, maxResults) {
  if (results.length === 0) {
    return "No matches found";
  }

  const formatted = results
    .slice(0, maxResults)
    .map((r) => `${r.file}:${r.line}: ${r.content}`)
    .join("\n");

  const suffix =
    results.length > maxResults
      ? `\n\n... and ${results.length - maxResults} more matches`
      : "";

  return formatted + suffix;
}

/**
 * Read contents of matching files and add citations.
 * Must be invoked with the tool handler's `this` (uses `this.super` and
 * `this.caller`). A file that cannot be read contributes an error message
 * in place of its contents instead of aborting the whole operation.
 * @param {string[]} filePaths - Array of file paths to read
 * @param {number} maxFiles - Maximum number of files to read
 * @returns {Promise<string>} Combined file contents
 */
async function readMatchingFileContents(filePaths, maxFiles) {
  const filesToRead = filePaths.slice(0, maxFiles);
  const skippedCount = filePaths.length - filesToRead.length;

  this.super.introspect(
    `${this.caller}: Reading contents of ${filesToRead.length} file${filesToRead.length === 1 ? "" : "s"}${skippedCount > 0 ? ` (${skippedCount} more files not read)` : ""}`
  );

  const sections = [];
  for (const filePath of filesToRead) {
    let body;
    try {
      body = await filesystem.readFileContent(filePath);
      this.super.addCitation?.({
        id: `fs-${Buffer.from(filePath).toString("base64url").slice(0, 32)}`,
        title: path.basename(filePath),
        text: body,
        chunkSource: filePath,
        score: null,
      });
    } catch (error) {
      body = `Error reading file: ${error.message}`;
    }
    sections.push(`=== ${filePath} ===\n${body}`);
  }

  const combinedContent = sections.join("\n\n---\n\n");

  const { content: finalContent, wasTruncated } =
    filesystem.truncateContentForContext(
      combinedContent,
      this.super,
      `[Content truncated - file contents exceed context limit. Try reducing maxFilesToRead or searching more specifically.]`
    );

  if (wasTruncated) {
    this.super.introspect(
      `${this.caller}: File contents were truncated to fit context limit`
    );
  }

  const header =
    skippedCount > 0
      ? `Found ${filePaths.length} matching files. Showing contents of first ${filesToRead.length}:\n\n`
      : "";

  return header + finalContent;
}