Silent truncation of legal documents is unacceptable: Merlyn was reading partial files without knowing it. New behavior: (1) files at or under AGENT_MAX_FILE_TOKENS (default 500,000 tokens) return their full content; (2) files over the limit return an explicit message with the token count and the available options; (3) the paralegal controls any summarization decision; (4) Merlyn never silently truncates legal documents.
147 lines
6.0 KiB
JavaScript
147 lines
6.0 KiB
JavaScript
const path = require("path");
|
|
const filesystem = require("./lib.js");
|
|
|
|
/**
 * Token limit applied when AGENT_MAX_FILE_TOKENS is unset or unusable.
 */
const DEFAULT_MAX_FILE_TOKENS = 500_000;

/**
 * Resolve the maximum number of tokens a single file read may return.
 *
 * Reads AGENT_MAX_FILE_TOKENS from the environment. Anything that does not
 * parse to a number greater than zero (unset, empty, non-numeric, zero or
 * negative) falls back to DEFAULT_MAX_FILE_TOKENS, so a bad setting can never
 * cause every file to be reported as over the limit.
 *
 * @returns {number} The token limit to enforce.
 */
function resolveMaxFileTokens() {
  const configured = Number(process.env.AGENT_MAX_FILE_TOKENS);
  return configured > 0 ? configured : DEFAULT_MAX_FILE_TOKENS;
}

/**
 * Agent plugin definition for the "filesystem-read-text-file" tool.
 *
 * `plugin()` returns an object whose `setup(aibitat)` registers one function
 * on the given aibitat instance. The registered handler validates the
 * requested path, attaches image files instead of returning text, and
 * otherwise returns the file content (all of it, or the first/last N lines).
 * Content whose token count exceeds the configured limit is never truncated:
 * the handler returns an explanatory message listing the available options.
 */
module.exports.FilesystemReadTextFile = {
  name: "filesystem-read-text-file",
  plugin: function () {
    return {
      name: "filesystem-read-text-file",
      setup(aibitat) {
        aibitat.function({
          super: aibitat,
          name: this.name,
          description:
            "Read the contents of a file from the file system. " +
            "Supports many file types: text, code, PDFs, Word docs, audio/video (transcribed to text), and more. " +
            "Image files (png, jpg, jpeg, gif, webp, svg, bmp) are automatically attached for you to view and analyze visually. " +
            "IMPORTANT: Only use this tool when you know the exact file path. " +
            "If you don't know where a file is located, use 'filesystem-search-files' first " +
            "to find it (e.g., search for '*.csv' or the filename). " +
            "Use the 'head' parameter to read only the first N lines, or 'tail' for the last N lines (text files only). " +
            "Only works within allowed directories.",
          examples: [
            {
              prompt: "Read the contents of config.json",
              call: JSON.stringify({ path: "config.json" }),
            },
            {
              prompt: "Show me the last 50 lines of the log file",
              call: JSON.stringify({ path: "logs/app.log", tail: 50 }),
            },
            {
              prompt: "Read just the first 10 lines of README.md",
              call: JSON.stringify({ path: "README.md", head: 10 }),
            },
            {
              prompt: "Show me the screenshot.png image",
              call: JSON.stringify({ path: "screenshot.png" }),
            },
          ],
          parameters: {
            $schema: "http://json-schema.org/draft-07/schema#",
            type: "object",
            properties: {
              path: {
                type: "string",
                description:
                  "The path to the file to read. Can be relative to the allowed directory or absolute within allowed directories.",
              },
              head: {
                type: "number",
                description:
                  "If provided, returns only the first N lines of the file.",
              },
              tail: {
                type: "number",
                description:
                  "If provided, returns only the last N lines of the file.",
              },
            },
            required: ["path"],
            additionalProperties: false,
          },
          /**
           * Read a file and return its content, or a message explaining why
           * the content was not returned.
           *
           * @param {object} args
           * @param {string} [args.path] - File to read; passed through
           *   `filesystem.validatePath` before any access.
           * @param {number} [args.head] - Return only the first N lines.
           * @param {number} [args.tail] - Return only the last N lines.
           * @returns {Promise<string>} The file content, an image-attached
           *   notice, an over-limit notice, or an "Error ..." string. Errors
           *   are caught and reported as strings, never thrown to the caller.
           */
          handler: async function ({ path: filePath = "", head, tail }) {
            try {
              this.super.handlerProps.log(
                `Using the filesystem-read-text-file tool.`
              );

              if (head && tail) {
                return "Error: Cannot specify both head and tail parameters simultaneously.";
              }

              const validPath = await filesystem.validatePath(filePath);

              // Images are attached to the conversation rather than returned as text.
              if (filesystem.isImageFile(validPath)) {
                this.super.introspect(
                  `${this.caller}: Detected image file ${filePath}, attaching for viewing`
                );
                const attachment =
                  await filesystem.readImageAsAttachment(validPath);
                if (attachment) {
                  this.super.addToolAttachment?.(attachment);
                  const filename = path.basename(validPath);
                  return `Image file "${filename}" has been attached and is now visible in the conversation. You can describe what you see in the image.`;
                }
                return `Error: Could not read image file "${path.basename(validPath)}"`;
              }

              this.super.introspect(`${this.caller}: Reading file ${filePath}`);

              let content;
              if (tail) {
                content = await filesystem.tailFile(validPath, tail);
                this.super.introspect(
                  `Retrieved last ${tail} lines of ${filePath}`
                );
              } else if (head) {
                content = await filesystem.headFile(validPath, head);
                this.super.introspect(
                  `Retrieved first ${head} lines of ${filePath}`
                );
              } else {
                content = await filesystem.readFileContent(validPath);
                this.super.introspect(`Successfully read ${filePath}`);
              }

              // Required lazily, as in the original; presumably to keep the
              // tokenizer off the module-load path (confirm before hoisting).
              const { TokenManager } = require("../../../../helpers/tiktoken");
              const tokenManager = new TokenManager(this.super.model);
              const tokenCount = tokenManager.countFromString(content);
              const maxFileTokens = resolveMaxFileTokens();

              // Never truncate: when the content is over the limit, return an
              // explicit notice so the decision on how to proceed stays with
              // the user. This applies to head/tail reads as well.
              if (tokenCount > maxFileTokens) {
                // Surface the refusal in the introspection log too; otherwise
                // the last visible status would be "Successfully read ...".
                this.super.introspect(
                  `${filePath} exceeds the token limit (${tokenCount.toLocaleString()} > ${maxFileTokens.toLocaleString()}); content was not returned`
                );
                return [
                  `File "${filePath}" contains ${tokenCount.toLocaleString()} tokens, which exceeds the limit of ${maxFileTokens.toLocaleString()} tokens.`,
                  `To proceed, choose one of the following options:`,
                  `1. Read specific portions using the head or tail parameters`,
                  // Use the same tool name the description advertises.
                  `2. Search for specific content using filesystem-search-files`,
                  `3. Request a summary of the file`,
                ].join("\n");
              }

              const filename = path.basename(validPath);
              this.super.addCitation?.({
                // NOTE(review): the id is built from only the first 32
                // base64url characters (24 bytes) of the path, so files that
                // share a long directory prefix get the same id. Confirm
                // whether citation ids must be unique.
                id: `fs-${Buffer.from(validPath).toString("base64url").slice(0, 32)}`,
                title: filename,
                text: content,
                chunkSource: validPath,
                score: null,
              });

              return content;
            } catch (e) {
              this.super.handlerProps.log(
                `filesystem-read-text-file error: ${e.message}`
              );
              this.super.introspect(`Error: ${e.message}`);
              return `Error reading file: ${e.message}`;
            }
          },
        });
      },
    };
  },
};
|