merlyn/server/jobs/cleanup-generated-files.js
Marcello Fitton 41495cdabe
feat: Scheduled Jobs (#5322)
* initialize

* expand tool result text limit | add syntax highlighting and json formatting to tool result rendering

* fix onError jsdoc

* lint

* fix unread icon

* route protection

* improve form handling for NewJobModal

* safeJsonParse

* remove unneeded comments

* remove trycatch

* add truncateText helper

* add explicit fallback value to safeJsonParse

* add shared cron constant and helpers

* reduce frontend indirection

* use isLight to compute syntax highlighting theme

* remove dead code

* remove forJob and make job limit to 50

* create recomputeNextRunAt helper method

* add comment about nextRunAt recomputation

* add job queue and concurrency control to scheduled jobs

* use p-queue

* change default max concurrent value to 1

* add comment explaining internal scheduling system

* add recomputeNextRunAt on boot

* add generated documents to run details

* Modify toolsOverride functionality where no tools selected means no tools are given to the agent

add a select all/deselect all toggle button for easily selecting all
tools in the create job form

* create usePolling hook

* add polling to scheduled jobs and scheduled job runs pages

* add cron generation feature in job form

* remove cron generation feature | add cron builder feature | add max active scheduled jobs limit

* set MAX_ACTIVE to null

* replace hour and minute input fields with input with type time

* simplify

* organize components

* move components to bottom of page component

* change Generated Documents to Generated Files

* add i18n to cronstrue

* add i18n

* add type="button" to button elements

* refactor fileSource retrieval logic

* one scheduled job run can have status "running"

* add protection of file retrieval from scheduled job in multiuser mode

* fix comments

* make job status default to queued

* add queued status

* fix bug with result trace rendering

* store timeout ref and clearTimeout once race settles

* remove unneeded handlerPromise tracking

* move imports to top level

* refactor hardcoded paths to path resolve functions

* implement new job form design

* simplify

* fix button styles

* fix runJob bug

* implement styles for scheduled jobs page

* apply dark mode figma styles

* delete unused translation key

* implement light mode for new new job modal, run history, and run details

* lint

* fix light mode scroll bar in tool call card

* adjust table header contrast

* fix type in subtitle

* kill workers when job is in-flight before deleting job

* add border-none to buttons

* change locale time to iso string

* import BackgroundService module level | instantiate backgroundService singleton once and reuse across handlers

* add p-queue, @breejs/later and cron-validate as core deps

* parse cron expression to a builder state once

* add theme to day buttons in cron builder

* fix stale tools selection caption

* flip popover when popover clips screen height

* make ScheduleJob.trigger() await the run insertion | disable run now button if job is in flight

* regen table

* refactor generated file card

* refactor frontend

* remove logs

* major refactor for tool picking, fix bree/later bug

* combine action endpoints, move contine to method

* fix unoptimized query with include + take + order

* fix dangerous use, refactor job to utils

* add copy content to text response

* improve notification system subscription for browser

* remove unused translations

* prevent gen-file cleanup job from deleting active job file generated references

* rich text copy

* Scheduled Jobs: Translations (#5482)

* add locales for scheduled jobs

* i18n

---------

Co-authored-by: Timothy Carambat <rambat1010@gmail.com>

* add config flag with UI notice

* update README

* telemetry datapoints

* Always use UTC on backend, convert to local in frontend

* fix tz render

* Add job killing

* cleanup thinking text in job notifications and break out reasoning in response text.
Also hide zero metrics since that is useless

* Port generatedFile schema to the normalized workspace chat `outputs` file format so porting to thread is simple and implem between chats <> jobs is 1:1

* what the fuck

* compiled bug

* fixed thinking oddity in compiled frontend

* suppress multi-toast

* fix duration call

* Revert "fix duration call"

This reverts commit 0491bc71f4223e65ea4046561b15b268fefb8da2.

* revert and reapply fix

---------

Co-authored-by: Timothy Carambat <rambat1010@gmail.com>
2026-04-29 12:05:46 -07:00

161 lines
4.7 KiB
JavaScript

const { log, conclude } = require("./helpers/index.js");
const { WorkspaceChats } = require("../models/workspaceChats.js");
const { ScheduledJobRun } = require("../models/scheduledJobRun.js");
const createFilesLib = require("../utils/agents/aibitat/plugins/create-files/lib.js");
const { safeJsonParse } = require("../utils/http/index.js");
/**
 * Scheduled job: removes orphaned generated files from the agent output
 * directory. An entry is kept only when its name matches the expected
 * `<prefix>-<uuid>[.ext]` pattern AND it is still referenced by an active
 * workspace chat or a completed scheduled job run; everything else is
 * deleted. Always calls conclude() so the job runner can settle.
 */
(async () => {
  try {
    const fs = require("fs");
    const path = require("path");
    const storageDirectory = await createFilesLib.getOutputDirectory();
    if (!fs.existsSync(storageDirectory)) return;

    const files = fs.readdirSync(storageDirectory);
    if (files.length === 0) return;

    // Get all storage filenames referenced in active (include: true) chats
    // and completed scheduled job runs.
    const activeFileRefs = await getActiveStorageFilenames();

    const filesToDelete = [];
    for (const filename of files) {
      const fullPath = path.join(storageDirectory, filename);

      // Guard against races: the entry may vanish between readdirSync and
      // statSync (e.g. a concurrent deletion). Skip it instead of letting
      // the throw abort the whole sweep via the outer catch.
      let stat;
      try {
        stat = fs.statSync(fullPath);
      } catch (error) {
        log(`Could not stat ${fullPath}: ${error.message}`);
        continue;
      }

      // Skip files/folders that don't match our naming pattern and add to deletion list
      if (!filename.match(/^[a-z]+-[a-f0-9-]{36}(\.\w+)?$/i)) {
        filesToDelete.push({ path: fullPath, isDirectory: stat.isDirectory() });
        continue;
      }

      // If file/folder is not referenced in any active chat, add to deletion list
      if (!activeFileRefs.has(filename))
        filesToDelete.push({ path: fullPath, isDirectory: stat.isDirectory() });
    }

    if (filesToDelete.length === 0) return;
    log(`Found ${filesToDelete.length} orphaned files/folders to delete.`);

    let deletedCount = 0;
    let failedCount = 0;
    for (const { path: itemPath, isDirectory } of filesToDelete) {
      try {
        if (isDirectory) fs.rmSync(itemPath, { recursive: true });
        else fs.unlinkSync(itemPath);
        deletedCount++;
      } catch (error) {
        // Best-effort deletion: count and log failures, keep going.
        failedCount++;
        log(`Failed to delete ${itemPath}: ${error.message}`);
      }
    }
    log(
      `Cleanup complete: deleted ${deletedCount} items, ${failedCount} failures.`
    );
  } catch (error) {
    console.error(error);
    log(`Error during cleanup: ${error.message}`);
  } finally {
    // Always signal job completion to the scheduler.
    conclude();
  }
})();
/**
 * Collects every storage filename still referenced by an active
 * (include: true) workspace chat or a completed scheduled job run.
 * Both sources paginate internally to bound memory usage.
 * @param {number} batchSize - Rows fetched per batch from each source (default: 50)
 * @returns {Promise<Set<string>>} Union of all referenced storage filenames
 */
async function getActiveStorageFilenames(batchSize = 50) {
  // Query both sources concurrently; each resolves to a Set of filenames.
  const sources = await Promise.all([
    workspaceChatGeneratedFilenames(batchSize),
    scheduledJobRunGeneratedFilenames(batchSize),
  ]);

  const referenced = new Set();
  for (const filenames of sources) {
    for (const filename of filenames) referenced.add(filename);
  }
  return referenced;
}
/**
 * Gathers storage filenames referenced in the `outputs` array of active
 * (include: true) workspace chat responses.
 * Paginates through chats to avoid loading all rows into memory at once.
 * Errors are logged and a partial (possibly empty) set is returned rather
 * than thrown, so the cleanup job errs on the side of keeping files.
 * @param {number} batchSize - Number of chats fetched per batch (default: 50)
 * @returns {Promise<Set<string>>}
 */
async function workspaceChatGeneratedFilenames(batchSize = 50) {
  const storageFilenames = new Set();
  try {
    let offset = 0;
    let hasMore = true;
    while (hasMore) {
      const chats = await WorkspaceChats.where(
        { include: true },
        batchSize,
        { id: "asc" },
        offset
      );
      if (chats.length === 0) break;

      for (const chat of chats) {
        const response = safeJsonParse(chat.response, { outputs: [] });
        // Tolerate malformed payloads: `outputs` may be missing or not an
        // array in older/corrupt rows — skip explicitly instead of relying
        // on a throw-and-catch.
        if (!Array.isArray(response?.outputs)) continue;
        for (const output of response.outputs) {
          // Consistent with scheduledJobRunGeneratedFilenames: optional
          // chaining instead of a verbose null-check chain.
          if (!output?.payload?.storageFilename) continue;
          storageFilenames.add(output.payload.storageFilename);
        }
      }

      offset += chats.length;
      // A short batch means we've exhausted the table.
      hasMore = chats.length === batchSize;
    }
  } catch (error) {
    console.error("[workspaceChatGeneratedFilenames] Error:", error.message);
  }
  return storageFilenames;
}
/**
 * Gathers storage filenames referenced in the `outputs` array of completed
 * scheduled job run results.
 * Paginates through runs to avoid loading all rows into memory at once.
 * Errors are logged and a partial (possibly empty) set is returned rather
 * than thrown.
 * @param {number} batchSize - Number of runs fetched per batch (default: 50)
 * @returns {Promise<Set<string>>}
 */
async function scheduledJobRunGeneratedFilenames(batchSize = 50) {
  const referenced = new Set();
  try {
    let cursor = 0;
    for (;;) {
      const batch = await ScheduledJobRun.where(
        { status: "completed" },
        batchSize,
        { id: "asc" },
        {},
        cursor
      );
      if (batch.length === 0) break;

      for (const run of batch) {
        try {
          const parsed = safeJsonParse(run.result, { outputs: [] });
          for (const entry of parsed.outputs) {
            if (!entry?.payload?.storageFilename) continue;
            referenced.add(entry.payload.storageFilename);
          }
        } catch {
          // Malformed run result — skip this run, keep scanning.
          continue;
        }
      }

      cursor += batch.length;
      // A short batch means the table is exhausted.
      if (batch.length < batchSize) break;
    }
  } catch (error) {
    console.error("[scheduledJobRunGeneratedFilenames] Error:", error.message);
  }
  return referenced;
}