merlyn/server/utils/agentFlows/executors/web-scraping.js
Sean Hatfield e5f3fb0892
Agent flow builder (#3077)
* wip agent builder

* refactor structure for agent builder

* improve ui for add block menu and sidebar

* lint

* node ui improvement

* handle deleting variable in all nodes

* add headers and body to apiCall node

* lint

* Agent flow builder backend (#3078)

* wip agent builder backend

* save/load agent tasks

* lint

* refactor agent task to use uuids instead of names

* placeholder for run task

* update frontend sidebar + separate backend to agent-tasks utils

* lint

* add deleting of agent tasks

* create AgentTasks class + wip load agent tasks into aibitat

* lint

* inject + call agent tasks

* wip call agent tasks

* add llm instruction + fix api calling blocks

* add ui + backend for editing/toggling agent tasks

* lint

* add back middlewares

* disable run task + add navigate to home on logo click

* implement normalizePath to prevent path traversal

* wip make api calling more consistent

* lint

* rename all references from task to flow

* patch load flow bug when on editing page

* remove unneeded files/comments

* lint

* fix delete endpoint + rename load flows

* add move block to ui + fix api-call backend + add telemetry

* lint

* add web scraping block

* only allow admin for agent builder

---------

Co-authored-by: timothycarambat <rambat1010@gmail.com>

* Move AgentFlowManager flows to static
simplify UI states
Handle LLM prompt flow when provided non-string

* delete/edit menu for agent flow panel + update flow icon

* lint

* fix open builder button hidden bug

* add tooltips to move up/down block buttons

* add tooltip to delete block

* truncate block description to fit on blocklist component

* light mode agent builder sidebar

* light mode api call block

* fix light mode styles for agent builder blocks

* agent flow fetch in UI

* sync delete flow

* agent flow ui/ux improvements

* remove unused AgentSidebar component

* comment out /run

* UI changes and updates for flow builder

* format flow panel info

* update link handling

* ui tweaks to header menu

* remove unused import

* update doc links
update block icons

* bump readme

* Patch code block header oddity
resolves #3117

* bump dev image

---------

Co-authored-by: Timothy Carambat <rambat1010@gmail.com>
2025-02-12 16:50:43 -08:00

56 lines
1.6 KiB
JavaScript

const { CollectorApi } = require("../../collectorApi");
const { TokenManager } = require("../../helpers/tiktoken");
const Provider = require("../../agents/aibitat/providers/ai-provider");
const { summarizeContent } = require("../../agents/aibitat/utils/summarize");
/**
 * Execute a web scraping flow step.
 *
 * Fetches the page at `config.url` through the collector API and returns its
 * content. When the content's token count is not below the context limit
 * reported for the given provider/model, the content is summarized and the
 * summary is returned instead.
 *
 * @param {Object} config Flow step configuration
 * @param {string} config.url URL of the page to scrape
 * @param {Object} context Execution context
 * @param {Function} context.introspect Called with human-readable progress messages
 * @param {*} context.model Forwarded to TokenManager, Provider.contextLimit and summarizeContent
 * @param {*} context.provider Forwarded to Provider.contextLimit and summarizeContent
 * @returns {Promise<string>} Scraped content, or its summary when it is too long
 * @throws {Error} When no URL is configured, the scrape is reported as
 *   unsuccessful, or the scraped page has no content
 */
async function executeWebScraping(config, context) {
  const { url } = config;
  const { introspect, model, provider } = context;

  if (!url) {
    throw new Error("URL is required for web scraping");
  }

  introspect(`Scraping the content of ${url}`);
  const { success, content } = await new CollectorApi().getLinkContent(url);

  if (!success) {
    introspect(`Could not scrape ${url}. Cannot use this page's content.`);
    throw new Error("URL could not be scraped and no content was found.");
  }

  // Validate the payload BEFORE announcing success, so an empty page is never
  // reported as "successfully scraped" right before the step fails.
  if (!content || content.length === 0) {
    throw new Error("There was no content to be collected or read.");
  }
  introspect(`Successfully scraped content from ${url}`);

  // Content that fits under the context limit is returned untouched.
  const tokenCount = new TokenManager(model).countFromString(content);
  const contextLimit = Provider.contextLimit(provider, model);
  if (tokenCount < contextLimit) {
    return content;
  }

  // Otherwise fall back to a summary of the page.
  introspect(
    `This page's content is way too long. I will summarize it right now.`
  );
  const summary = await summarizeContent({
    provider,
    model,
    content,
  });
  introspect(`Successfully summarized content`);
  return summary;
}
module.exports = executeWebScraping;