merlyn/server/utils/helpers/chat/convertTo.js
Marcello Fitton 5716ac5ed5
Custom Default System Prompt (#4487)
* Add Default System Prompt Management

- Introduced a new route for fetching and updating the default system prompt in the backend.
- Added a new Admin page for managing the default system prompt, including a form for editing and saving changes.
- Updated the SettingsSidebar to include a link to the new Default System Prompt page.
- Implemented fetching of available system prompt variables for use in the prompt editor.
- Enhanced the ChatSettings and ChatPromptSettings components to support the new default system prompt functionality.

This commit lays the groundwork for improved management of system prompts across workspaces.

* Remove validation for system prompt in ChatSettings component

* Add comment for system prompt in workspaces model

* linting, simplify logic for default assumption

* dev build

---------

Co-authored-by: timothycarambat <rambat1010@gmail.com>
2025-11-24 13:24:10 -08:00

257 lines
6.8 KiB
JavaScript

// Helpers that convert workspace chats to some supported format
// for external use by the user.
const { WorkspaceChats } = require("../../../models/workspaceChats");
const { EmbedChats } = require("../../../models/embedChats");
const { safeJsonParse } = require("../../http");
const { SystemSettings } = require("../../../models/systemSettings");
/**
 * Serializes prepared chat records into CSV text.
 *
 * The header row always starts with the fixed columns `id`, `workspace`,
 * `prompt`, `response` and `sent_at`; any other key found on a record is
 * appended as an extra column in first-seen order. Missing (null/undefined)
 * values become empty cells, and every data cell goes through `escapeCsv`.
 * Header names themselves are written as-is.
 *
 * @param {Object[]} preparedData - Flat records, one per chat.
 * @returns {Promise<string>} Header line plus one line per record, joined by "\n".
 */
async function convertToCSV(preparedData) {
  const columnSet = new Set([
    "id",
    "workspace",
    "prompt",
    "response",
    "sent_at",
  ]);
  preparedData.forEach((record) => {
    for (const key of Object.keys(record)) columnSet.add(key);
  });

  // The column list is final once every record has been scanned, so the same
  // array serves both the header line and each data line.
  const columns = [...columnSet];
  const lines = [columns.join(",")];
  for (const record of preparedData) {
    const cells = columns.map((column) => {
      const cell = record[column] ?? "";
      return escapeCsv(String(cell));
    });
    lines.push(cells.join(","));
  }
  return lines.join("\n");
}
/**
 * Serializes prepared chat records as pretty-printed JSON.
 *
 * @param {*} preparedData - Value to serialize (an array of chat records in this file).
 * @returns {Promise<string>} JSON text indented with four spaces.
 */
async function convertToJSON(preparedData) {
  const indentWidth = 4;
  const serialized = JSON.stringify(preparedData, null, indentWidth);
  return serialized;
}
/**
 * Serializes Alpaca-style records (instruction / input / output) as
 * pretty-printed JSON.
 * Format reference: https://raw.githubusercontent.com/gururise/AlpacaDataCleaned/main/alpaca_data.json
 *
 * @param {*} preparedData - Value to serialize (an array of Alpaca records in this file).
 * @returns {Promise<string>} JSON text indented with four spaces.
 */
async function convertToJSONAlpaca(preparedData) {
  const indentWidth = 4;
  const alpacaJson = JSON.stringify(preparedData, null, indentWidth);
  return alpacaJson;
}
/**
 * Serializes a map of conversations as JSON Lines: each value of the map
 * becomes one compact JSON document on its own line. The map keys are not
 * written to the output.
 * JSONL output can be validated on https://jsonlines.org/validator/
 *
 * @param {Object} workspaceChatsMap - Conversations keyed by an identifier.
 * @returns {Promise<string>} One JSON document per line, joined by "\n".
 */
async function convertToJSONL(workspaceChatsMap) {
  const lines = [];
  for (const conversation of Object.values(workspaceChatsMap)) {
    lines.push(JSON.stringify(conversation));
  }
  return lines.join("\n");
}
/**
 * Loads every stored chat of the requested kind (ordered by ascending id) and
 * reshapes it into the structure expected by the converter for `format`.
 *
 * - "csv" / "json": an array of flat records (id, prompt, response, sent_at,
 *   workspace, ...). Only "json" records carry an `attachments` array because
 *   attachments cannot be arranged in CSV columns.
 * - "jsonAlpaca": an array of `{ instruction, input, output }` records.
 * - anything else accepted by `exportMap` (i.e. "jsonl"): an object keyed by
 *   `chat.workspaceId`, each value holding a `messages` list that starts with
 *   a system message and then alternates user / assistant messages.
 *
 * @param {string} [format="jsonl"] - Export format; must be an own key of `exportMap`.
 * @param {string} [chatType="workspace"] - Either "workspace" or "embed".
 * @returns {Promise<Object[]|Object>} Prepared data for the matching converter.
 * @throws {Error} When `format` or `chatType` is not supported.
 */
async function prepareChatsForExport(format = "jsonl", chatType = "workspace") {
  if (!Object.prototype.hasOwnProperty.call(exportMap, format)) {
    throw new Error(`Invalid export type: ${format}`);
  }

  const oldestFirst = { id: "asc" };
  let chats;
  switch (chatType) {
    case "workspace":
      chats = await WorkspaceChats.whereWithData({}, null, null, oldestFirst);
      break;
    case "embed":
      chats = await EmbedChats.whereWithEmbedAndWorkspace(
        {},
        null,
        oldestFirst,
        null
      );
      break;
    default:
      throw new Error(`Invalid chat type: ${chatType}`);
  }

  // Turns the attachments stored on a parsed response into image parts;
  // a missing or empty list yields no parts at all.
  const toImageParts = (attachments) => {
    if (!(attachments?.length > 0)) return [];
    return attachments.map((attachment) => ({
      type: "image",
      image: attachmentToDataUrl(attachment),
    }));
  };

  if (format === "csv" || format === "json") {
    // Only add attachments to the json format since we cannot arrange attachments in csv format
    const withAttachments = format === "json";
    return chats.map((chat) => {
      const parsed = safeJsonParse(chat.response, {});
      const record = {
        id: chat.id,
        prompt: chat.prompt,
        response: parsed.text,
        sent_at: chat.createdAt,
      };
      if (withAttachments) {
        record.attachments = toImageParts(parsed.attachments);
      }

      // Embed chats reach their workspace through the embed config and get
      // no username / rating columns.
      if (chatType === "embed") {
        record.workspace = chat.embed_config
          ? chat.embed_config.workspace.name
          : "unknown workspace";
        return record;
      }

      let username = "unknown user";
      if (chat.user) {
        username = chat.user.username;
      } else if (chat.api_session_id !== null) {
        username = "API";
      }

      let rating = "--";
      if (chat.feedbackScore !== null) {
        rating = chat.feedbackScore ? "GOOD" : "BAD";
      }

      record.workspace = chat.workspace
        ? chat.workspace.name
        : "unknown workspace";
      record.username = username;
      record.rating = rating;
      return record;
    });
  }

  // jsonAlpaca format does not support array outputs
  if (format === "jsonAlpaca") {
    return chats.map((chat) => {
      const parsed = safeJsonParse(chat.response, {});
      const workspacePrompt = chat.workspace
        ? chat.workspace.openAiPrompt
        : null;
      return {
        instruction: buildSystemPrompt(chat, workspacePrompt),
        input: chat.prompt,
        output: parsed.text,
      };
    });
  }

  // Export to JSONL format (recommended for fine-tuning)
  const conversations = {};
  for (const chat of chats) {
    const { prompt, response, workspaceId } = chat;
    const parsed = safeJsonParse(response, { attachments: [] });

    // The first chat seen for a workspace id opens its conversation with the
    // system prompt (the workspace's own prompt, or the default one).
    if (!conversations[workspaceId]) {
      const systemText =
        chat.workspace?.openAiPrompt ?? SystemSettings.saneDefaultSystemPrompt;
      conversations[workspaceId] = {
        messages: [
          {
            role: "system",
            content: [{ type: "text", text: systemText }],
          },
        ],
      };
    }

    const userMessage = {
      role: "user",
      content: [
        { type: "text", text: prompt },
        ...toImageParts(parsed.attachments),
      ],
    };
    const assistantMessage = {
      role: "assistant",
      content: [{ type: "text", text: parsed.text }],
    };
    conversations[workspaceId].messages.push(userMessage, assistantMessage);
  }
  return conversations;
}
// Registry of supported export formats. Each entry pairs the content type
// reported for the export with the converter that serializes the prepared
// chat data into that format.
const exportMap = (() => {
  const exporter = (contentType, func) => ({ contentType, func });
  return {
    json: exporter("application/json", convertToJSON),
    csv: exporter("text/csv", convertToCSV),
    jsonl: exporter("application/jsonl", convertToJSONL),
    jsonAlpaca: exporter("application/json", convertToJSONAlpaca),
  };
})();
/**
 * Wraps a value in double quotes for use as a single CSV cell.
 *
 * Embedded double quotes are doubled, and every line break (`\r\n`, `\r` or
 * `\n`) is replaced by a single space so the cell always stays on one line.
 * Non-string values are converted with `String()` instead of throwing.
 *
 * @param {*} str - Value to escape; null/undefined produce an empty quoted cell.
 * @returns {string} The quoted, escaped cell.
 */
function escapeCsv(str) {
  if (str === null || str === undefined) return '""';
  const singleLine = String(str)
    .replace(/"/g, '""')
    // Match CRLF first so a Windows line ending collapses to one space and
    // no stray carriage return is left inside the cell.
    .replace(/\r\n|\r|\n/g, " ");
  return `"${singleLine}"`;
}
/**
 * Prepares the stored chats and serializes them in the requested format.
 *
 * The converter and content type are looked up in `exportMap`, defaulting to
 * the jsonl entry for an unknown format. Note that the original `format` is
 * still handed to `prepareChatsForExport`, which throws for formats missing
 * from `exportMap`, so an unknown format ends in a rejection rather than a
 * jsonl export.
 *
 * @param {string} [format="jsonl"] - Export format key.
 * @param {string} [chatType="workspace"] - Kind of chats to export.
 * @returns {Promise<{contentType: string, data: *}>} Content type and serialized export.
 */
async function exportChatsAsType(format = "jsonl", chatType = "workspace") {
  const isKnownFormat = Object.prototype.hasOwnProperty.call(exportMap, format);
  const { contentType, func: serialize } = isKnownFormat
    ? exportMap[format]
    : exportMap.jsonl;

  const prepared = await prepareChatsForExport(format, chatType);
  const data = await serialize(prepared);
  return { contentType, data };
}
/**
 * Builds the system prompt text for a chat: the given prompt (or the default
 * system prompt when it is null/undefined) followed by a "Context:" section
 * listing the text of every source recorded in the chat's response. The
 * section is omitted when the response has no sources.
 *
 * @param {Object} chat - Chat whose `response` is a JSON string that may contain `sources`.
 * @param {string|null} [prompt=null] - Prompt to use instead of the default one.
 * @returns {string} The prompt, optionally followed by the numbered context blocks.
 */
function buildSystemPrompt(chat, prompt = null) {
  const sources = safeJsonParse(chat.response)?.sources || [];

  // Context blocks are numbered from 0 in source order.
  const blocks = sources.map(
    (source, i) => `[CONTEXT ${i}]:\n${source.text}\n[END CONTEXT ${i}]\n\n`
  );

  let context = "";
  if (sources.length > 0) {
    context = `\nContext:\n${blocks.join("")}`;
  }

  const basePrompt = prompt ?? SystemSettings.saneDefaultSystemPrompt;
  return `${basePrompt}${context}`;
}
/**
 * Returns an attachment's content as a data URL.
 * Content that already starts with "data:" is returned untouched; anything
 * else is treated as a base64 payload and prefixed with the attachment's
 * mime type.
 * @param {Object} attachment - The attachment object containing contentString and mime type
 * @returns {string} The data URL for the attachment
 */
function attachmentToDataUrl(attachment) {
  const { contentString } = attachment;
  if (contentString.startsWith("data:")) {
    return contentString;
  }
  return `data:${attachment.mime};base64,${contentString}`;
}
// Public surface of this module: only the two export entry points are
// exposed; the per-format converters and helpers defined in this file are
// not exported.
module.exports = {
prepareChatsForExport,
exportChatsAsType,
};