merlyn/collector/extensions/resync/index.js
Neha Prasad 3ecf218eea
feat: Add SSL certificate bypass support for self-hosted Confluence instances (#4219)
* Added bypassSSL parameter to constructor and implemented SSL bypass logic in fetchConfluenceData method

* Updated generateChunkSource function to include bypassSSL in the encrypted payload

* Updated the request body to include bypassSSL in the JSON payload sent to the backend

* Updated form submission to include bypassSSL parameter from the checkbox

* Added bypass_ssl: "Bypass SSL Certificate Validation" translation

* Passed these parameters to the fetchConfluencePage function for proper resync functionality

* allow ignore of SSL cert for Confluence

* add translations

---------

Co-authored-by: Timothy Carambat <rambat1010@gmail.com>
2025-11-25 14:32:10 -08:00

186 lines
7.3 KiB
JavaScript

const { getLinkText } = require("../../processLink");
/**
 * Fetches the content of a raw link again and replies with it as a text string.
 * The reply is always sent with HTTP 200; a failed sync is reported in the body
 * as `{ success: false, content: null }` and the error is logged to the console.
 * @param {object} data - metadata from document (eg: link)
 * @param {import("../../middleware/setDataSigner").ResponseWithSigner} response
 * @returns {Promise<void>} nothing is returned - the result is written to `response`
 * @throws {Error} (as a rejected promise) when `data.link` is missing
 */
async function resyncLink({ link }, response) {
  if (!link) throw new Error('Invalid link provided');
  try {
    // `reason` has to be destructured: it is interpolated into the error below, and
    // leaving it out turned every failed sync into "ReferenceError: reason is not defined".
    // NOTE(review): assumes getLinkText reports failures via `reason` like the sibling fetchers - confirm.
    const { success, reason, content = null } = await getLinkText(link);
    if (!success) throw new Error(`Failed to sync link content. ${reason}`);
    response.status(200).json({ success, content });
  } catch (e) {
    // Best-effort endpoint: log the cause and still answer the caller.
    console.error(e);
    response.status(200).json({
      success: false,
      content: null,
    });
  }
}
/**
* Fetches the content of a YouTube link. Returns the content as a text string of the video in question.
* We offer this as there may be some videos where a transcription could be manually edited after initial scraping
* but in general - transcriptions often never change.
* @param {object} data - metadata from document (eg: link)
* @param {import("../../middleware/setDataSigner").ResponseWithSigner} response
*/
async function resyncYouTube({ link }, response) {
if (!link) throw new Error('Invalid link provided');
try {
const { fetchVideoTranscriptContent } = require("../../utils/extensions/YoutubeTranscript");
const { success, reason, content } = await fetchVideoTranscriptContent({ url: link });
if (!success) throw new Error(`Failed to sync YouTube video transcript. ${reason}`);
response.status(200).json({ success, content });
} catch (e) {
console.error(e);
response.status(200).json({
success: false,
content: null,
});
}
}
/**
* Fetches the content of a specific confluence page via its chunkSource.
* Returns the content as a text string of the page in question and only that page.
* @param {object} data - metadata from document (eg: chunkSource)
* @param {import("../../middleware/setDataSigner").ResponseWithSigner} response
*/
async function resyncConfluence({ chunkSource }, response) {
if (!chunkSource) throw new Error('Invalid source property provided');
try {
// Confluence data is `payload` encrypted. So we need to expand its
// encrypted payload back into query params so we can reFetch the page with same access token/params.
const source = response.locals.encryptionWorker.expandPayload(chunkSource);
const { fetchConfluencePage } = require("../../utils/extensions/Confluence");
const { success, reason, content } = await fetchConfluencePage({
pageUrl: `https:${source.pathname}`, // need to add back the real protocol
baseUrl: source.searchParams.get('baseUrl'),
spaceKey: source.searchParams.get('spaceKey'),
accessToken: source.searchParams.get('token'),
username: source.searchParams.get('username'),
cloud: source.searchParams.get('cloud') === 'true',
bypassSSL: source.searchParams.get('bypassSSL') === 'true',
});
if (!success) throw new Error(`Failed to sync Confluence page content. ${reason}`);
response.status(200).json({ success, content });
} catch (e) {
console.error(e);
response.status(200).json({
success: false,
content: null,
});
}
}
/**
* Fetches the content of a specific confluence page via its chunkSource.
* Returns the content as a text string of the page in question and only that page.
* @param {object} data - metadata from document (eg: chunkSource)
* @param {import("../../middleware/setDataSigner").ResponseWithSigner} response
*/
async function resyncGithub({ chunkSource }, response) {
if (!chunkSource) throw new Error('Invalid source property provided');
try {
// Github file data is `payload` encrypted (might contain PAT). So we need to expand its
// encrypted payload back into query params so we can reFetch the page with same access token/params.
const source = response.locals.encryptionWorker.expandPayload(chunkSource);
const { fetchGithubFile } = require("../../utils/extensions/RepoLoader/GithubRepo");
const { success, reason, content } = await fetchGithubFile({
repoUrl: `https:${source.pathname}`, // need to add back the real protocol
branch: source.searchParams.get('branch'),
accessToken: source.searchParams.get('pat'),
sourceFilePath: source.searchParams.get('path'),
});
if (!success) throw new Error(`Failed to sync GitHub file content. ${reason}`);
response.status(200).json({ success, content });
} catch (e) {
console.error(e);
response.status(200).json({
success: false,
content: null,
});
}
}
/**
* Fetches the content of a specific DrupalWiki page via its chunkSource.
* Returns the content as a text string of the page in question and only that page.
* @param {object} data - metadata from document (eg: chunkSource)
* @param {import("../../middleware/setDataSigner").ResponseWithSigner} response
*/
async function resyncDrupalWiki({ chunkSource }, response) {
if (!chunkSource) throw new Error('Invalid source property provided');
try {
// DrupalWiki data is `payload` encrypted. So we need to expand its
// encrypted payload back into query params so we can reFetch the page with same access token/params.
const source = response.locals.encryptionWorker.expandPayload(chunkSource);
const { loadPage } = require("../../utils/extensions/DrupalWiki");
const { success, reason, content } = await loadPage({
baseUrl: source.searchParams.get('baseUrl'),
pageId: source.searchParams.get('pageId'),
accessToken: source.searchParams.get('accessToken'),
});
if (!success) {
console.error(`Failed to sync DrupalWiki page content. ${reason}`);
response.status(200).json({
success: false,
content: null,
});
} else {
response.status(200).json({ success, content });
}
} catch (e) {
console.error(e);
response.status(200).json({
success: false,
content: null,
});
}
}
/**
* Fetches the content of a specific Paperless-ngx document via its chunkSource.
* Returns the content as a text string of the document.
* @param {object} data - metadata from document (eg: chunkSource)
* @param {import("../../middleware/setDataSigner").ResponseWithSigner} response
*/
async function resyncPaperlessNgx({ chunkSource }, response) {
if (!chunkSource) throw new Error('Invalid source property provided');
try {
const source = response.locals.encryptionWorker.expandPayload(chunkSource);
const { PaperlessNgxLoader } = require("../../utils/extensions/PaperlessNgx/PaperlessNgxLoader");
const loader = new PaperlessNgxLoader({
baseUrl: source.searchParams.get('baseUrl'),
apiToken: source.searchParams.get('token'),
});
const documentId = source.pathname.split('//')[1];
const content = await loader.fetchDocumentContent(documentId);
if (!content) throw new Error('Failed to fetch document content');
response.status(200).json({ success: true, content });
} catch (e) {
console.error(e);
response.status(200).json({
success: false,
content: null,
});
}
}
// Public surface of this module: each resync handler exported under a short key.
// NOTE(review): presumably the key is the document source type a caller uses to
// pick the matching handler - confirm against the code that consumes this map.
// "paperless-ngx" is quoted because the hyphen makes it an invalid bare identifier.
module.exports = {
link: resyncLink,
youtube: resyncYouTube,
confluence: resyncConfluence,
github: resyncGithub,
drupalwiki: resyncDrupalWiki,
"paperless-ngx": resyncPaperlessNgx,
}