fix: preserve Confluence context paths (#5415)

* fix: preserve confluence context paths

* lint and minor changes

---------

Co-authored-by: Timothy Carambat <rambat1010@gmail.com>
This commit is contained in:
Asish Kumar 2026-04-14 01:40:40 +05:30 committed by GitHub
parent 6ef114df19
commit 91e75c27c2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 150 additions and 7 deletions

View File

@ -0,0 +1,125 @@
/* eslint-env jest, node */
// NOTE(review): STORAGE_DIR is assigned before the requires below, presumably
// because the modules under test read it at load time — confirm before moving it.
process.env.STORAGE_DIR = "test-storage";
// Units under test: the base-URL resolver and the loader that consumes its result.
const { resolveConfluenceBaseUrl } = require("../../../../utils/extensions/Confluence");
const {
ConfluencePagesLoader,
} = require("../../../../utils/extensions/Confluence/ConfluenceLoader");
describe("resolveConfluenceBaseUrl", () => {
  // Each row registers one test: the raw URL a user might enter, the
  // deployment mode, and the normalized base URL we expect back.
  const scenarios = [
    {
      title: "cloud: strips path and returns origin only",
      input: "https://example.atlassian.net/wiki/spaces/SP",
      cloud: true,
      expected: "https://example.atlassian.net",
    },
    {
      title: "self-hosted: preserves context path, strips trailing slash",
      input: "https://my.domain.com/confluence/",
      cloud: false,
      expected: "https://my.domain.com/confluence",
    },
    {
      title: "self-hosted: returns origin when no context path",
      input: "https://my.domain.com/",
      cloud: false,
      expected: "https://my.domain.com",
    },
  ];

  for (const { title, input, cloud, expected } of scenarios) {
    test(title, () => {
      const resolved = resolveConfluenceBaseUrl(input, cloud);
      expect(resolved).toBe(expected);
    });
  }
});
describe("ConfluencePagesLoader", () => {
  // Builds a loader the same way production code does: the raw URL is first
  // normalized by resolveConfluenceBaseUrl, then handed to the loader.
  const buildLoader = (rawUrl, cloud) =>
    new ConfluencePagesLoader({
      baseUrl: resolveConfluenceBaseUrl(rawUrl, cloud),
      spaceKey: "SP",
      username: "user",
      accessToken: "token",
      cloud,
    });

  // Replaces global fetch with a stub that answers with an empty result page.
  const stubEmptyFetch = () =>
    jest.spyOn(global, "fetch").mockResolvedValue({
      ok: true,
      json: jest.fn().mockResolvedValue({ size: 0, results: [] }),
    });

  // Page payload passed to createDocumentFromPage; only the title varies.
  const pageFixture = (title) => ({
    id: "123",
    status: "current",
    title,
    type: "page",
    body: { storage: { value: "<p>Hello</p>" } },
    version: { number: 1, by: { displayName: "User" }, when: "2026-01-01T00:00:00.000Z" },
  });

  afterEach(() => {
    // Undo the fetch spy so tests do not leak mocks into each other.
    jest.restoreAllMocks();
  });

  describe("cloud mode", () => {
    test("API requests include /wiki prefix", async () => {
      const fetchSpy = stubEmptyFetch();
      const cloudLoader = buildLoader("https://example.atlassian.net/wiki/spaces/SP", true);

      await cloudLoader.fetchAllPagesInSpace();

      expect(fetchSpy).toHaveBeenCalledWith(
        "https://example.atlassian.net/wiki/rest/api/content?spaceKey=SP&limit=25&start=0&expand=body.storage,version",
        expect.any(Object)
      );
    });

    test("page URLs include /wiki prefix", () => {
      const cloudLoader = buildLoader("https://example.atlassian.net/wiki", true);

      const { metadata } = cloudLoader.createDocumentFromPage(pageFixture("Cloud page"));

      expect(metadata.url).toBe("https://example.atlassian.net/wiki/spaces/SP/pages/123");
    });
  });

  describe("self-hosted mode", () => {
    const SELF_HOSTED_URL = "https://my.domain.com/confluence/";

    test("API requests use context path without /wiki", async () => {
      const fetchSpy = stubEmptyFetch();
      const selfHostedLoader = buildLoader(SELF_HOSTED_URL, false);

      await selfHostedLoader.fetchAllPagesInSpace();

      expect(fetchSpy).toHaveBeenCalledWith(
        "https://my.domain.com/confluence/rest/api/content?spaceKey=SP&limit=25&start=0&expand=body.storage,version",
        expect.any(Object)
      );
    });

    test("page URLs use context path without /wiki", () => {
      const selfHostedLoader = buildLoader(SELF_HOSTED_URL, false);

      const { metadata } = selfHostedLoader.createDocumentFromPage(
        pageFixture("Self-hosted page")
      );

      expect(metadata.url).toBe("https://my.domain.com/confluence/spaces/SP/pages/123");
    });
  });
});

View File

@ -46,10 +46,11 @@ async function loadConfluence(
};
}
const { origin, hostname } = new URL(baseUrl);
console.log(`-- Working Confluence ${origin} --`);
const normalizedBaseUrl = resolveConfluenceBaseUrl(baseUrl, cloud);
const { hostname } = new URL(normalizedBaseUrl);
console.log(`-- Working Confluence ${normalizedBaseUrl} --`);
const loader = new ConfluencePagesLoader({
baseUrl: origin, // Use the origin to avoid issues with subdomains, ports, protocols, etc.
baseUrl: normalizedBaseUrl,
spaceKey,
username,
accessToken,
@ -98,13 +99,13 @@ async function loadConfluence(
id: v4(),
url: doc.metadata.url + ".page",
title: doc.metadata.title || doc.metadata.source,
docAuthor: origin,
docAuthor: normalizedBaseUrl,
description: doc.metadata.title,
docSource: `${origin} Confluence`,
docSource: `${normalizedBaseUrl} Confluence`,
chunkSource: generateChunkSource(
{
doc,
baseUrl: origin,
baseUrl: normalizedBaseUrl,
spaceKey,
accessToken,
username,
@ -182,8 +183,9 @@ async function fetchConfluencePage({
}
console.log(`-- Working Confluence Page ${pageUrl} --`);
const normalizedBaseUrl = resolveConfluenceBaseUrl(baseUrl, cloud);
const loader = new ConfluencePagesLoader({
baseUrl, // Should be the origin of the baseUrl
baseUrl: normalizedBaseUrl,
spaceKey,
username,
accessToken,
@ -243,6 +245,21 @@ function validBaseUrl(baseUrl) {
}
}
/**
 * Normalizes a user-supplied Confluence URL into the base URL handed to the loader.
 *
 * - Cloud: only the origin (scheme + host + port) is kept; any path is dropped.
 * - Self-hosted: the origin plus the URL's path is kept (e.g. a `/confluence`
 *   context path), with trailing slashes removed.
 *
 * Query string and fragment are never part of the result. `new URL` throws if
 * `baseUrl` cannot be parsed as an absolute URL.
 *
 * @param {string} baseUrl - The Confluence URL to normalize.
 * @param {boolean} [cloud=true] - Truthy for Confluence Cloud, falsy for self-hosted.
 * @returns {string} The normalized base URL without a trailing slash.
 */
function resolveConfluenceBaseUrl(baseUrl, cloud = true) {
  const { origin, pathname } = new URL(baseUrl);

  if (!cloud) {
    // Self-hosted deployments may live under a context path such as /confluence,
    // so keep the path and only trim the slashes at its end.
    const trimmedPath = pathname.replace(/\/+$/, "");
    return `${origin}${trimmedPath}`;
  }

  // Cloud URLs use just the origin.
  return origin;
}
/**
* Generate the full chunkSource for a specific Confluence page so that we can resync it later.
* This data is encrypted into a single `payload` query param so we can replay credentials later
@ -271,4 +288,5 @@ function generateChunkSource(
// Public surface of this module. resolveConfluenceBaseUrl is exported alongside
// the two loaders so it can be required directly (the Jest suite does this).
module.exports = {
loadConfluence,
fetchConfluencePage,
resolveConfluenceBaseUrl,
};