fix: preserve Confluence context paths (#5415)
* fix: preserve confluence context paths
* lint and minor changes

---------

Co-authored-by: Timothy Carambat <rambat1010@gmail.com>
This commit is contained in:
parent
6ef114df19
commit
91e75c27c2
@ -0,0 +1,125 @@
|
||||
/* eslint-env jest, node */
|
||||
process.env.STORAGE_DIR = "test-storage";
|
||||
|
||||
const { resolveConfluenceBaseUrl } = require("../../../../utils/extensions/Confluence");
|
||||
const {
|
||||
ConfluencePagesLoader,
|
||||
} = require("../../../../utils/extensions/Confluence/ConfluenceLoader");
|
||||
|
||||
// Unit tests for resolveConfluenceBaseUrl: cloud URLs collapse to their origin,
// self-hosted URLs keep their context path minus any trailing slash.
describe("resolveConfluenceBaseUrl", () => {
  // Each row: [test title, raw URL, cloud flag, expected base URL].
  const cases = [
    [
      "cloud: strips path and returns origin only",
      "https://example.atlassian.net/wiki/spaces/SP",
      true,
      "https://example.atlassian.net",
    ],
    [
      "self-hosted: preserves context path, strips trailing slash",
      "https://my.domain.com/confluence/",
      false,
      "https://my.domain.com/confluence",
    ],
    [
      "self-hosted: returns origin when no context path",
      "https://my.domain.com/",
      false,
      "https://my.domain.com",
    ],
  ];

  test.each(cases)("%s", (_title, rawUrl, cloud, expected) => {
    expect(resolveConfluenceBaseUrl(rawUrl, cloud)).toBe(expected);
  });
});
|
||||
|
||||
// Integration-style tests for ConfluencePagesLoader: verifies that both the REST
// API URL it requests and the page URL it writes into document metadata are
// built from the resolved base URL (with /wiki on cloud, without it self-hosted).
describe("ConfluencePagesLoader", () => {
  // Credentials shared by every loader built in this suite.
  const credentials = {
    spaceKey: "SP",
    username: "user",
    accessToken: "token",
  };

  // Replaces global fetch with a stub that answers with an empty result page,
  // so fetchAllPagesInSpace issues its first request and then has nothing to load.
  function stubEmptyFetch() {
    return jest.spyOn(global, "fetch").mockResolvedValue({
      ok: true,
      json: jest.fn().mockResolvedValue({ size: 0, results: [] }),
    });
  }

  // Builds a loader the same way the extension does: resolve the raw URL first.
  function buildLoader(rawUrl, cloud) {
    return new ConfluencePagesLoader({
      baseUrl: resolveConfluenceBaseUrl(rawUrl, cloud),
      ...credentials,
      cloud,
    });
  }

  // Minimal page payload accepted by createDocumentFromPage.
  function buildPage(title) {
    return {
      id: "123",
      status: "current",
      title,
      type: "page",
      body: { storage: { value: "<p>Hello</p>" } },
      version: {
        number: 1,
        by: { displayName: "User" },
        when: "2026-01-01T00:00:00.000Z",
      },
    };
  }

  afterEach(() => {
    // Undo the fetch spy so one test's stub never leaks into the next.
    jest.restoreAllMocks();
  });

  describe("cloud mode", () => {
    test("API requests include /wiki prefix", async () => {
      const fetchMock = stubEmptyFetch();
      const loader = buildLoader(
        "https://example.atlassian.net/wiki/spaces/SP",
        true
      );

      await loader.fetchAllPagesInSpace();

      expect(fetchMock).toHaveBeenCalledWith(
        "https://example.atlassian.net/wiki/rest/api/content?spaceKey=SP&limit=25&start=0&expand=body.storage,version",
        expect.any(Object)
      );
    });

    test("page URLs include /wiki prefix", () => {
      const loader = buildLoader("https://example.atlassian.net/wiki", true);

      const document = loader.createDocumentFromPage(buildPage("Cloud page"));

      expect(document.metadata.url).toBe(
        "https://example.atlassian.net/wiki/spaces/SP/pages/123"
      );
    });
  });

  describe("self-hosted mode", () => {
    test("API requests use context path without /wiki", async () => {
      const fetchMock = stubEmptyFetch();
      const loader = buildLoader("https://my.domain.com/confluence/", false);

      await loader.fetchAllPagesInSpace();

      expect(fetchMock).toHaveBeenCalledWith(
        "https://my.domain.com/confluence/rest/api/content?spaceKey=SP&limit=25&start=0&expand=body.storage,version",
        expect.any(Object)
      );
    });

    test("page URLs use context path without /wiki", () => {
      const loader = buildLoader("https://my.domain.com/confluence/", false);

      const document = loader.createDocumentFromPage(
        buildPage("Self-hosted page")
      );

      expect(document.metadata.url).toBe(
        "https://my.domain.com/confluence/spaces/SP/pages/123"
      );
    });
  });
});
|
||||
@ -46,10 +46,11 @@ async function loadConfluence(
|
||||
};
|
||||
}
|
||||
|
||||
const { origin, hostname } = new URL(baseUrl);
|
||||
console.log(`-- Working Confluence ${origin} --`);
|
||||
const normalizedBaseUrl = resolveConfluenceBaseUrl(baseUrl, cloud);
|
||||
const { hostname } = new URL(normalizedBaseUrl);
|
||||
console.log(`-- Working Confluence ${normalizedBaseUrl} --`);
|
||||
const loader = new ConfluencePagesLoader({
|
||||
baseUrl: origin, // Use the origin to avoid issues with subdomains, ports, protocols, etc.
|
||||
baseUrl: normalizedBaseUrl,
|
||||
spaceKey,
|
||||
username,
|
||||
accessToken,
|
||||
@ -98,13 +99,13 @@ async function loadConfluence(
|
||||
id: v4(),
|
||||
url: doc.metadata.url + ".page",
|
||||
title: doc.metadata.title || doc.metadata.source,
|
||||
docAuthor: origin,
|
||||
docAuthor: normalizedBaseUrl,
|
||||
description: doc.metadata.title,
|
||||
docSource: `${origin} Confluence`,
|
||||
docSource: `${normalizedBaseUrl} Confluence`,
|
||||
chunkSource: generateChunkSource(
|
||||
{
|
||||
doc,
|
||||
baseUrl: origin,
|
||||
baseUrl: normalizedBaseUrl,
|
||||
spaceKey,
|
||||
accessToken,
|
||||
username,
|
||||
@ -182,8 +183,9 @@ async function fetchConfluencePage({
|
||||
}
|
||||
|
||||
console.log(`-- Working Confluence Page ${pageUrl} --`);
|
||||
const normalizedBaseUrl = resolveConfluenceBaseUrl(baseUrl, cloud);
|
||||
const loader = new ConfluencePagesLoader({
|
||||
baseUrl, // Should be the origin of the baseUrl
|
||||
baseUrl: normalizedBaseUrl,
|
||||
spaceKey,
|
||||
username,
|
||||
accessToken,
|
||||
@ -243,6 +245,21 @@ function validBaseUrl(baseUrl) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Resolves the Confluence base URL, preserving context paths for self-hosted deployments.
 *
 * Cloud URLs are reduced to just their origin, so any pasted path
 * (e.g. `/wiki/spaces/SP`) is discarded. Self-hosted URLs keep their path as
 * the context path (e.g. `/confluence`) with trailing slashes removed.
 * Query string and fragment are dropped in both modes.
 *
 * @param {string} baseUrl - Absolute URL to resolve.
 * @param {boolean} [cloud=true] - True for a cloud site, false for self-hosted.
 * @returns {string} The origin (cloud) or origin plus context path (self-hosted).
 * @throws {TypeError} If `baseUrl` cannot be parsed as an absolute URL.
 */
function resolveConfluenceBaseUrl(baseUrl, cloud = true) {
  const url = new URL(baseUrl);
  // Cloud URLs use just the origin; self-hosted may have a context path like /confluence
  if (cloud) return url.origin;

  // Trim trailing slashes with a linear scan: a `/\/+$/` regex backtracks
  // quadratically on user-supplied paths with long slash runs not at the end.
  const { pathname } = url;
  let end = pathname.length;
  while (end > 0 && pathname[end - 1] === "/") end -= 1;

  return `${url.origin}${pathname.slice(0, end)}`;
}
|
||||
|
||||
/**
|
||||
* Generate the full chunkSource for a specific Confluence page so that we can resync it later.
|
||||
* This data is encrypted into a single `payload` query param so we can replay credentials later
|
||||
@ -271,4 +288,5 @@ function generateChunkSource(
|
||||
module.exports = {
|
||||
loadConfluence,
|
||||
fetchConfluencePage,
|
||||
resolveConfluenceBaseUrl,
|
||||
};
|
||||
|
||||
Loading…
Reference in New Issue
Block a user