Fix/drupal wiki (improve table & url handling) (#4097)

* feat: add support for custom table formatting in htmlToText conversion

* fix tables

* feat: improve plain text table formatting for AI readability

* fix options

* improve drupal wiki connector

* final fix

* adjust leading slash to match code

* linting

---------

Co-authored-by: timothycarambat <rambat1010@gmail.com>
This commit is contained in:
rexjohannes 2025-07-07 22:39:38 +02:00 committed by GitHub
parent 2c778e2a75
commit 14fa079953
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -219,7 +219,9 @@ class DrupalWiki {
pageId: pageId,
accessToken: this.accessToken,
};
return `drupalwiki://${this.baseUrl}?payload=${encryptionWorker.encrypt(
return `drupalwiki://${
this.baseUrl
}/node/${pageId}?payload=${encryptionWorker.encrypt(
JSON.stringify(payload)
)}`;
}
@ -259,17 +261,27 @@ class DrupalWiki {
* @private
*/
#processPageBody({ body, url, title, lastModified }) {
// Converts a page's HTML body into plain text and returns it as a string.
// NOTE(review): `lastModified` is destructured but never read in this method.
// NOTE(review): `body.trim()` assumes `body` is a string — confirm callers
// never pass null/undefined here.
// use the title as content if there is none
const textContent = body.trim() !== "" ? body : title;
// NOTE(review): `htmlToText` is defined outside this view; the option names
// below look like the `html-to-text` package's API — confirm against the
// version this project pins.
const plainTextContent = htmlToText(textContent, {
// presumably disables hard line-wrapping of the output — verify in library docs
wordwrap: false,
preserveNewlines: true,
selectors: [
{
// Custom formatting applied to every <table> element.
selector: "table",
format: "dataTable",
options: {
colSpacing: 3,
rowSpacing: 1,
uppercaseHeaderCells: true,
// Infinity: presumably means cells are never truncated/wrapped by width — TODO confirm
maxColumnWidth: Infinity,
},
},
],
});
// preserve structure
// Collapses any run of three or more newlines down to exactly two.
const plainBody = plainTextContent.replace(/\n{3,}/g, "\n\n");
// add the link to the document
return `Link/URL: ${url}\n\n${plainBody}`;
// NOTE(review): as written, the statement below is unreachable because of
// the `return` above. This text comes from a diff rendered without +/-
// markers, so these two returns are likely the removed and added versions
// of the same line — confirm against the real file which one is current.
return plainBody;
}
async #downloadAndProcessAttachments(pageId) {