Fix/drupal wiki (improve table & url handling) (#4097)
* feat: add support for custom table formatting in htmlToText conversion
* fix tables
* feat: improve plain text table formatting for AI readability
* fix options
* improve drupal wiki connector
* final fix
* adjust leading slash to match code
* linting

---------

Co-authored-by: timothycarambat <rambat1010@gmail.com>
This commit is contained in:
parent
2c778e2a75
commit
14fa079953
@@ -219,7 +219,9 @@ class DrupalWiki {
|
||||
pageId: pageId,
|
||||
accessToken: this.accessToken,
|
||||
};
|
||||
return `drupalwiki://${this.baseUrl}?payload=${encryptionWorker.encrypt(
|
||||
return `drupalwiki://${
|
||||
this.baseUrl
|
||||
}/node/${pageId}?payload=${encryptionWorker.encrypt(
|
||||
JSON.stringify(payload)
|
||||
)}`;
|
||||
}
|
||||
@@ -259,17 +261,27 @@ class DrupalWiki {
|
||||
* @private
|
||||
*/
|
||||
#processPageBody({ body, url, title, lastModified }) {
|
||||
// use the title as content if there is none
|
||||
const textContent = body.trim() !== "" ? body : title;
|
||||
|
||||
const plainTextContent = htmlToText(textContent, {
|
||||
wordwrap: false,
|
||||
preserveNewlines: true,
|
||||
selectors: [
|
||||
{
|
||||
selector: "table",
|
||||
format: "dataTable",
|
||||
options: {
|
||||
colSpacing: 3,
|
||||
rowSpacing: 1,
|
||||
uppercaseHeaderCells: true,
|
||||
maxColumnWidth: Infinity,
|
||||
},
|
||||
},
|
||||
],
|
||||
});
|
||||
// preserve structure
|
||||
|
||||
const plainBody = plainTextContent.replace(/\n{3,}/g, "\n\n");
|
||||
// add the link to the document
|
||||
return `Link/URL: ${url}\n\n${plainBody}`;
|
||||
return plainBody;
|
||||
}
|
||||
|
||||
async #downloadAndProcessAttachments(pageId) {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user