Handle non-ASCII characters in single and bulk link scraper URLs (#2495)
Handle non-ASCII characters in URLs
This commit is contained in:
parent
93d7ce6d34
commit
41522cdfb4
@@ -27,7 +27,8 @@ async function scrapeGenericUrl(link, textOnly = false) {
|
||||
}
|
||||
|
||||
const url = new URL(link);
|
||||
const filename = (url.host + "-" + url.pathname).replace(".", "_");
|
||||
const decodedPathname = decodeURIComponent(url.pathname);
|
||||
const filename = `${url.hostname}${decodedPathname.replace(/\//g, '_')}`;
|
||||
|
||||
const data = {
|
||||
id: v4(),
|
||||
|
||||
@@ -108,7 +108,8 @@ async function bulkScrapePages(links, outFolderPath) {
|
||||
}
|
||||
|
||||
const url = new URL(link);
|
||||
const filename = (url.host + "-" + url.pathname).replace(".", "_");
|
||||
const decodedPathname = decodeURIComponent(url.pathname);
|
||||
const filename = `${url.hostname}${decodedPathname.replace(/\//g, '_')}`;
|
||||
|
||||
const data = {
|
||||
id: v4(),
|
||||
|
||||
Loading…
Reference in New Issue
Block a user