Fix: File pulling fails with uppercase URL characters (#4516)

* fix: remove unnecessary toLowerCase in URL validation

* test: enhance URL validation tests to preserve case sensitivity and format

* test: update URL validation tests to ensure domain normalization to lowercase while preserving path case

* small formatting

* fix filenames when downloading live URI

---------

Co-authored-by: timothycarambat <rambat1010@gmail.com>
This commit is contained in:
Marcello Fitton 2025-10-08 14:00:02 -07:00 committed by GitHub
parent 4e6f0b33ab
commit d48c76919c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 32 additions and 11 deletions

View File

@ -74,8 +74,10 @@ describe("validURL", () => {
});
describe("validateURL", () => {
it("should return the exact same URL if it's already valid", () => {
expect(validateURL("https://www.google.com")).toBe("https://www.google.com");
it("should return the same URL if it's already valid", () => {
expect(validateURL("https://www.google.com")).toBe(
"https://www.google.com"
);
expect(validateURL("http://www.google.com")).toBe("http://www.google.com");
expect(validateURL("https://random")).toBe("https://random");
@ -88,16 +90,22 @@ describe("validateURL", () => {
it("should assume https:// if the URL doesn't have a protocol", () => {
expect(validateURL("www.google.com")).toBe("https://www.google.com");
expect(validateURL("google.com")).toBe("https://google.com");
expect(validateURL("EXAMPLE.com/ABCDEF/q1=UPPER")).toBe("https://example.com/ABCDEF/q1=UPPER");
expect(validateURL("ftp://www.google.com")).toBe("ftp://www.google.com");
expect(validateURL("mailto://www.google.com")).toBe("mailto://www.google.com");
expect(validateURL("mailto://www.google.com")).toBe(
"mailto://www.google.com"
);
expect(validateURL("tel://www.google.com")).toBe("tel://www.google.com");
expect(validateURL("data://www.google.com")).toBe("data://www.google.com");
});
it("should remove trailing slashes post-validation", () => {
expect(validateURL("https://www.google.com/")).toBe("https://www.google.com");
expect(validateURL("https://www.google.com/")).toBe(
"https://www.google.com"
);
expect(validateURL("http://www.google.com/")).toBe("http://www.google.com");
expect(validateURL("https://random/")).toBe("https://random");
expect(validateURL("https://example.com/ABCDEF/")).toBe("https://example.com/ABCDEF");
});
it("should handle edge cases and bad data inputs", () => {
@ -109,4 +117,13 @@ describe("validateURL", () => {
expect(validateURL(" ")).toBe("");
expect(validateURL(" look here! ")).toBe("look here!");
});
it("should preserve case of characters in URL pathname", () => {
expect(validateURL("https://example.com/To/ResOURce?q1=Value&qZ22=UPPE!R"))
.toBe("https://example.com/To/ResOURce?q1=Value&qZ22=UPPE!R");
expect(validateURL("https://sample.com/uPeRCaSe"))
.toBe("https://sample.com/uPeRCaSe");
expect(validateURL("Example.com/PATH/To/Resource?q2=Value&q1=UPPER"))
.toBe("https://example.com/PATH/To/Resource?q2=Value&q1=UPPER");
});
});

View File

@ -3,6 +3,7 @@ const fs = require("fs");
const path = require("path");
const { pipeline } = require("stream/promises");
const { validURL } = require("../url");
const { default: slugify } = require("slugify");
/**
* Download a file to the hotdir
@ -31,7 +32,12 @@ async function downloadURIToFile(url, maxTimeout = 10_000) {
})
.finally(() => clearTimeout(timeout));
const localFilePath = path.join(WATCH_DIRECTORY, path.basename(url));
const urlObj = new URL(url);
const filename = `${urlObj.hostname}-${slugify(
urlObj.pathname.replace(/\//g, "-"),
{ lower: true }
)}`;
const localFilePath = path.join(WATCH_DIRECTORY, filename);
const writeStream = fs.createWriteStream(localFilePath);
await pipeline(res.body, writeStream);

View File

@ -80,14 +80,12 @@ function validURL(url) {
*/
function validateURL(url) {
try {
let destination = url.trim().toLowerCase();
let destination = url.trim();
// If the URL has a protocol, just pass through
if (destination.includes("://")) {
// If the URL doesn't have a protocol, assume https://
if (destination.includes("://"))
destination = new URL(destination).toString();
} else {
// If the URL doesn't have a protocol, assume https://
destination = new URL(`https://${destination.trim()}`).toString();
}
else destination = new URL(`https://${destination}`).toString();
// If the URL ends with a slash, remove it
return destination.endsWith("/") ? destination.slice(0, -1) : destination;