fix: GitLab connector infinite loop and rate limit crash for large repos (#5021)

* Fix infinite loop and rate limit crashes

* simplify logic | add max-retries to fetchNextPage and fetchSingleFileContents

---------

Co-authored-by: shatfield4 <seanhatfield5@gmail.com>
Co-authored-by: Timothy Carambat <rambat1010@gmail.com>
This commit is contained in:
Marcello Fitton 2026-02-19 12:42:21 -08:00 committed by GitHub
parent d325b07182
commit c927eda18f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -1,4 +1,5 @@
const ignore = require("ignore"); const ignore = require("ignore");
const MAX_RETRIES = 3;
/** /**
* @typedef {Object} RepoLoaderArgs * @typedef {Object} RepoLoaderArgs
@@ -46,6 +47,10 @@ class GitLabRepoLoader {
this.branches = []; this.branches = [];
} }
/**
* Returns a promise that settles after a timer delay; used to pause before retrying.
* @param {number} ms - Delay in milliseconds, handed directly to setTimeout.
* @returns {Promise<void>} Resolves (with no value) when the timer fires; never rejects.
*/
#wait(ms) {
return new Promise((resolve) => setTimeout(resolve, ms));
}
#validGitlabUrl() { #validGitlabUrl() {
const validPatterns = [ const validPatterns = [
/https:\/\/gitlab\.com\/(?<author>[^\/]+)\/(?<project>.*)/, /https:\/\/gitlab\.com\/(?<author>[^\/]+)\/(?<project>.*)/,
@@ -322,26 +327,37 @@ ${body}`
* @param {string} sourceFilePath - The path to the file in the repository. * @param {string} sourceFilePath - The path to the file in the repository.
* @returns {Promise<string|null>} The content of the file, or null if fetching fails. * @returns {Promise<string|null>} The content of the file, or null if fetching fails.
*/ */
async fetchSingleFileContents(sourceFilePath) { async fetchSingleFileContents(sourceFilePath, retries = 0) {
try { try {
const data = await fetch( const url = `${this.apiBase}/api/v4/projects/${
`${this.apiBase}/api/v4/projects/${ this.projectId
this.projectId }/repository/files/${encodeURIComponent(sourceFilePath)}/raw?ref=${
}/repository/files/${encodeURIComponent(sourceFilePath)}/raw?ref=${ this.branch
this.branch }`;
}`, const response = await fetch(url, {
{ method: "GET",
method: "GET", headers: this.accessToken ? { "PRIVATE-TOKEN": this.accessToken } : {},
headers: this.accessToken
? { "PRIVATE-TOKEN": this.accessToken }
: {},
}
).then((res) => {
if (res.ok) return res.text();
throw new Error(`Failed to fetch single file ${sourceFilePath}`);
}); });
return data; if (response.status === 429) {
if (retries >= MAX_RETRIES) {
console.warn(
`[Gitlab Loader]: Rate limit persists for ${sourceFilePath} after ${retries} retries. Skipping.`
);
return null;
}
const retryAfter = Number(response.headers.get("retry-after")) || 60;
console.warn(
`[Gitlab Loader]: Rate limit hit fetching ${sourceFilePath}. Waiting ${retryAfter}s...`
);
await this.#wait(retryAfter * 1000);
return this.fetchSingleFileContents(sourceFilePath, retries + 1);
}
if (!response.ok)
throw new Error(`Failed to fetch single file ${sourceFilePath}`);
return await response.text();
} catch (e) { } catch (e) {
console.error(`RepoLoader.fetchSingleFileContents`, e); console.error(`RepoLoader.fetchSingleFileContents`, e);
return null; return null;
@@ -353,7 +369,7 @@ ${body}`
* @param {Object} requestData - The request data. * @param {Object} requestData - The request data.
* @returns {Promise<Array<Object>|null>} The next page of data, or null if no more pages. * @returns {Promise<Array<Object>|null>} The next page of data, or null if no more pages.
*/ */
async fetchNextPage(requestData) { async fetchNextPage(requestData, retries = 0) {
try { try {
if (requestData.page === -1) return null; if (requestData.page === -1) return null;
if (!requestData.page) requestData.page = 1; if (!requestData.page) requestData.page = 1;
@@ -371,28 +387,52 @@ ${body}`
headers: this.accessToken ? { "PRIVATE-TOKEN": this.accessToken } : {}, headers: this.accessToken ? { "PRIVATE-TOKEN": this.accessToken } : {},
}); });
// Rate limits get hit very often if no PAT is provided if (response.status === 429) {
if (retries >= MAX_RETRIES) {
console.warn(
`[Gitlab Loader]: Rate limit persists for ${endpoint} after ${retries} retries. Skipping.`
);
return null;
}
const retryAfter = Number(response.headers.get("retry-after")) || 60;
console.warn(
`[Gitlab Loader]: Rate limit hit for ${endpoint}. Waiting ${retryAfter}s before retrying...`
);
await this.#wait(retryAfter * 1000);
return this.fetchNextPage(requestData, retries + 1);
}
if (response.status === 401) { if (response.status === 401) {
console.warn(`Rate limit hit for ${endpoint}. Skipping.`); console.warn(
`[Gitlab Loader]: Unauthorized request for ${endpoint}. Skipping.`
);
return null;
}
if (!response.ok) {
console.warn(
`[Gitlab Loader]: Unexpected status ${response.status} for ${endpoint}. Skipping.`
);
return null; return null;
} }
const totalPages = Number(response.headers.get("x-total-pages"));
const data = await response.json(); const data = await response.json();
if (!Array.isArray(data)) { if (!Array.isArray(data)) {
console.warn(`Unexpected response format for ${endpoint}:`, data); console.warn(`Unexpected response format for ${endpoint}:`, data);
return []; return [];
} }
// GitLab omits x-total-pages for large repos, so use x-next-page
// as the sole pagination signal — it's empty on the last page.
const nextPage = response.headers.get("x-next-page");
const totalPages = response.headers.get("x-total-pages");
console.log( console.log(
`Gitlab RepoLoader: fetched ${endpoint} page ${requestData.page}/${totalPages} with ${data.length} records.` `Gitlab RepoLoader: fetched ${endpoint} page ${requestData.page}${
totalPages ? `/${totalPages}` : ""
} with ${data.length} records.`
); );
if (totalPages === requestData.page) { requestData.page = nextPage?.trim() ? Number(nextPage) : -1;
requestData.page = -1;
} else {
requestData.page = Number(response.headers.get("x-next-page"));
}
return data; return data;
} catch (e) { } catch (e) {