fix: GitLab connector infinite loop and rate limit crash for large repos (#5021)
* Fix infinite loop and rate limit crashes * simplify logic | add max-retries to fetchNextPage and fetchSingleFileContents --------- Co-authored-by: shatfield4 <seanhatfield5@gmail.com> Co-authored-by: Timothy Carambat <rambat1010@gmail.com>
This commit is contained in:
parent
d325b07182
commit
c927eda18f
@ -1,4 +1,5 @@
|
||||
const ignore = require("ignore");
|
||||
const MAX_RETRIES = 3;
|
||||
|
||||
/**
|
||||
* @typedef {Object} RepoLoaderArgs
|
||||
@ -46,6 +47,10 @@ class GitLabRepoLoader {
|
||||
this.branches = [];
|
||||
}
|
||||
|
||||
#wait(ms) {
|
||||
return new Promise((resolve) => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
#validGitlabUrl() {
|
||||
const validPatterns = [
|
||||
/https:\/\/gitlab\.com\/(?<author>[^\/]+)\/(?<project>.*)/,
|
||||
@ -322,26 +327,37 @@ ${body}`
|
||||
* @param {string} sourceFilePath - The path to the file in the repository.
|
||||
* @returns {Promise<string|null>} The content of the file, or null if fetching fails.
|
||||
*/
|
||||
async fetchSingleFileContents(sourceFilePath) {
|
||||
async fetchSingleFileContents(sourceFilePath, retries = 0) {
|
||||
try {
|
||||
const data = await fetch(
|
||||
`${this.apiBase}/api/v4/projects/${
|
||||
const url = `${this.apiBase}/api/v4/projects/${
|
||||
this.projectId
|
||||
}/repository/files/${encodeURIComponent(sourceFilePath)}/raw?ref=${
|
||||
this.branch
|
||||
}`,
|
||||
{
|
||||
}`;
|
||||
const response = await fetch(url, {
|
||||
method: "GET",
|
||||
headers: this.accessToken
|
||||
? { "PRIVATE-TOKEN": this.accessToken }
|
||||
: {},
|
||||
}
|
||||
).then((res) => {
|
||||
if (res.ok) return res.text();
|
||||
throw new Error(`Failed to fetch single file ${sourceFilePath}`);
|
||||
headers: this.accessToken ? { "PRIVATE-TOKEN": this.accessToken } : {},
|
||||
});
|
||||
|
||||
return data;
|
||||
if (response.status === 429) {
|
||||
if (retries >= MAX_RETRIES) {
|
||||
console.warn(
|
||||
`[Gitlab Loader]: Rate limit persists for ${sourceFilePath} after ${retries} retries. Skipping.`
|
||||
);
|
||||
return null;
|
||||
}
|
||||
const retryAfter = Number(response.headers.get("retry-after")) || 60;
|
||||
console.warn(
|
||||
`[Gitlab Loader]: Rate limit hit fetching ${sourceFilePath}. Waiting ${retryAfter}s...`
|
||||
);
|
||||
await this.#wait(retryAfter * 1000);
|
||||
return this.fetchSingleFileContents(sourceFilePath, retries + 1);
|
||||
}
|
||||
|
||||
if (!response.ok)
|
||||
throw new Error(`Failed to fetch single file ${sourceFilePath}`);
|
||||
|
||||
return await response.text();
|
||||
} catch (e) {
|
||||
console.error(`RepoLoader.fetchSingleFileContents`, e);
|
||||
return null;
|
||||
@ -353,7 +369,7 @@ ${body}`
|
||||
* @param {Object} requestData - The request data.
|
||||
* @returns {Promise<Array<Object>|null>} The next page of data, or null if no more pages.
|
||||
*/
|
||||
async fetchNextPage(requestData) {
|
||||
async fetchNextPage(requestData, retries = 0) {
|
||||
try {
|
||||
if (requestData.page === -1) return null;
|
||||
if (!requestData.page) requestData.page = 1;
|
||||
@ -371,28 +387,52 @@ ${body}`
|
||||
headers: this.accessToken ? { "PRIVATE-TOKEN": this.accessToken } : {},
|
||||
});
|
||||
|
||||
// Rate limits get hit very often if no PAT is provided
|
||||
if (response.status === 429) {
|
||||
if (retries >= MAX_RETRIES) {
|
||||
console.warn(
|
||||
`[Gitlab Loader]: Rate limit persists for ${endpoint} after ${retries} retries. Skipping.`
|
||||
);
|
||||
return null;
|
||||
}
|
||||
const retryAfter = Number(response.headers.get("retry-after")) || 60;
|
||||
console.warn(
|
||||
`[Gitlab Loader]: Rate limit hit for ${endpoint}. Waiting ${retryAfter}s before retrying...`
|
||||
);
|
||||
await this.#wait(retryAfter * 1000);
|
||||
return this.fetchNextPage(requestData, retries + 1);
|
||||
}
|
||||
|
||||
if (response.status === 401) {
|
||||
console.warn(`Rate limit hit for ${endpoint}. Skipping.`);
|
||||
console.warn(
|
||||
`[Gitlab Loader]: Unauthorized request for ${endpoint}. Skipping.`
|
||||
);
|
||||
return null;
|
||||
}
|
||||
|
||||
if (!response.ok) {
|
||||
console.warn(
|
||||
`[Gitlab Loader]: Unexpected status ${response.status} for ${endpoint}. Skipping.`
|
||||
);
|
||||
return null;
|
||||
}
|
||||
|
||||
const totalPages = Number(response.headers.get("x-total-pages"));
|
||||
const data = await response.json();
|
||||
if (!Array.isArray(data)) {
|
||||
console.warn(`Unexpected response format for ${endpoint}:`, data);
|
||||
return [];
|
||||
}
|
||||
|
||||
// GitLab omits x-total-pages for large repos, so use x-next-page
|
||||
// as the sole pagination signal — it's empty on the last page.
|
||||
const nextPage = response.headers.get("x-next-page");
|
||||
const totalPages = response.headers.get("x-total-pages");
|
||||
console.log(
|
||||
`Gitlab RepoLoader: fetched ${endpoint} page ${requestData.page}/${totalPages} with ${data.length} records.`
|
||||
`Gitlab RepoLoader: fetched ${endpoint} page ${requestData.page}${
|
||||
totalPages ? `/${totalPages}` : ""
|
||||
} with ${data.length} records.`
|
||||
);
|
||||
|
||||
if (totalPages === requestData.page) {
|
||||
requestData.page = -1;
|
||||
} else {
|
||||
requestData.page = Number(response.headers.get("x-next-page"));
|
||||
}
|
||||
requestData.page = nextPage?.trim() ? Number(nextPage) : -1;
|
||||
|
||||
return data;
|
||||
} catch (e) {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user