diff --git a/collector/utils/extensions/RepoLoader/GitlabRepo/RepoLoader/index.js b/collector/utils/extensions/RepoLoader/GitlabRepo/RepoLoader/index.js index 52e6fc7a..10bb5ac3 100644 --- a/collector/utils/extensions/RepoLoader/GitlabRepo/RepoLoader/index.js +++ b/collector/utils/extensions/RepoLoader/GitlabRepo/RepoLoader/index.js @@ -1,4 +1,5 @@ const ignore = require("ignore"); +const MAX_RETRIES = 3; /** * @typedef {Object} RepoLoaderArgs @@ -46,6 +47,10 @@ class GitLabRepoLoader { this.branches = []; } + #wait(ms) { + return new Promise((resolve) => setTimeout(resolve, ms)); + } + #validGitlabUrl() { const validPatterns = [ /https:\/\/gitlab\.com\/(?[^\/]+)\/(?.*)/, @@ -322,26 +327,37 @@ ${body}` * @param {string} sourceFilePath - The path to the file in the repository. * @returns {Promise} The content of the file, or null if fetching fails. */ - async fetchSingleFileContents(sourceFilePath) { + async fetchSingleFileContents(sourceFilePath, retries = 0) { try { - const data = await fetch( - `${this.apiBase}/api/v4/projects/${ - this.projectId - }/repository/files/${encodeURIComponent(sourceFilePath)}/raw?ref=${ - this.branch - }`, - { - method: "GET", - headers: this.accessToken - ? { "PRIVATE-TOKEN": this.accessToken } - : {}, - } - ).then((res) => { - if (res.ok) return res.text(); - throw new Error(`Failed to fetch single file ${sourceFilePath}`); + const url = `${this.apiBase}/api/v4/projects/${ + this.projectId + }/repository/files/${encodeURIComponent(sourceFilePath)}/raw?ref=${ + this.branch + }`; + const response = await fetch(url, { + method: "GET", + headers: this.accessToken ? { "PRIVATE-TOKEN": this.accessToken } : {}, }); - return data; + if (response.status === 429) { + if (retries >= MAX_RETRIES) { + console.warn( + `[Gitlab Loader]: Rate limit persists for ${sourceFilePath} after ${retries} retries. Skipping.` + ); + return null; + } + const retryAfter = Number(response.headers.get("retry-after")) || 60; + console.warn( + `[Gitlab Loader]: Rate limit hit fetching ${sourceFilePath}. Waiting ${retryAfter}s...` + ); + await this.#wait(retryAfter * 1000); + return this.fetchSingleFileContents(sourceFilePath, retries + 1); + } + + if (!response.ok) + throw new Error(`Failed to fetch single file ${sourceFilePath}`); + + return await response.text(); } catch (e) { console.error(`RepoLoader.fetchSingleFileContents`, e); return null; @@ -353,7 +369,7 @@ ${body}` * @param {Object} requestData - The request data. * @returns {Promise|null>} The next page of data, or null if no more pages. */ - async fetchNextPage(requestData) { + async fetchNextPage(requestData, retries = 0) { try { if (requestData.page === -1) return null; if (!requestData.page) requestData.page = 1; @@ -371,28 +387,52 @@ ${body}` headers: this.accessToken ? { "PRIVATE-TOKEN": this.accessToken } : {}, }); - // Rate limits get hit very often if no PAT is provided + if (response.status === 429) { + if (retries >= MAX_RETRIES) { + console.warn( + `[Gitlab Loader]: Rate limit persists for ${endpoint} after ${retries} retries. Skipping.` + ); + return null; + } + const retryAfter = Number(response.headers.get("retry-after")) || 60; + console.warn( + `[Gitlab Loader]: Rate limit hit for ${endpoint}. Waiting ${retryAfter}s before retrying...` + ); + await this.#wait(retryAfter * 1000); + return this.fetchNextPage(requestData, retries + 1); + } + if (response.status === 401) { - console.warn(`Rate limit hit for ${endpoint}. Skipping.`); + console.warn( + `[Gitlab Loader]: Unauthorized request for ${endpoint}. Skipping.` + ); + return null; + } + + if (!response.ok) { + console.warn( + `[Gitlab Loader]: Unexpected status ${response.status} for ${endpoint}. Skipping.` + ); return null; } - const totalPages = Number(response.headers.get("x-total-pages")); const data = await response.json(); if (!Array.isArray(data)) { console.warn(`Unexpected response format for ${endpoint}:`, data); return []; } + // GitLab omits x-total-pages for large repos, so use x-next-page + // as the sole pagination signal — it's empty on the last page. + const nextPage = response.headers.get("x-next-page"); + const totalPages = response.headers.get("x-total-pages"); console.log( - `Gitlab RepoLoader: fetched ${endpoint} page ${requestData.page}/${totalPages} with ${data.length} records.` + `Gitlab RepoLoader: fetched ${endpoint} page ${requestData.page}${ + totalPages ? `/${totalPages}` : "" + } with ${data.length} records.` ); - if (totalPages === requestData.page) { - requestData.page = -1; - } else { - requestData.page = Number(response.headers.get("x-next-page")); - } + requestData.page = nextPage?.trim() ? Number(nextPage) : -1; return data; } catch (e) {