Add retry handling to TG for transient failures (#5391)

* Add retry handling to TG for transient failures

* add async to promise
This commit is contained in:
Timothy Carambat 2026-04-08 15:10:02 -07:00 committed by GitHub
parent 82ef164314
commit 91c8319d8f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -29,9 +29,29 @@ const {
class TelegramBotService { class TelegramBotService {
static _instance = null; static _instance = null;
// --- Retry policy for transient polling failures ---
// Maximum number of consecutive retry attempts; once the retry counter
// exceeds this value the polling error handler stops the bot.
static #MAX_POLLING_RETRIES = 10;
// Delay before the first retry. The handler doubles it on every further
// attempt (base * 2^(attempt - 1)).
static #BASE_RETRY_DELAY_MS = 1000;
// Upper bound applied to the computed backoff delay.
static #MAX_RETRY_DELAY_MS = 5 * 60 * 1000; // 5 minutes
// Markers that classify an error as retryable. Each entry is matched
// case-insensitively as a substring of `error.message` and also compared
// exactly against `error.code`.
static #NETWORK_ERROR_PATTERNS = [
"ETIMEDOUT",
"ECONNRESET",
"ECONNREFUSED",
"ENOTFOUND",
"ENETUNREACH",
"EHOSTUNREACH",
"EAI_AGAIN",
// NOTE(review): presumably the generic fatal-error code reported by the
// Telegram client library - confirm against that library's docs.
"EFATAL",
"socket hang up",
// Broad substrings: any message containing these words is treated as retryable.
"network",
"timeout",
// NOTE(review): presumably returned when another poller is using the same
// bot token - confirm that retrying is the intended reaction.
"409 Conflict",
];
/** @type {TelegramBot|null} */
#bot = null; #bot = null;
#config = null; #config = null;
#queue = new MessageQueue(); #queue = new MessageQueue();
// Polling retry state. `timer` holds the handle of the pending restart
// timeout (null when no restart is scheduled); `count` is the number of
// retry attempts made since the state was last reset.
#pollingRetry = { timer: null, count: 0 };
// Per-chat state: { workspaceSlug, threadSlug } // Per-chat state: { workspaceSlug, threadSlug }
#chatState = new Map(); #chatState = new Map();
// Pending pairing requests: chatId -> { code, telegramUsername, firstName } // Pending pairing requests: chatId -> { code, telegramUsername, firstName }
@ -138,8 +158,20 @@ class TelegramBotService {
Object.assign(this.#config, updates); Object.assign(this.#config, updates);
} }
/**
* Stop the bot and clear all state.
* @returns {Promise<void>}
*/
async stop() { async stop() {
if (!this.#bot) return; if (!this.#bot) return;
// Clear any pending retry timer
if (this.#pollingRetry.timer) {
clearTimeout(this.#pollingRetry.timer);
this.#pollingRetry.timer = null;
}
this.#pollingRetry.count = 0;
try { try {
await this.#bot.stopPolling(); await this.#bot.stopPolling();
} catch { } catch {
@ -172,23 +204,77 @@ class TelegramBotService {
} }
/** /**
* Handle polling errors with special handling for 401 Unauthorized. * Check if an error is a transient network issue that warrants retry.
* - 401 errors: Self-cleanup and delete connector */
* - Other HTTP error codes: Stop polling immediately #isNetworkError(error) {
const msg = (error.message || "").toLowerCase();
return TelegramBotService.#NETWORK_ERROR_PATTERNS.some(
(p) => msg.includes(p.toLowerCase()) || error.code === p
);
}
/**
* Handle polling errors with retry logic for network issues.
* - 401 errors: Self-cleanup and delete connector (token invalid)
* - Network errors (ETIMEDOUT, ECONNRESET, etc.): Retry with exponential backoff
* - Other errors: Stop polling immediately
*/ */
async #handlePollingError(error) { async #handlePollingError(error) {
// Ignore errors while already waiting to retry
if (this.#pollingRetry.timer) return;
this.#log("Polling error:", error.message); this.#log("Polling error:", error.message);
// 401 = invalid token, cleanup and stop
if (error.message?.includes("401")) { if (error.message?.includes("401")) {
this.#log( this.#log(
"Got 401 - bot token may be invalid. Stopping polling and deleting connector." "Got 401 - bot token invalid. Stopping and deleting connector."
); );
return this.#selfCleanup("401 Unauthorized"); return this.#selfCleanup("401 Unauthorized");
} }
this.#log( // For non-network errors, stop immediately, but don't delete the connector
`Got HTTP error ${error.message}. Stopping polling to prevent further errors.` if (!this.#isNetworkError(error)) {
this.#log(`Got HTTP error ${error.message}. Stopping polling.`);
return await this.stop();
}
// Network error - attempt retry with exponential backoff
const maxRetries = TelegramBotService.#MAX_POLLING_RETRIES;
this.#pollingRetry.count++;
if (this.#pollingRetry.count > maxRetries) {
this.#log(
`Network error. Max retries (${maxRetries}) exceeded. Stopping.`
);
this.#pollingRetry.count = 0;
return await this.stop();
}
const delay = Math.min(
TelegramBotService.#BASE_RETRY_DELAY_MS *
Math.pow(2, this.#pollingRetry.count - 1),
TelegramBotService.#MAX_RETRY_DELAY_MS
); );
return this.stop(); this.#log(
`Network error. Retry ${this.#pollingRetry.count}/${maxRetries} in ${Math.round(delay / 1000)}s...`
);
this.#pollingRetry.timer = setTimeout(async () => {
this.#pollingRetry.timer = null;
if (!this.#bot || !this.#config) return;
try {
await this.#bot.stopPolling();
} catch {}
this.#log("Attempting to restart polling...");
try {
await this.#bot.startPolling();
this.#log("Polling restarted successfully.");
} catch (err) {
this.#log("Failed to restart polling:", err.message);
await this.stop();
}
}, delay);
} }
/** /**
@ -322,10 +408,21 @@ class TelegramBotService {
return false; return false;
} }
/**
* Reset the polling retry state and clear the timer if it exists.
*/
#resetPollingRetry() {
this.#pollingRetry.count = 0;
if (this.#pollingRetry.timer) clearTimeout(this.#pollingRetry.timer);
this.#pollingRetry.timer = null;
}
#setupHandlers() { #setupHandlers() {
const ctx = this.#createContext(); const ctx = this.#createContext();
const guard = async (msg, handler) => { const guard = async (msg, handler) => {
if (!this.#config) return; if (!this.#config) return;
this.#resetPollingRetry(); // Reset the polling on successful message receipt
if (!isVerified(this.#config.approved_users, msg.chat.id)) { if (!isVerified(this.#config.approved_users, msg.chat.id)) {
sendPairingRequest(this.#bot, msg, this.#pendingPairings); sendPairingRequest(this.#bot, msg, this.#pendingPairings);
return; return;