diff --git a/docs/configurations.md b/docs/configurations.md index 2dc0d042a..631b381a9 100644 --- a/docs/configurations.md +++ b/docs/configurations.md @@ -738,7 +738,7 @@ Automatically switch to backup models when the primary model encounters retryabl | `retry_on_errors` | `[429, 503, 529]` | HTTP status codes that trigger fallback (rate limit, service unavailable). Also supports certain classified provider errors (for example, missing API key) that do not expose HTTP status codes. | | `max_fallback_attempts` | `3` | Maximum fallback attempts per session (1-10) | | `cooldown_seconds` | `60` | Cooldown in seconds before retrying a failed model | -| `timeout_seconds` | `30` | Timeout in seconds for an in-flight fallback request before forcing the next fallback model | +| `timeout_seconds` | `30` | Timeout in seconds for an in-flight fallback request before forcing the next fallback model. Set to `0` to disable timeout-based fallback and provider quota retry signal detection. | | `notify_on_fallback` | `true` | Show toast notification when switching to a fallback model | ### How It Works @@ -924,7 +924,7 @@ Automatically switch to backup models when the primary model encounters retryabl | `retry_on_errors` | `[429, 503, 529]` | HTTP status codes that trigger fallback (rate limit, service unavailable). Also supports certain classified provider errors (for example, missing API key) that do not expose HTTP status codes. | | `max_fallback_attempts` | `3` | Maximum fallback attempts per session (1-10) | | `cooldown_seconds` | `60` | Cooldown in seconds before retrying a failed model | -| `timeout_seconds` | `30` | Timeout in seconds for an in-flight fallback request before forcing the next fallback model | +| `timeout_seconds` | `30` | Timeout in seconds for an in-flight fallback request before forcing the next fallback model. Set to `0` to disable timeout-based fallback and provider quota retry signal detection. | | `notify_on_fallback` | `true` | Show toast notification when switching to a fallback model | ### How It Works diff --git a/src/hooks/runtime-fallback/index.test.ts b/src/hooks/runtime-fallback/index.test.ts index 82895086c..4ce288c8b 100644 --- a/src/hooks/runtime-fallback/index.test.ts +++ b/src/hooks/runtime-fallback/index.test.ts @@ -387,6 +387,45 @@ describe("runtime-fallback", () => { expect(fallbackLog?.data).toMatchObject({ from: "openai/gpt-5.3-codex", to: "anthropic/claude-opus-4-6" }) }) + test("should NOT trigger fallback on auto-retry signal when timeout_seconds is 0", async () => { + const hook = createRuntimeFallbackHook(createMockPluginInput(), { + config: createMockConfig({ notify_on_fallback: false, timeout_seconds: 0 }), + pluginConfig: createMockPluginConfigWithCategoryFallback(["anthropic/claude-opus-4-6"]), + }) + + const sessionID = "test-session-auto-retry-timeout-disabled" + SessionCategoryRegistry.register(sessionID, "test") + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "openai/gpt-5.3-codex" } }, + }, + }) + + await hook.event({ + event: { + type: "message.updated", + properties: { + info: { + sessionID, + role: "assistant", + model: "openai/gpt-5.3-codex", + status: "The usage limit has been reached [retrying in 27s attempt #6]", + }, + }, + }, + }) + + // Should NOT detect provider auto-retry signal when timeout is disabled + const signalLog = logCalls.find((c) => c.msg.includes("Detected provider auto-retry signal")) + expect(signalLog).toBeUndefined() + + // Should NOT trigger fallback + const fallbackLog = logCalls.find((c) => c.msg.includes("Preparing fallback")) + expect(fallbackLog).toBeUndefined() + }) + test("should log when no fallback models configured", async () => { const hook = createRuntimeFallbackHook(createMockPluginInput(), { config: createMockConfig(), diff --git a/src/hooks/runtime-fallback/index.ts b/src/hooks/runtime-fallback/index.ts index abefddf09..a117b24a6 100644 --- a/src/hooks/runtime-fallback/index.ts +++ b/src/hooks/runtime-fallback/index.ts @@ -805,7 +805,9 @@ export function createRuntimeFallbackHook( const sessionID = info?.sessionID as string | undefined const retrySignalResult = extractAutoRetrySignal(info) const retrySignal = retrySignalResult?.signal - const error = info?.error ?? (retrySignal ? { name: "ProviderRateLimitError", message: retrySignal } : undefined) + const timeoutEnabled = config.timeout_seconds > 0 + // Only treat auto-retry signal as an error if timeout-based fallback is enabled + const error = info?.error ?? (retrySignal && timeoutEnabled ? { name: "ProviderRateLimitError", message: retrySignal } : undefined) const role = info?.role as string | undefined const model = info?.model as string | undefined @@ -840,7 +842,7 @@ export function createRuntimeFallbackHook( return } - if (retrySignal && sessionRetryInFlight.has(sessionID)) { + if (retrySignal && sessionRetryInFlight.has(sessionID) && timeoutEnabled) { log(`[${HOOK_NAME}] Overriding in-flight retry due to provider auto-retry signal`, { sessionID, model, @@ -849,7 +851,7 @@ export function createRuntimeFallbackHook( sessionRetryInFlight.delete(sessionID) } - if (retrySignal) { + if (retrySignal && timeoutEnabled) { log(`[${HOOK_NAME}] Detected provider auto-retry signal`, { sessionID, model }) } @@ -918,7 +920,7 @@ export function createRuntimeFallbackHook( sessionLastAccess.set(sessionID, Date.now()) if (state.pendingFallbackModel) { - if (retrySignal) { + if (retrySignal && timeoutEnabled) { log(`[${HOOK_NAME}] Clearing pending fallback due to provider auto-retry signal`, { sessionID, pendingFallbackModel: state.pendingFallbackModel,