feat(runtime-fallback): add timeout toggle for quota retry detection

Make provider auto-retry signal detection respect the timeout_seconds setting:

- When timeout_seconds=0, disable quota-based fallback escalation
- Only treat auto-retry signals as errors when the timeout is enabled
- Add a test to verify behavior when timeout_seconds is disabled
- Update documentation to explain the timeout_seconds=0 behavior

This allows users to disable timeout-based fallbacks while keeping error-based fallback functionality intact.
2026-02-12 17:49:13 -05:00
parent 8b2ae957e5
commit 68f5d982fc
3 changed files with 47 additions and 6 deletions
--- a/docs/configurations.md
+++ b/docs/configurations.md
@@ -738,7 +738,7 @@ Automatically switch to backup models when the primary model encounters retryabl
 | `retry_on_errors`       | `[429, 503, 529]` | HTTP status codes that trigger fallback (rate limit, service unavailable). Also supports certain classified provider errors (for example, missing API key) that do not expose HTTP status codes.   |
 | `max_fallback_attempts` | `3`               | Maximum fallback attempts per session (1-10)                                |
 | `cooldown_seconds`      | `60`              | Cooldown in seconds before retrying a failed model                          |
-| `timeout_seconds`       | `30`              | Timeout in seconds for an in-flight fallback request before forcing the next fallback model                          |
+| `timeout_seconds`       | `30`              | Timeout in seconds for an in-flight fallback request before forcing the next fallback model. Set to `0` to disable timeout-based fallback and provider quota retry signal detection. |
 | `notify_on_fallback`    | `true`            | Show toast notification when switching to a fallback model                  |

 ### How It Works
@@ -924,7 +924,7 @@ Automatically switch to backup models when the primary model encounters retryabl
 | `retry_on_errors`       | `[429, 503, 529]` | HTTP status codes that trigger fallback (rate limit, service unavailable). Also supports certain classified provider errors (for example, missing API key) that do not expose HTTP status codes.   |
 | `max_fallback_attempts` | `3`               | Maximum fallback attempts per session (1-10)                                |
 | `cooldown_seconds`      | `60`              | Cooldown in seconds before retrying a failed model                          |
-| `timeout_seconds`       | `30`              | Timeout in seconds for an in-flight fallback request before forcing the next fallback model                          |
+| `timeout_seconds`       | `30`              | Timeout in seconds for an in-flight fallback request before forcing the next fallback model. Set to `0` to disable timeout-based fallback and provider quota retry signal detection. |
 | `notify_on_fallback`    | `true`            | Show toast notification when switching to a fallback model                  |

 ### How It Works
--- a/src/hooks/runtime-fallback/index.test.ts
+++ b/src/hooks/runtime-fallback/index.test.ts
@@ -387,6 +387,45 @@ describe("runtime-fallback", () => {
      expect(fallbackLog?.data).toMatchObject({ from: "openai/gpt-5.3-codex", to: "anthropic/claude-opus-4-6" })
    })

+    test("should NOT trigger fallback on auto-retry signal when timeout_seconds is 0", async () => {
+      const hook = createRuntimeFallbackHook(createMockPluginInput(), {
+        config: createMockConfig({ notify_on_fallback: false, timeout_seconds: 0 }),
+        pluginConfig: createMockPluginConfigWithCategoryFallback(["anthropic/claude-opus-4-6"]),
+      })
+
+      const sessionID = "test-session-auto-retry-timeout-disabled"
+      SessionCategoryRegistry.register(sessionID, "test")
+
+      await hook.event({
+        event: {
+          type: "session.created",
+          properties: { info: { id: sessionID, model: "openai/gpt-5.3-codex" } },
+        },
+      })
+
+      await hook.event({
+        event: {
+          type: "message.updated",
+          properties: {
+            info: {
+              sessionID,
+              role: "assistant",
+              model: "openai/gpt-5.3-codex",
+              status: "The usage limit has been reached [retrying in 27s attempt #6]",
+            },
+          },
+        },
+      })
+
+      // Should NOT detect provider auto-retry signal when timeout is disabled
+      const signalLog = logCalls.find((c) => c.msg.includes("Detected provider auto-retry signal"))
+      expect(signalLog).toBeUndefined()
+
+      // Should NOT trigger fallback
+      const fallbackLog = logCalls.find((c) => c.msg.includes("Preparing fallback"))
+      expect(fallbackLog).toBeUndefined()
+    })
+
    test("should log when no fallback models configured", async () => {
      const hook = createRuntimeFallbackHook(createMockPluginInput(), {
        config: createMockConfig(),
--- a/src/hooks/runtime-fallback/index.ts
+++ b/src/hooks/runtime-fallback/index.ts
@@ -805,7 +805,9 @@ export function createRuntimeFallbackHook(
      const sessionID = info?.sessionID as string | undefined
      const retrySignalResult = extractAutoRetrySignal(info)
      const retrySignal = retrySignalResult?.signal
-      const error = info?.error ?? (retrySignal ? { name: "ProviderRateLimitError", message: retrySignal } : undefined)
+      const timeoutEnabled = config.timeout_seconds > 0
+      // Only treat auto-retry signal as an error if timeout-based fallback is enabled
+      const error = info?.error ?? (retrySignal && timeoutEnabled ? { name: "ProviderRateLimitError", message: retrySignal } : undefined)
      const role = info?.role as string | undefined
      const model = info?.model as string | undefined

@@ -840,7 +842,7 @@ export function createRuntimeFallbackHook(
          return
        }

-        if (retrySignal && sessionRetryInFlight.has(sessionID)) {
+        if (retrySignal && sessionRetryInFlight.has(sessionID) && timeoutEnabled) {
          log(`[${HOOK_NAME}] Overriding in-flight retry due to provider auto-retry signal`, {
            sessionID,
            model,
@@ -849,7 +851,7 @@ export function createRuntimeFallbackHook(
          sessionRetryInFlight.delete(sessionID)
        }

-        if (retrySignal) {
+        if (retrySignal && timeoutEnabled) {
          log(`[${HOOK_NAME}] Detected provider auto-retry signal`, { sessionID, model })
        }

@@ -918,7 +920,7 @@ export function createRuntimeFallbackHook(
          sessionLastAccess.set(sessionID, Date.now())

          if (state.pendingFallbackModel) {
-            if (retrySignal) {
+            if (retrySignal && timeoutEnabled) {
              log(`[${HOOK_NAME}] Clearing pending fallback due to provider auto-retry signal`, {
                sessionID,
                pendingFallbackModel: state.pendingFallbackModel,