feat(runtime-fallback): add timeout toggle for quota retry detection

Make provider auto-retry signal detection respect the timeout_seconds setting:
- When timeout_seconds=0, disable quota-based fallback escalation
- Only treat auto-retry signals as errors when timeout is enabled
- Add test to verify behavior when timeout_seconds is disabled
- Update documentation to explain timeout_seconds=0 behavior

This allows users to disable timeout-based fallbacks while keeping
error-based fallback functionality intact.
This commit is contained in:
Youngbin Kim
2026-02-12 17:49:13 -05:00
committed by YeonGyu-Kim
parent 8b2ae957e5
commit 68f5d982fc
3 changed files with 47 additions and 6 deletions

View File

@@ -738,7 +738,7 @@ Automatically switch to backup models when the primary model encounters retryabl
| `retry_on_errors` | `[429, 503, 529]` | HTTP status codes that trigger fallback (rate limit, service unavailable). Also supports certain classified provider errors (for example, missing API key) that do not expose HTTP status codes. |
| `max_fallback_attempts` | `3` | Maximum fallback attempts per session (1-10) |
| `cooldown_seconds` | `60` | Cooldown in seconds before retrying a failed model |
| `timeout_seconds` | `30` | Timeout in seconds for an in-flight fallback request before forcing the next fallback model |
| `timeout_seconds` | `30` | Timeout in seconds for an in-flight fallback request before forcing the next fallback model. Set to `0` to disable timeout-based fallback and provider quota retry signal detection. |
| `notify_on_fallback` | `true` | Show toast notification when switching to a fallback model |
### How It Works
@@ -924,7 +924,7 @@ Automatically switch to backup models when the primary model encounters retryabl
| `retry_on_errors` | `[429, 503, 529]` | HTTP status codes that trigger fallback (rate limit, service unavailable). Also supports certain classified provider errors (for example, missing API key) that do not expose HTTP status codes. |
| `max_fallback_attempts` | `3` | Maximum fallback attempts per session (1-10) |
| `cooldown_seconds` | `60` | Cooldown in seconds before retrying a failed model |
| `timeout_seconds` | `30` | Timeout in seconds for an in-flight fallback request before forcing the next fallback model |
| `timeout_seconds` | `30` | Timeout in seconds for an in-flight fallback request before forcing the next fallback model. Set to `0` to disable timeout-based fallback and provider quota retry signal detection. |
| `notify_on_fallback` | `true` | Show toast notification when switching to a fallback model |
### How It Works

View File

@@ -387,6 +387,45 @@ describe("runtime-fallback", () => {
expect(fallbackLog?.data).toMatchObject({ from: "openai/gpt-5.3-codex", to: "anthropic/claude-opus-4-6" })
})
test("should NOT trigger fallback on auto-retry signal when timeout_seconds is 0", async () => {
  // Hook under test is built with timeout_seconds: 0 and a single category fallback model.
  const hook = createRuntimeFallbackHook(createMockPluginInput(), {
    config: createMockConfig({ notify_on_fallback: false, timeout_seconds: 0 }),
    pluginConfig: createMockPluginConfigWithCategoryFallback(["anthropic/claude-opus-4-6"]),
  })

  const sessionID = "test-session-auto-retry-timeout-disabled"
  const primaryModel = "openai/gpt-5.3-codex"
  // Looks up the first captured log entry whose message contains the given fragment.
  const findLog = (fragment: string) => logCalls.find((entry) => entry.msg.includes(fragment))

  SessionCategoryRegistry.register(sessionID, "test")

  // Step 1: announce the session running on the primary model.
  await hook.event({
    event: {
      type: "session.created",
      properties: { info: { id: sessionID, model: primaryModel } },
    },
  })

  // Step 2: deliver an assistant message whose status text carries a retry notice.
  await hook.event({
    event: {
      type: "message.updated",
      properties: {
        info: {
          sessionID,
          role: "assistant",
          model: primaryModel,
          status: "The usage limit has been reached [retrying in 27s attempt #6]",
        },
      },
    },
  })

  // With the timeout disabled, the auto-retry signal must not be reported...
  expect(findLog("Detected provider auto-retry signal")).toBeUndefined()

  // ...and no fallback may be prepared either.
  expect(findLog("Preparing fallback")).toBeUndefined()
})
test("should log when no fallback models configured", async () => {
const hook = createRuntimeFallbackHook(createMockPluginInput(), {
config: createMockConfig(),

View File

@@ -805,7 +805,9 @@ export function createRuntimeFallbackHook(
const sessionID = info?.sessionID as string | undefined
const retrySignalResult = extractAutoRetrySignal(info)
const retrySignal = retrySignalResult?.signal
const error = info?.error ?? (retrySignal ? { name: "ProviderRateLimitError", message: retrySignal } : undefined)
const timeoutEnabled = config.timeout_seconds > 0
// Only treat auto-retry signal as an error if timeout-based fallback is enabled
const error = info?.error ?? (retrySignal && timeoutEnabled ? { name: "ProviderRateLimitError", message: retrySignal } : undefined)
const role = info?.role as string | undefined
const model = info?.model as string | undefined
@@ -840,7 +842,7 @@ export function createRuntimeFallbackHook(
return
}
if (retrySignal && sessionRetryInFlight.has(sessionID)) {
if (retrySignal && sessionRetryInFlight.has(sessionID) && timeoutEnabled) {
log(`[${HOOK_NAME}] Overriding in-flight retry due to provider auto-retry signal`, {
sessionID,
model,
@@ -849,7 +851,7 @@ export function createRuntimeFallbackHook(
sessionRetryInFlight.delete(sessionID)
}
if (retrySignal) {
if (retrySignal && timeoutEnabled) {
log(`[${HOOK_NAME}] Detected provider auto-retry signal`, { sessionID, model })
}
@@ -918,7 +920,7 @@ export function createRuntimeFallbackHook(
sessionLastAccess.set(sessionID, Date.now())
if (state.pendingFallbackModel) {
if (retrySignal) {
if (retrySignal && timeoutEnabled) {
log(`[${HOOK_NAME}] Clearing pending fallback due to provider auto-retry signal`, {
sessionID,
pendingFallbackModel: state.pendingFallbackModel,