feat(runtime-fallback): add timeout toggle for quota retry detection
Make provider auto-retry signal detection respect timeout_seconds setting:
- When timeout_seconds=0, disable quota-based fallback escalation
- Only treat auto-retry signals as errors when timeout is enabled
- Add test to verify behavior when timeout_seconds is disabled
- Update documentation to explain timeout_seconds=0 behavior

This allows users to disable timeout-based fallbacks while keeping error-based fallback functionality intact.
This commit is contained in:
committed by
YeonGyu-Kim
parent
8b2ae957e5
commit
68f5d982fc
@@ -738,7 +738,7 @@ Automatically switch to backup models when the primary model encounters retryabl
|
||||
| `retry_on_errors` | `[429, 503, 529]` | HTTP status codes that trigger fallback (rate limit, service unavailable). Also supports certain classified provider errors (for example, missing API key) that do not expose HTTP status codes. |
|
||||
| `max_fallback_attempts` | `3` | Maximum fallback attempts per session (1-10) |
|
||||
| `cooldown_seconds` | `60` | Cooldown in seconds before retrying a failed model |
|
||||
| `timeout_seconds` | `30` | Timeout in seconds for an in-flight fallback request before forcing the next fallback model |
|
||||
| `timeout_seconds` | `30` | Timeout in seconds for an in-flight fallback request before forcing the next fallback model. Set to `0` to disable timeout-based fallback and provider quota retry signal detection. |
|
||||
| `notify_on_fallback` | `true` | Show toast notification when switching to a fallback model |
|
||||
|
||||
### How It Works
|
||||
@@ -924,7 +924,7 @@ Automatically switch to backup models when the primary model encounters retryabl
|
||||
| `retry_on_errors` | `[429, 503, 529]` | HTTP status codes that trigger fallback (rate limit, service unavailable). Also supports certain classified provider errors (for example, missing API key) that do not expose HTTP status codes. |
|
||||
| `max_fallback_attempts` | `3` | Maximum fallback attempts per session (1-10) |
|
||||
| `cooldown_seconds` | `60` | Cooldown in seconds before retrying a failed model |
|
||||
| `timeout_seconds` | `30` | Timeout in seconds for an in-flight fallback request before forcing the next fallback model |
|
||||
| `timeout_seconds` | `30` | Timeout in seconds for an in-flight fallback request before forcing the next fallback model. Set to `0` to disable timeout-based fallback and provider quota retry signal detection. |
|
||||
| `notify_on_fallback` | `true` | Show toast notification when switching to a fallback model |
|
||||
|
||||
### How It Works
|
||||
|
||||
@@ -387,6 +387,45 @@ describe("runtime-fallback", () => {
|
||||
expect(fallbackLog?.data).toMatchObject({ from: "openai/gpt-5.3-codex", to: "anthropic/claude-opus-4-6" })
|
||||
})
|
||||
|
||||
// Regression test for the timeout toggle: with `timeout_seconds: 0` in the hook
// config, an assistant `message.updated` event whose `status` text carries a
// "retrying in …" notice must produce neither the
// "Detected provider auto-retry signal" log nor the "Preparing fallback" log.
test("should NOT trigger fallback on auto-retry signal when timeout_seconds is 0", async () => {
|
||||
// A category fallback model is configured, so a missing fallback log below
// cannot be explained by an empty fallback list.
const hook = createRuntimeFallbackHook(createMockPluginInput(), {
|
||||
config: createMockConfig({ notify_on_fallback: false, timeout_seconds: 0 }),
|
||||
pluginConfig: createMockPluginConfigWithCategoryFallback(["anthropic/claude-opus-4-6"]),
|
||||
})
|
||||
|
||||
const sessionID = "test-session-auto-retry-timeout-disabled"
|
||||
SessionCategoryRegistry.register(sessionID, "test")
|
||||
|
||||
// Announce the session to the hook with its primary model.
await hook.event({
|
||||
event: {
|
||||
type: "session.created",
|
||||
properties: { info: { id: sessionID, model: "openai/gpt-5.3-codex" } },
|
||||
},
|
||||
})
|
||||
|
||||
// Deliver an assistant message update with no `error` field; the only
// retry hint is the "[retrying in 27s attempt #6]" text in `status`.
await hook.event({
|
||||
event: {
|
||||
type: "message.updated",
|
||||
properties: {
|
||||
info: {
|
||||
sessionID,
|
||||
role: "assistant",
|
||||
model: "openai/gpt-5.3-codex",
|
||||
status: "The usage limit has been reached [retrying in 27s attempt #6]",
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
// Should NOT detect provider auto-retry signal when timeout is disabled
|
||||
const signalLog = logCalls.find((c) => c.msg.includes("Detected provider auto-retry signal"))
|
||||
expect(signalLog).toBeUndefined()
|
||||
|
||||
// Should NOT trigger fallback
|
||||
const fallbackLog = logCalls.find((c) => c.msg.includes("Preparing fallback"))
|
||||
expect(fallbackLog).toBeUndefined()
|
||||
})
|
||||
|
||||
test("should log when no fallback models configured", async () => {
|
||||
const hook = createRuntimeFallbackHook(createMockPluginInput(), {
|
||||
config: createMockConfig(),
|
||||
|
||||
@@ -805,7 +805,9 @@ export function createRuntimeFallbackHook(
|
||||
const sessionID = info?.sessionID as string | undefined
|
||||
const retrySignalResult = extractAutoRetrySignal(info)
|
||||
const retrySignal = retrySignalResult?.signal
|
||||
const error = info?.error ?? (retrySignal ? { name: "ProviderRateLimitError", message: retrySignal } : undefined)
|
||||
const timeoutEnabled = config.timeout_seconds > 0
|
||||
// Only treat auto-retry signal as an error if timeout-based fallback is enabled
|
||||
const error = info?.error ?? (retrySignal && timeoutEnabled ? { name: "ProviderRateLimitError", message: retrySignal } : undefined)
|
||||
const role = info?.role as string | undefined
|
||||
const model = info?.model as string | undefined
|
||||
|
||||
@@ -840,7 +842,7 @@ export function createRuntimeFallbackHook(
|
||||
return
|
||||
}
|
||||
|
||||
if (retrySignal && sessionRetryInFlight.has(sessionID)) {
|
||||
if (retrySignal && sessionRetryInFlight.has(sessionID) && timeoutEnabled) {
|
||||
log(`[${HOOK_NAME}] Overriding in-flight retry due to provider auto-retry signal`, {
|
||||
sessionID,
|
||||
model,
|
||||
@@ -849,7 +851,7 @@ export function createRuntimeFallbackHook(
|
||||
sessionRetryInFlight.delete(sessionID)
|
||||
}
|
||||
|
||||
if (retrySignal) {
|
||||
if (retrySignal && timeoutEnabled) {
|
||||
log(`[${HOOK_NAME}] Detected provider auto-retry signal`, { sessionID, model })
|
||||
}
|
||||
|
||||
@@ -918,7 +920,7 @@ export function createRuntimeFallbackHook(
|
||||
sessionLastAccess.set(sessionID, Date.now())
|
||||
|
||||
if (state.pendingFallbackModel) {
|
||||
if (retrySignal) {
|
||||
if (retrySignal && timeoutEnabled) {
|
||||
log(`[${HOOK_NAME}] Clearing pending fallback due to provider auto-retry signal`, {
|
||||
sessionID,
|
||||
pendingFallbackModel: state.pendingFallbackModel,
|
||||
|
||||
Reference in New Issue
Block a user