Merge pull request #2655 from tad-hq/infinite-circuit-target-fix
fix(circuit-breaker): make repetitive detection target-aware and add enabled escape hatch
This commit is contained in:
@@ -3708,6 +3708,9 @@
|
||||
"circuitBreaker": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"enabled": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"maxToolCalls": {
|
||||
"type": "integer",
|
||||
"minimum": 10,
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { z } from "zod"
|
||||
|
||||
const CircuitBreakerConfigSchema = z.object({
|
||||
enabled: z.boolean().optional(),
|
||||
maxToolCalls: z.number().int().min(10).optional(),
|
||||
windowSize: z.number().int().min(5).optional(),
|
||||
repetitionThresholdPercent: z.number().gt(0).max(100).optional(),
|
||||
|
||||
@@ -9,6 +9,7 @@ export const DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS = 1_800_000
|
||||
export const DEFAULT_MAX_TOOL_CALLS = 200
|
||||
export const DEFAULT_CIRCUIT_BREAKER_WINDOW_SIZE = 20
|
||||
export const DEFAULT_CIRCUIT_BREAKER_REPETITION_THRESHOLD_PERCENT = 80
|
||||
export const DEFAULT_CIRCUIT_BREAKER_ENABLED = true
|
||||
export const MIN_RUNTIME_BEFORE_STALE_MS = 30_000
|
||||
export const MIN_IDLE_TIME_MS = 5000
|
||||
export const POLLING_INTERVAL_MS = 3000
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import { describe, expect, test } from "bun:test"
|
||||
import {
|
||||
createToolCallSignature,
|
||||
detectRepetitiveToolUse,
|
||||
recordToolCall,
|
||||
resolveCircuitBreakerSettings,
|
||||
@@ -17,6 +18,17 @@ function buildWindow(
|
||||
)
|
||||
}
|
||||
|
||||
function buildWindowWithInputs(
|
||||
calls: Array<{ tool: string; input?: Record<string, unknown> }>,
|
||||
override?: Parameters<typeof resolveCircuitBreakerSettings>[0]
|
||||
) {
|
||||
const settings = resolveCircuitBreakerSettings(override)
|
||||
return calls.reduce(
|
||||
(window, { tool, input }) => recordToolCall(window, tool, settings, input),
|
||||
undefined as ReturnType<typeof recordToolCall> | undefined
|
||||
)
|
||||
}
|
||||
|
||||
describe("loop-detector", () => {
|
||||
describe("resolveCircuitBreakerSettings", () => {
|
||||
describe("#given nested circuit breaker config", () => {
|
||||
@@ -31,12 +43,90 @@ describe("loop-detector", () => {
|
||||
})
|
||||
|
||||
expect(result).toEqual({
|
||||
enabled: true,
|
||||
maxToolCalls: 120,
|
||||
windowSize: 10,
|
||||
repetitionThresholdPercent: 70,
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe("#given no enabled config", () => {
|
||||
test("#when resolved #then enabled defaults to true", () => {
|
||||
const result = resolveCircuitBreakerSettings({
|
||||
circuitBreaker: {
|
||||
maxToolCalls: 100,
|
||||
windowSize: 5,
|
||||
repetitionThresholdPercent: 60,
|
||||
},
|
||||
})
|
||||
|
||||
expect(result.enabled).toBe(true)
|
||||
})
|
||||
})
|
||||
|
||||
describe("#given enabled is false in config", () => {
|
||||
test("#when resolved #then enabled is false", () => {
|
||||
const result = resolveCircuitBreakerSettings({
|
||||
circuitBreaker: {
|
||||
enabled: false,
|
||||
maxToolCalls: 100,
|
||||
windowSize: 5,
|
||||
repetitionThresholdPercent: 60,
|
||||
},
|
||||
})
|
||||
|
||||
expect(result.enabled).toBe(false)
|
||||
})
|
||||
})
|
||||
|
||||
describe("#given enabled is true in config", () => {
|
||||
test("#when resolved #then enabled is true", () => {
|
||||
const result = resolveCircuitBreakerSettings({
|
||||
circuitBreaker: {
|
||||
enabled: true,
|
||||
maxToolCalls: 100,
|
||||
windowSize: 5,
|
||||
repetitionThresholdPercent: 60,
|
||||
},
|
||||
})
|
||||
|
||||
expect(result.enabled).toBe(true)
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe("createToolCallSignature", () => {
|
||||
test("#given tool with input #when signature created #then includes tool and sorted input", () => {
|
||||
const result = createToolCallSignature("read", { filePath: "/a.ts" })
|
||||
|
||||
expect(result).toBe('read::{"filePath":"/a.ts"}')
|
||||
})
|
||||
|
||||
test("#given tool with undefined input #when signature created #then returns bare tool name", () => {
|
||||
const result = createToolCallSignature("read", undefined)
|
||||
|
||||
expect(result).toBe("read")
|
||||
})
|
||||
|
||||
test("#given tool with null input #when signature created #then returns bare tool name", () => {
|
||||
const result = createToolCallSignature("read", null)
|
||||
|
||||
expect(result).toBe("read")
|
||||
})
|
||||
|
||||
test("#given tool with empty object input #when signature created #then returns bare tool name", () => {
|
||||
const result = createToolCallSignature("read", {})
|
||||
|
||||
expect(result).toBe("read")
|
||||
})
|
||||
|
||||
test("#given same input different key order #when signatures compared #then they are equal", () => {
|
||||
const first = createToolCallSignature("read", { filePath: "/a.ts", offset: 0 })
|
||||
const second = createToolCallSignature("read", { offset: 0, filePath: "/a.ts" })
|
||||
|
||||
expect(first).toBe(second)
|
||||
})
|
||||
})
|
||||
|
||||
describe("detectRepetitiveToolUse", () => {
|
||||
@@ -113,5 +203,56 @@ describe("loop-detector", () => {
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe("#given same tool with different file inputs", () => {
|
||||
test("#when evaluated #then it does not trigger", () => {
|
||||
const calls = Array.from({ length: 20 }, (_, i) => ({
|
||||
tool: "read",
|
||||
input: { filePath: `/src/file-${i}.ts` },
|
||||
}))
|
||||
const window = buildWindowWithInputs(calls, {
|
||||
circuitBreaker: { windowSize: 20, repetitionThresholdPercent: 80 },
|
||||
})
|
||||
const result = detectRepetitiveToolUse(window)
|
||||
expect(result.triggered).toBe(false)
|
||||
})
|
||||
})
|
||||
|
||||
describe("#given same tool with identical file inputs", () => {
|
||||
test("#when evaluated #then it triggers with bare tool name", () => {
|
||||
const calls = [
|
||||
...Array.from({ length: 16 }, () => ({ tool: "read", input: { filePath: "/src/same.ts" } })),
|
||||
{ tool: "grep", input: { pattern: "foo" } },
|
||||
{ tool: "edit", input: { filePath: "/src/other.ts" } },
|
||||
{ tool: "bash", input: { command: "ls" } },
|
||||
{ tool: "glob", input: { pattern: "**/*.ts" } },
|
||||
]
|
||||
const window = buildWindowWithInputs(calls, {
|
||||
circuitBreaker: { windowSize: 20, repetitionThresholdPercent: 80 },
|
||||
})
|
||||
const result = detectRepetitiveToolUse(window)
|
||||
expect(result.triggered).toBe(true)
|
||||
expect(result.toolName).toBe("read")
|
||||
expect(result.repeatedCount).toBe(16)
|
||||
})
|
||||
})
|
||||
|
||||
describe("#given tool calls with no input", () => {
|
||||
test("#when the same tool dominates #then falls back to name-only detection", () => {
|
||||
const calls = [
|
||||
...Array.from({ length: 16 }, () => ({ tool: "read" })),
|
||||
{ tool: "grep" },
|
||||
{ tool: "edit" },
|
||||
{ tool: "bash" },
|
||||
{ tool: "glob" },
|
||||
]
|
||||
const window = buildWindowWithInputs(calls, {
|
||||
circuitBreaker: { windowSize: 20, repetitionThresholdPercent: 80 },
|
||||
})
|
||||
const result = detectRepetitiveToolUse(window)
|
||||
expect(result.triggered).toBe(true)
|
||||
expect(result.toolName).toBe("read")
|
||||
})
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import type { BackgroundTaskConfig } from "../../config/schema"
|
||||
import {
|
||||
DEFAULT_CIRCUIT_BREAKER_ENABLED,
|
||||
DEFAULT_CIRCUIT_BREAKER_REPETITION_THRESHOLD_PERCENT,
|
||||
DEFAULT_CIRCUIT_BREAKER_WINDOW_SIZE,
|
||||
DEFAULT_MAX_TOOL_CALLS,
|
||||
@@ -7,6 +8,7 @@ import {
|
||||
import type { ToolCallWindow } from "./types"
|
||||
|
||||
export interface CircuitBreakerSettings {
|
||||
enabled: boolean
|
||||
maxToolCalls: number
|
||||
windowSize: number
|
||||
repetitionThresholdPercent: number
|
||||
@@ -24,6 +26,7 @@ export function resolveCircuitBreakerSettings(
|
||||
config?: BackgroundTaskConfig
|
||||
): CircuitBreakerSettings {
|
||||
return {
|
||||
enabled: config?.circuitBreaker?.enabled ?? DEFAULT_CIRCUIT_BREAKER_ENABLED,
|
||||
maxToolCalls:
|
||||
config?.circuitBreaker?.maxToolCalls ?? config?.maxToolCalls ?? DEFAULT_MAX_TOOL_CALLS,
|
||||
windowSize: config?.circuitBreaker?.windowSize ?? DEFAULT_CIRCUIT_BREAKER_WINDOW_SIZE,
|
||||
@@ -36,28 +39,56 @@ export function resolveCircuitBreakerSettings(
|
||||
export function recordToolCall(
|
||||
window: ToolCallWindow | undefined,
|
||||
toolName: string,
|
||||
settings: CircuitBreakerSettings
|
||||
settings: CircuitBreakerSettings,
|
||||
toolInput?: Record<string, unknown> | null
|
||||
): ToolCallWindow {
|
||||
const previous = window?.toolNames ?? []
|
||||
const toolNames = [...previous, toolName].slice(-settings.windowSize)
|
||||
const previous = window?.toolSignatures ?? []
|
||||
const signature = createToolCallSignature(toolName, toolInput)
|
||||
const toolSignatures = [...previous, signature].slice(-settings.windowSize)
|
||||
|
||||
return {
|
||||
toolNames,
|
||||
toolSignatures,
|
||||
windowSize: settings.windowSize,
|
||||
thresholdPercent: settings.repetitionThresholdPercent,
|
||||
}
|
||||
}
|
||||
|
||||
function sortObject(obj: unknown): unknown {
|
||||
if (obj === null || obj === undefined) return obj
|
||||
if (typeof obj !== "object") return obj
|
||||
if (Array.isArray(obj)) return obj.map(sortObject)
|
||||
|
||||
const sorted: Record<string, unknown> = {}
|
||||
const keys = Object.keys(obj as Record<string, unknown>).sort()
|
||||
for (const key of keys) {
|
||||
sorted[key] = sortObject((obj as Record<string, unknown>)[key])
|
||||
}
|
||||
return sorted
|
||||
}
|
||||
|
||||
export function createToolCallSignature(
|
||||
toolName: string,
|
||||
toolInput?: Record<string, unknown> | null
|
||||
): string {
|
||||
if (toolInput === undefined || toolInput === null) {
|
||||
return toolName
|
||||
}
|
||||
if (Object.keys(toolInput).length === 0) {
|
||||
return toolName
|
||||
}
|
||||
return `${toolName}::${JSON.stringify(sortObject(toolInput))}`
|
||||
}
|
||||
|
||||
export function detectRepetitiveToolUse(
|
||||
window: ToolCallWindow | undefined
|
||||
): ToolLoopDetectionResult {
|
||||
if (!window || window.toolNames.length === 0) {
|
||||
if (!window || window.toolSignatures.length === 0) {
|
||||
return { triggered: false }
|
||||
}
|
||||
|
||||
const counts = new Map<string, number>()
|
||||
for (const toolName of window.toolNames) {
|
||||
counts.set(toolName, (counts.get(toolName) ?? 0) + 1)
|
||||
for (const signature of window.toolSignatures) {
|
||||
counts.set(signature, (counts.get(signature) ?? 0) + 1)
|
||||
}
|
||||
|
||||
let repeatedTool: string | undefined
|
||||
@@ -70,7 +101,7 @@ export function detectRepetitiveToolUse(
|
||||
}
|
||||
}
|
||||
|
||||
const sampleSize = window.toolNames.length
|
||||
const sampleSize = window.toolSignatures.length
|
||||
const minimumSampleSize = Math.min(
|
||||
window.windowSize,
|
||||
Math.ceil((window.windowSize * window.thresholdPercent) / 100)
|
||||
@@ -88,7 +119,7 @@ export function detectRepetitiveToolUse(
|
||||
|
||||
return {
|
||||
triggered: true,
|
||||
toolName: repeatedTool,
|
||||
toolName: repeatedTool.split("::")[0],
|
||||
repeatedCount,
|
||||
sampleSize,
|
||||
thresholdPercent: window.thresholdPercent,
|
||||
|
||||
@@ -236,4 +236,181 @@ describe("BackgroundManager circuit breaker", () => {
|
||||
expect(task.progress?.countedToolPartIDs).toEqual(["tool-1"])
|
||||
})
|
||||
})
|
||||
|
||||
describe("#given same tool reading different files", () => {
|
||||
test("#when tool events arrive with state.input #then task keeps running", async () => {
|
||||
const manager = createManager({
|
||||
circuitBreaker: {
|
||||
windowSize: 20,
|
||||
repetitionThresholdPercent: 80,
|
||||
},
|
||||
})
|
||||
const task: BackgroundTask = {
|
||||
id: "task-diff-files-1",
|
||||
sessionID: "session-diff-files-1",
|
||||
parentSessionID: "parent-1",
|
||||
parentMessageID: "msg-1",
|
||||
description: "Reading different files",
|
||||
prompt: "work",
|
||||
agent: "explore",
|
||||
status: "running",
|
||||
startedAt: new Date(Date.now() - 60_000),
|
||||
progress: {
|
||||
toolCalls: 0,
|
||||
lastUpdate: new Date(Date.now() - 60_000),
|
||||
},
|
||||
}
|
||||
getTaskMap(manager).set(task.id, task)
|
||||
|
||||
for (let i = 0; i < 20; i++) {
|
||||
manager.handleEvent({
|
||||
type: "message.part.updated",
|
||||
properties: {
|
||||
part: {
|
||||
sessionID: task.sessionID,
|
||||
type: "tool",
|
||||
tool: "read",
|
||||
state: { status: "running", input: { filePath: `/src/file-${i}.ts` } },
|
||||
},
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
await flushAsyncWork()
|
||||
|
||||
expect(task.status).toBe("running")
|
||||
expect(task.progress?.toolCalls).toBe(20)
|
||||
})
|
||||
})
|
||||
|
||||
describe("#given same tool reading same file repeatedly", () => {
|
||||
test("#when tool events arrive with state.input #then task is cancelled with bare tool name in error", async () => {
|
||||
const manager = createManager({
|
||||
circuitBreaker: {
|
||||
windowSize: 20,
|
||||
repetitionThresholdPercent: 80,
|
||||
},
|
||||
})
|
||||
const task: BackgroundTask = {
|
||||
id: "task-same-file-1",
|
||||
sessionID: "session-same-file-1",
|
||||
parentSessionID: "parent-1",
|
||||
parentMessageID: "msg-1",
|
||||
description: "Reading same file repeatedly",
|
||||
prompt: "work",
|
||||
agent: "explore",
|
||||
status: "running",
|
||||
startedAt: new Date(Date.now() - 60_000),
|
||||
progress: {
|
||||
toolCalls: 0,
|
||||
lastUpdate: new Date(Date.now() - 60_000),
|
||||
},
|
||||
}
|
||||
getTaskMap(manager).set(task.id, task)
|
||||
|
||||
for (let i = 0; i < 20; i++) {
|
||||
manager.handleEvent({
|
||||
type: "message.part.updated",
|
||||
properties: {
|
||||
part: {
|
||||
sessionID: task.sessionID,
|
||||
type: "tool",
|
||||
tool: "read",
|
||||
state: { status: "running", input: { filePath: "/src/same.ts" } },
|
||||
},
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
await flushAsyncWork()
|
||||
|
||||
expect(task.status).toBe("cancelled")
|
||||
expect(task.error).toContain("repeatedly called read")
|
||||
expect(task.error).not.toContain("::")
|
||||
})
|
||||
})
|
||||
|
||||
describe("#given circuit breaker enabled is false", () => {
|
||||
test("#when repetitive tools arrive #then task keeps running", async () => {
|
||||
const manager = createManager({
|
||||
circuitBreaker: {
|
||||
enabled: false,
|
||||
windowSize: 20,
|
||||
repetitionThresholdPercent: 80,
|
||||
},
|
||||
})
|
||||
const task: BackgroundTask = {
|
||||
id: "task-disabled-1",
|
||||
sessionID: "session-disabled-1",
|
||||
parentSessionID: "parent-1",
|
||||
parentMessageID: "msg-1",
|
||||
description: "Disabled circuit breaker task",
|
||||
prompt: "work",
|
||||
agent: "explore",
|
||||
status: "running",
|
||||
startedAt: new Date(Date.now() - 60_000),
|
||||
progress: {
|
||||
toolCalls: 0,
|
||||
lastUpdate: new Date(Date.now() - 60_000),
|
||||
},
|
||||
}
|
||||
getTaskMap(manager).set(task.id, task)
|
||||
|
||||
for (let i = 0; i < 20; i++) {
|
||||
manager.handleEvent({
|
||||
type: "message.part.updated",
|
||||
properties: {
|
||||
sessionID: task.sessionID,
|
||||
type: "tool",
|
||||
tool: "read",
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
await flushAsyncWork()
|
||||
|
||||
expect(task.status).toBe("running")
|
||||
})
|
||||
})
|
||||
|
||||
describe("#given circuit breaker enabled is false but absolute cap is low", () => {
|
||||
test("#when max tool calls exceeded #then task is still cancelled by absolute cap", async () => {
|
||||
const manager = createManager({
|
||||
maxToolCalls: 3,
|
||||
circuitBreaker: {
|
||||
enabled: false,
|
||||
windowSize: 10,
|
||||
repetitionThresholdPercent: 95,
|
||||
},
|
||||
})
|
||||
const task: BackgroundTask = {
|
||||
id: "task-cap-disabled-1",
|
||||
sessionID: "session-cap-disabled-1",
|
||||
parentSessionID: "parent-1",
|
||||
parentMessageID: "msg-1",
|
||||
description: "Backstop task with disabled circuit breaker",
|
||||
prompt: "work",
|
||||
agent: "explore",
|
||||
status: "running",
|
||||
startedAt: new Date(Date.now() - 60_000),
|
||||
progress: {
|
||||
toolCalls: 0,
|
||||
lastUpdate: new Date(Date.now() - 60_000),
|
||||
},
|
||||
}
|
||||
getTaskMap(manager).set(task.id, task)
|
||||
|
||||
for (const toolName of ["read", "grep", "edit"]) {
|
||||
manager.handleEvent({
|
||||
type: "message.part.updated",
|
||||
properties: { sessionID: task.sessionID, type: "tool", tool: toolName },
|
||||
})
|
||||
}
|
||||
|
||||
await flushAsyncWork()
|
||||
|
||||
expect(task.status).toBe("cancelled")
|
||||
expect(task.error).toContain("maximum tool call limit (3)")
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
@@ -74,7 +74,7 @@ interface MessagePartInfo {
|
||||
sessionID?: string
|
||||
type?: string
|
||||
tool?: string
|
||||
state?: { status?: string }
|
||||
state?: { status?: string; input?: Record<string, unknown> }
|
||||
}
|
||||
|
||||
interface EventProperties {
|
||||
@@ -918,29 +918,32 @@ export class BackgroundManager {
|
||||
task.progress.lastTool = partInfo.tool
|
||||
const circuitBreaker = resolveCircuitBreakerSettings(this.config)
|
||||
if (partInfo.tool) {
|
||||
task.progress.toolCallWindow = recordToolCall(
|
||||
task.progress.toolCallWindow,
|
||||
partInfo.tool,
|
||||
circuitBreaker
|
||||
)
|
||||
task.progress.toolCallWindow = recordToolCall(
|
||||
task.progress.toolCallWindow,
|
||||
partInfo.tool,
|
||||
circuitBreaker,
|
||||
partInfo.state?.input
|
||||
)
|
||||
|
||||
const loopDetection = detectRepetitiveToolUse(task.progress.toolCallWindow)
|
||||
if (loopDetection.triggered) {
|
||||
log("[background-agent] Circuit breaker: repetitive tool usage detected", {
|
||||
taskId: task.id,
|
||||
agent: task.agent,
|
||||
sessionID,
|
||||
toolName: loopDetection.toolName,
|
||||
repeatedCount: loopDetection.repeatedCount,
|
||||
sampleSize: loopDetection.sampleSize,
|
||||
thresholdPercent: loopDetection.thresholdPercent,
|
||||
})
|
||||
void this.cancelTask(task.id, {
|
||||
source: "circuit-breaker",
|
||||
reason: `Subagent repeatedly called ${loopDetection.toolName} ${loopDetection.repeatedCount}/${loopDetection.sampleSize} times in the recent tool-call window (${loopDetection.thresholdPercent}% threshold). This usually indicates an infinite loop. The task was automatically cancelled to prevent excessive token usage.`,
|
||||
})
|
||||
return
|
||||
}
|
||||
if (circuitBreaker.enabled) {
|
||||
const loopDetection = detectRepetitiveToolUse(task.progress.toolCallWindow)
|
||||
if (loopDetection.triggered) {
|
||||
log("[background-agent] Circuit breaker: repetitive tool usage detected", {
|
||||
taskId: task.id,
|
||||
agent: task.agent,
|
||||
sessionID,
|
||||
toolName: loopDetection.toolName,
|
||||
repeatedCount: loopDetection.repeatedCount,
|
||||
sampleSize: loopDetection.sampleSize,
|
||||
thresholdPercent: loopDetection.thresholdPercent,
|
||||
})
|
||||
void this.cancelTask(task.id, {
|
||||
source: "circuit-breaker",
|
||||
reason: `Subagent repeatedly called ${loopDetection.toolName} ${loopDetection.repeatedCount}/${loopDetection.sampleSize} times in the recent tool-call window (${loopDetection.thresholdPercent}% threshold). This usually indicates an infinite loop. The task was automatically cancelled to prevent excessive token usage.`,
|
||||
})
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const maxToolCalls = circuitBreaker.maxToolCalls
|
||||
|
||||
@@ -10,7 +10,7 @@ export type BackgroundTaskStatus =
|
||||
| "interrupt"
|
||||
|
||||
export interface ToolCallWindow {
|
||||
toolNames: string[]
|
||||
toolSignatures: string[]
|
||||
windowSize: number
|
||||
thresholdPercent: number
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user