diff --git a/assets/oh-my-opencode.schema.json b/assets/oh-my-opencode.schema.json index 46babc30b..dfd7558bf 100644 --- a/assets/oh-my-opencode.schema.json +++ b/assets/oh-my-opencode.schema.json @@ -3699,6 +3699,32 @@ "syncPollTimeoutMs": { "type": "number", "minimum": 60000 + }, + "maxToolCalls": { + "type": "integer", + "minimum": 10, + "maximum": 9007199254740991 + }, + "circuitBreaker": { + "type": "object", + "properties": { + "maxToolCalls": { + "type": "integer", + "minimum": 10, + "maximum": 9007199254740991 + }, + "windowSize": { + "type": "integer", + "minimum": 5, + "maximum": 9007199254740991 + }, + "repetitionThresholdPercent": { + "type": "number", + "exclusiveMinimum": 0, + "maximum": 100 + } + }, + "additionalProperties": false } }, "additionalProperties": false diff --git a/src/config/schema/background-task-circuit-breaker.test.ts b/src/config/schema/background-task-circuit-breaker.test.ts new file mode 100644 index 000000000..e236d4012 --- /dev/null +++ b/src/config/schema/background-task-circuit-breaker.test.ts @@ -0,0 +1,59 @@ +import { describe, expect, test } from "bun:test" +import { ZodError } from "zod/v4" +import { BackgroundTaskConfigSchema } from "./background-task" + +describe("BackgroundTaskConfigSchema.circuitBreaker", () => { + describe("#given valid circuit breaker settings", () => { + test("#when parsed #then returns nested config", () => { + const result = BackgroundTaskConfigSchema.parse({ + circuitBreaker: { + maxToolCalls: 150, + windowSize: 10, + repetitionThresholdPercent: 70, + }, + }) + + expect(result.circuitBreaker).toEqual({ + maxToolCalls: 150, + windowSize: 10, + repetitionThresholdPercent: 70, + }) + }) + }) + + describe("#given windowSize below minimum", () => { + test("#when parsed #then throws ZodError", () => { + let thrownError: unknown + + try { + BackgroundTaskConfigSchema.parse({ + circuitBreaker: { + windowSize: 4, + }, + }) + } catch (error) { + thrownError = error + } + + expect(thrownError).toBeInstanceOf(ZodError) + }) + }) + + describe("#given repetitionThresholdPercent is zero", () => { + test("#when parsed #then throws ZodError", () => { + let thrownError: unknown + + try { + BackgroundTaskConfigSchema.parse({ + circuitBreaker: { + repetitionThresholdPercent: 0, + }, + }) + } catch (error) { + thrownError = error + } + + expect(thrownError).toBeInstanceOf(ZodError) + }) + }) +}) diff --git a/src/config/schema/background-task.ts b/src/config/schema/background-task.ts index f98040e2d..5bec5065e 100644 --- a/src/config/schema/background-task.ts +++ b/src/config/schema/background-task.ts @@ -1,5 +1,11 @@ import { z } from "zod" +const CircuitBreakerConfigSchema = z.object({ + maxToolCalls: z.number().int().min(10).optional(), + windowSize: z.number().int().min(5).optional(), + repetitionThresholdPercent: z.number().gt(0).max(100).optional(), +}) + export const BackgroundTaskConfigSchema = z.object({ defaultConcurrency: z.number().min(1).optional(), providerConcurrency: z.record(z.string(), z.number().min(0)).optional(), @@ -13,6 +19,7 @@ export const BackgroundTaskConfigSchema = z.object({ syncPollTimeoutMs: z.number().min(60000).optional(), /** Maximum tool calls per subagent task before circuit breaker triggers (default: 200, minimum: 10). Prevents runaway loops from burning unlimited tokens. */ maxToolCalls: z.number().int().min(10).optional(), + circuitBreaker: CircuitBreakerConfigSchema.optional(), }) export type BackgroundTaskConfig = z.infer diff --git a/src/features/background-agent/constants.ts b/src/features/background-agent/constants.ts index 0fbde2964..6d5b97498 100644 --- a/src/features/background-agent/constants.ts +++ b/src/features/background-agent/constants.ts @@ -6,6 +6,9 @@ export const TERMINAL_TASK_TTL_MS = 30 * 60 * 1000 export const MIN_STABILITY_TIME_MS = 10 * 1000 export const DEFAULT_STALE_TIMEOUT_MS = 180_000 export const DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS = 1_800_000 +export const DEFAULT_MAX_TOOL_CALLS = 200 +export const DEFAULT_CIRCUIT_BREAKER_WINDOW_SIZE = 20 +export const DEFAULT_CIRCUIT_BREAKER_REPETITION_THRESHOLD_PERCENT = 80 export const MIN_RUNTIME_BEFORE_STALE_MS = 30_000 export const MIN_IDLE_TIME_MS = 5000 export const POLLING_INTERVAL_MS = 3000 diff --git a/src/features/background-agent/loop-detector.test.ts b/src/features/background-agent/loop-detector.test.ts new file mode 100644 index 000000000..b3d5de806 --- /dev/null +++ b/src/features/background-agent/loop-detector.test.ts @@ -0,0 +1,117 @@ +import { describe, expect, test } from "bun:test" +import { + detectRepetitiveToolUse, + recordToolCall, + resolveCircuitBreakerSettings, +} from "./loop-detector" + +function buildWindow( + toolNames: string[], + override?: Parameters[0] +) { + const settings = resolveCircuitBreakerSettings(override) + + return toolNames.reduce( + (window, toolName) => recordToolCall(window, toolName, settings), + undefined as ReturnType | undefined + ) +} + +describe("loop-detector", () => { + describe("resolveCircuitBreakerSettings", () => { + describe("#given nested circuit breaker config", () => { + test("#when resolved #then nested values override defaults", () => { + const result = resolveCircuitBreakerSettings({ + maxToolCalls: 200, + circuitBreaker: { + maxToolCalls: 120, + windowSize: 10, + repetitionThresholdPercent: 70, + }, + }) + + expect(result).toEqual({ + maxToolCalls: 120, + windowSize: 10, + repetitionThresholdPercent: 70, + }) + }) + }) + }) + + describe("detectRepetitiveToolUse", () => { + describe("#given recent tools are diverse", () => { + test("#when evaluated #then it does not trigger", () => { + const window = buildWindow([ + "read", + "grep", + "edit", + "bash", + "read", + "glob", + "lsp_diagnostics", + "read", + "grep", + "edit", + ]) + + const result = detectRepetitiveToolUse(window) + + expect(result.triggered).toBe(false) + }) + }) + + describe("#given the same tool dominates the recent window", () => { + test("#when evaluated #then it triggers", () => { + const window = buildWindow([ + "read", + "read", + "read", + "edit", + "read", + "read", + "read", + "read", + "grep", + "read", + ], { + circuitBreaker: { + windowSize: 10, + repetitionThresholdPercent: 80, + }, + }) + + const result = detectRepetitiveToolUse(window) + + expect(result).toEqual({ + triggered: true, + toolName: "read", + repeatedCount: 8, + sampleSize: 10, + thresholdPercent: 80, + }) + }) + }) + + describe("#given the window is not full yet", () => { + test("#when the current sample crosses the threshold #then it still triggers", () => { + const window = buildWindow(["read", "read", "edit", "read", "read", "read", "read", "read"], { + circuitBreaker: { + windowSize: 10, + repetitionThresholdPercent: 80, + }, + }) + + const result = detectRepetitiveToolUse(window) + + expect(result).toEqual({ + triggered: true, + toolName: "read", + repeatedCount: 7, + sampleSize: 8, + thresholdPercent: 80, + }) + }) + }) + }) +}) diff --git a/src/features/background-agent/loop-detector.ts b/src/features/background-agent/loop-detector.ts new file mode 100644 index 000000000..610ddf147 --- /dev/null +++ b/src/features/background-agent/loop-detector.ts @@ -0,0 +1,96 @@ +import type { BackgroundTaskConfig } from "../../config/schema" +import { + DEFAULT_CIRCUIT_BREAKER_REPETITION_THRESHOLD_PERCENT, + DEFAULT_CIRCUIT_BREAKER_WINDOW_SIZE, + DEFAULT_MAX_TOOL_CALLS, +} from "./constants" +import type { ToolCallWindow } from "./types" + +export interface CircuitBreakerSettings { + maxToolCalls: number + windowSize: number + repetitionThresholdPercent: number +} + +export interface ToolLoopDetectionResult { + triggered: boolean + toolName?: string + repeatedCount?: number + sampleSize?: number + thresholdPercent?: number +} + +export function resolveCircuitBreakerSettings( + config?: BackgroundTaskConfig +): CircuitBreakerSettings { + return { + maxToolCalls: + config?.circuitBreaker?.maxToolCalls ?? config?.maxToolCalls ?? DEFAULT_MAX_TOOL_CALLS, + windowSize: config?.circuitBreaker?.windowSize ?? DEFAULT_CIRCUIT_BREAKER_WINDOW_SIZE, + repetitionThresholdPercent: + config?.circuitBreaker?.repetitionThresholdPercent ?? + DEFAULT_CIRCUIT_BREAKER_REPETITION_THRESHOLD_PERCENT, + } +} + +export function recordToolCall( + window: ToolCallWindow | undefined, + toolName: string, + settings: CircuitBreakerSettings +): ToolCallWindow { + const previous = window?.toolNames ?? [] + const toolNames = [...previous, toolName].slice(-settings.windowSize) + + return { + toolNames, + windowSize: settings.windowSize, + thresholdPercent: settings.repetitionThresholdPercent, + } +} + +export function detectRepetitiveToolUse( + window: ToolCallWindow | undefined +): ToolLoopDetectionResult { + if (!window || window.toolNames.length === 0) { + return { triggered: false } + } + + const counts = new Map() + for (const toolName of window.toolNames) { + counts.set(toolName, (counts.get(toolName) ?? 0) + 1) + } + + let repeatedTool: string | undefined + let repeatedCount = 0 + + for (const [toolName, count] of counts.entries()) { + if (count > repeatedCount) { + repeatedTool = toolName + repeatedCount = count + } + } + + const sampleSize = window.toolNames.length + const minimumSampleSize = Math.min( + window.windowSize, + Math.ceil((window.windowSize * window.thresholdPercent) / 100) + ) + + if (sampleSize < minimumSampleSize) { + return { triggered: false } + } + + const thresholdCount = Math.ceil((sampleSize * window.thresholdPercent) / 100) + + if (!repeatedTool || repeatedCount < thresholdCount) { + return { triggered: false } + } + + return { + triggered: true, + toolName: repeatedTool, + repeatedCount, + sampleSize, + thresholdPercent: window.thresholdPercent, + } +} diff --git a/src/features/background-agent/manager-circuit-breaker.test.ts b/src/features/background-agent/manager-circuit-breaker.test.ts new file mode 100644 index 000000000..619b49327 --- /dev/null +++ b/src/features/background-agent/manager-circuit-breaker.test.ts @@ -0,0 +1,239 @@ +import { describe, expect, test } from "bun:test" +import type { PluginInput } from "@opencode-ai/plugin" +import { tmpdir } from "node:os" +import type { BackgroundTaskConfig } from "../../config/schema" +import { BackgroundManager } from "./manager" +import type { BackgroundTask } from "./types" + +function createManager(config?: BackgroundTaskConfig): BackgroundManager { + const client = { + session: { + prompt: async () => ({}), + promptAsync: async () => ({}), + abort: async () => ({}), + }, + } + + const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput, config) + const testManager = manager as unknown as { + enqueueNotificationForParent: (sessionID: string, fn: () => Promise) => Promise + notifyParentSession: (task: BackgroundTask) => Promise + tasks: Map + } + + testManager.enqueueNotificationForParent = async (_sessionID, fn) => { + await fn() + } + testManager.notifyParentSession = async () => {} + + return manager +} + +function getTaskMap(manager: BackgroundManager): Map { + return (manager as unknown as { tasks: Map }).tasks +} + +async function flushAsyncWork() { + await new Promise(resolve => setTimeout(resolve, 0)) +} + +describe("BackgroundManager circuit breaker", () => { + describe("#given the same tool dominates the recent window", () => { + test("#when tool events arrive #then the task is cancelled early", async () => { + const manager = createManager({ + circuitBreaker: { + windowSize: 20, + repetitionThresholdPercent: 80, + }, + }) + const task: BackgroundTask = { + id: "task-loop-1", + sessionID: "session-loop-1", + parentSessionID: "parent-1", + parentMessageID: "msg-1", + description: "Looping task", + prompt: "loop", + agent: "explore", + status: "running", + startedAt: new Date(Date.now() - 60_000), + progress: { + toolCalls: 0, + lastUpdate: new Date(Date.now() - 60_000), + }, + } + getTaskMap(manager).set(task.id, task) + + for (const toolName of [ + "read", + "read", + "grep", + "read", + "edit", + "read", + "read", + "bash", + "read", + "read", + "read", + "glob", + "read", + "read", + "read", + "read", + "read", + "read", + "read", + "read", + ]) { + manager.handleEvent({ + type: "message.part.updated", + properties: { sessionID: task.sessionID, type: "tool", tool: toolName }, + }) + } + + await flushAsyncWork() + + expect(task.status).toBe("cancelled") + expect(task.error).toContain("repeatedly called read 16/20 times") + }) + }) + + describe("#given recent tool calls are diverse", () => { + test("#when the window fills #then the task keeps running", async () => { + const manager = createManager({ + circuitBreaker: { + windowSize: 10, + repetitionThresholdPercent: 80, + }, + }) + const task: BackgroundTask = { + id: "task-diverse-1", + sessionID: "session-diverse-1", + parentSessionID: "parent-1", + parentMessageID: "msg-1", + description: "Healthy task", + prompt: "work", + agent: "explore", + status: "running", + startedAt: new Date(Date.now() - 60_000), + progress: { + toolCalls: 0, + lastUpdate: new Date(Date.now() - 60_000), + }, + } + getTaskMap(manager).set(task.id, task) + + for (const toolName of [ + "read", + "grep", + "edit", + "bash", + "glob", + "read", + "lsp_diagnostics", + "grep", + "edit", + "read", + ]) { + manager.handleEvent({ + type: "message.part.updated", + properties: { sessionID: task.sessionID, type: "tool", tool: toolName }, + }) + } + + await flushAsyncWork() + + expect(task.status).toBe("running") + expect(task.progress?.toolCalls).toBe(10) + }) + }) + + describe("#given the absolute cap is configured lower than the repetition detector needs", () => { + test("#when the raw tool-call cap is reached #then the backstop still cancels the task", async () => { + const manager = createManager({ + maxToolCalls: 3, + circuitBreaker: { + windowSize: 10, + repetitionThresholdPercent: 95, + }, + }) + const task: BackgroundTask = { + id: "task-cap-1", + sessionID: "session-cap-1", + parentSessionID: "parent-1", + parentMessageID: "msg-1", + description: "Backstop task", + prompt: "work", + agent: "explore", + status: "running", + startedAt: new Date(Date.now() - 60_000), + progress: { + toolCalls: 0, + lastUpdate: new Date(Date.now() - 60_000), + }, + } + getTaskMap(manager).set(task.id, task) + + for (const toolName of ["read", "grep", "edit"]) { + manager.handleEvent({ + type: "message.part.updated", + properties: { sessionID: task.sessionID, type: "tool", tool: toolName }, + }) + } + + await flushAsyncWork() + + expect(task.status).toBe("cancelled") + expect(task.error).toContain("maximum tool call limit (3)") + }) + }) + + describe("#given the same running tool part emits multiple updates", () => { + test("#when duplicate running updates arrive #then it only counts the tool once", async () => { + const manager = createManager({ + maxToolCalls: 2, + circuitBreaker: { + windowSize: 5, + repetitionThresholdPercent: 80, + }, + }) + const task: BackgroundTask = { + id: "task-dedupe-1", + sessionID: "session-dedupe-1", + parentSessionID: "parent-1", + parentMessageID: "msg-1", + description: "Dedupe task", + prompt: "work", + agent: "explore", + status: "running", + startedAt: new Date(Date.now() - 60_000), + progress: { + toolCalls: 0, + lastUpdate: new Date(Date.now() - 60_000), + }, + } + getTaskMap(manager).set(task.id, task) + + for (let index = 0; index < 3; index += 1) { + manager.handleEvent({ + type: "message.part.updated", + properties: { + part: { + id: "tool-1", + sessionID: task.sessionID, + type: "tool", + tool: "bash", + state: { status: "running" }, + }, + }, + }) + } + + await flushAsyncWork() + + expect(task.status).toBe("running") + expect(task.progress?.toolCalls).toBe(1) + expect(task.progress?.countedToolPartIDs).toEqual(["tool-1"]) + }) + }) +}) diff --git a/src/features/background-agent/manager.ts b/src/features/background-agent/manager.ts index 66a0966a7..95a953a03 100644 --- a/src/features/background-agent/manager.ts +++ b/src/features/background-agent/manager.ts @@ -52,6 +52,11 @@ import { join } from "node:path" import { pruneStaleTasksAndNotifications } from "./task-poller" import { checkAndInterruptStaleTasks } from "./task-poller" import { removeTaskToastTracking } from "./remove-task-toast-tracking" +import { + detectRepetitiveToolUse, + recordToolCall, + resolveCircuitBreakerSettings, +} from "./loop-detector" import { createSubagentDepthLimitError, createSubagentDescendantLimitError, @@ -65,9 +70,11 @@ type OpencodeClient = PluginInput["client"] interface MessagePartInfo { + id?: string sessionID?: string type?: string tool?: string + state?: { status?: string } } interface EventProperties { @@ -81,6 +88,19 @@ interface Event { properties?: EventProperties } +function resolveMessagePartInfo(properties: EventProperties | undefined): MessagePartInfo | undefined { + if (!properties || typeof properties !== "object") { + return undefined + } + + const nestedPart = properties.part + if (nestedPart && typeof nestedPart === "object") { + return nestedPart as MessagePartInfo + } + + return properties as MessagePartInfo +} + interface Todo { content: string status: string @@ -723,6 +743,8 @@ export class BackgroundManager { existingTask.progress = { toolCalls: existingTask.progress?.toolCalls ?? 0, + toolCallWindow: existingTask.progress?.toolCallWindow, + countedToolPartIDs: existingTask.progress?.countedToolPartIDs, lastUpdate: new Date(), } @@ -855,8 +877,7 @@ export class BackgroundManager { } if (event.type === "message.part.updated" || event.type === "message.part.delta") { - if (!props || typeof props !== "object" || !("sessionID" in props)) return - const partInfo = props as unknown as MessagePartInfo + const partInfo = resolveMessagePartInfo(props) const sessionID = partInfo?.sessionID if (!sessionID) return @@ -879,10 +900,50 @@ export class BackgroundManager { task.progress.lastUpdate = new Date() if (partInfo?.type === "tool" || partInfo?.tool) { + const countedToolPartIDs = task.progress.countedToolPartIDs ?? [] + const shouldCountToolCall = + !partInfo.id || + partInfo.state?.status !== "running" || + !countedToolPartIDs.includes(partInfo.id) + + if (!shouldCountToolCall) { + return + } + + if (partInfo.id && partInfo.state?.status === "running") { + task.progress.countedToolPartIDs = [...countedToolPartIDs, partInfo.id] + } + task.progress.toolCalls += 1 task.progress.lastTool = partInfo.tool + const circuitBreaker = resolveCircuitBreakerSettings(this.config) + if (partInfo.tool) { + task.progress.toolCallWindow = recordToolCall( + task.progress.toolCallWindow, + partInfo.tool, + circuitBreaker + ) - const maxToolCalls = this.config?.maxToolCalls ?? 200 + const loopDetection = detectRepetitiveToolUse(task.progress.toolCallWindow) + if (loopDetection.triggered) { + log("[background-agent] Circuit breaker: repetitive tool usage detected", { + taskId: task.id, + agent: task.agent, + sessionID, + toolName: loopDetection.toolName, + repeatedCount: loopDetection.repeatedCount, + sampleSize: loopDetection.sampleSize, + thresholdPercent: loopDetection.thresholdPercent, + }) + void this.cancelTask(task.id, { + source: "circuit-breaker", + reason: `Subagent repeatedly called ${loopDetection.toolName} ${loopDetection.repeatedCount}/${loopDetection.sampleSize} times in the recent tool-call window (${loopDetection.thresholdPercent}% threshold). This usually indicates an infinite loop. The task was automatically cancelled to prevent excessive token usage.`, + }) + return + } + } + + const maxToolCalls = circuitBreaker.maxToolCalls if (task.progress.toolCalls >= maxToolCalls) { log("[background-agent] Circuit breaker: tool call limit reached", { taskId: task.id, diff --git a/src/features/background-agent/types.ts b/src/features/background-agent/types.ts index 73ae8a000..7129aa2fd 100644 --- a/src/features/background-agent/types.ts +++ b/src/features/background-agent/types.ts @@ -9,9 +9,17 @@ export type BackgroundTaskStatus = | "cancelled" | "interrupt" +export interface ToolCallWindow { + toolNames: string[] + windowSize: number + thresholdPercent: number +} + export interface TaskProgress { toolCalls: number lastTool?: string + toolCallWindow?: ToolCallWindow + countedToolPartIDs?: string[] lastUpdate: Date lastMessage?: string lastMessageAt?: Date