Compare commits

..

14 Commits

Author SHA1 Message Date
YeonGyu-Kim
9ca259dcdc fix(runtime-fallback): preserve agent variant and reasoningEffort on model fallback (fixes #2621)
When runtime fallback switches to a different model, the agent's
configured variant and reasoningEffort were lost because
buildRetryModelPayload only extracted variant from the fallback
model string itself.

Now buildRetryModelPayload accepts optional agentSettings and uses
the agent's variant as fallback when the model string doesn't
include one. reasoningEffort is also passed through.

🤖 Generated with assistance of [OhMyOpenCode](https://github.com/code-yeongyu/oh-my-opencode)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-18 12:11:01 +09:00
YeonGyu-Kim
55ac653eaa feat(hooks): add todo-description-override hook to enforce atomic todo format
Override TodoWrite description via tool.definition hook to require
WHERE/WHY/HOW/RESULT in each todo title and enforce 1-3 tool call
granularity.
2026-03-18 11:49:13 +09:00
YeonGyu-Kim
1d5652dfa9 Merge pull request #2655 from tad-hq/infinite-circuit-target-fix
fix(circuit-breaker): make repetitive detection target-aware and add enabled escape hatch
2026-03-18 11:46:06 +09:00
YeonGyu-Kim
76c460536d docs(start-work): update worktree and task breakdown guidance
- Change worktree behavior: default to current directory, worktree only with --worktree flag
- Add mandatory TASK BREAKDOWN section with granular sub-task requirements
- Add WORKTREE COMPLETION section for merging worktree branches back

🤖 Generated with assistance of OhMyOpenCode
2026-03-18 11:16:43 +09:00
github-actions[bot]
b067d4a284 @ogormans-deptstack has signed the CLA in code-yeongyu/oh-my-openagent#2656 2026-03-17 20:42:53 +00:00
github-actions[bot]
94838ec039 @tad-hq has signed the CLA in code-yeongyu/oh-my-openagent#2655 2026-03-17 20:07:20 +00:00
tad-hq
224ecea8c7 chore: regenerate JSON schema with circuitBreaker.enabled field 2026-03-17 13:43:56 -06:00
tad-hq
5d5755f29d fix(circuit-breaker): wire target-aware detection into background manager 2026-03-17 13:40:46 -06:00
tad-hq
1fdce01fd2 fix(circuit-breaker): target-aware loop detection via tool signatures 2026-03-17 13:36:09 -06:00
tad-hq
c8213c970e fix(circuit-breaker): add enabled config flag as escape hatch 2026-03-17 13:29:06 -06:00
YeonGyu-Kim
576ff453e5 Merge pull request #2651 from code-yeongyu/fix/openagent-version-in-publish
fix(release): set version when publishing oh-my-openagent
2026-03-18 02:15:36 +09:00
YeonGyu-Kim
9b8aca45f9 fix(release): set version when publishing oh-my-openagent
The publish step was updating name and optionalDependencies but not
version, causing npm to try publishing the base package.json version
(3.11.0) instead of the release version (3.12.0).

Error was: 'You cannot publish over the previously published versions: 3.11.0'
2026-03-18 02:15:15 +09:00
YeonGyu-Kim
f1f20f5a79 Merge pull request #2650 from code-yeongyu/fix/openagent-platform-publish
fix(release): add oh-my-openagent dual-publish to platform and main workflows
2026-03-18 01:55:31 +09:00
YeonGyu-Kim
de40caf76d fix(release): add oh-my-openagent dual-publish to platform and main workflows
- publish-platform.yml: Build job now checks BOTH oh-my-opencode and
  oh-my-openagent before skipping. Build only skips when both are published.
  Added 'Publish oh-my-openagent-{platform}' step that renames package.json
  and publishes under the openagent name.

- publish.yml: Added 'Publish oh-my-openagent' step after opencode publish.
  Rewrites package name and optionalDependencies to oh-my-openagent variants,
  then publishes. Restores package.json after.

Previously, oh-my-openagent platform packages were never published because
the build skip check only looked at oh-my-opencode (which was already published),
causing the entire build to be skipped.
2026-03-18 01:45:02 +09:00
23 changed files with 798 additions and 74 deletions

View File

@@ -59,20 +59,39 @@ jobs:
- name: Check if already published
id: check
run: |
PKG_NAME="oh-my-opencode-${{ matrix.platform }}"
VERSION="${{ inputs.version }}"
STATUS=$(curl -s -o /dev/null -w "%{http_code}" "https://registry.npmjs.org/${PKG_NAME}/${VERSION}")
# Convert platform name for output (replace - with _)
PLATFORM_KEY="${{ matrix.platform }}"
PLATFORM_KEY="${PLATFORM_KEY//-/_}"
if [ "$STATUS" = "200" ]; then
# Check oh-my-opencode
OC_STATUS=$(curl -s -o /dev/null -w "%{http_code}" "https://registry.npmjs.org/oh-my-opencode-${{ matrix.platform }}/${VERSION}")
# Check oh-my-openagent
OA_STATUS=$(curl -s -o /dev/null -w "%{http_code}" "https://registry.npmjs.org/oh-my-openagent-${{ matrix.platform }}/${VERSION}")
echo "oh-my-opencode-${{ matrix.platform }}@${VERSION}: ${OC_STATUS}"
echo "oh-my-openagent-${{ matrix.platform }}@${VERSION}: ${OA_STATUS}"
if [ "$OC_STATUS" = "200" ]; then
echo "skip_opencode=true" >> $GITHUB_OUTPUT
echo "✓ oh-my-opencode-${{ matrix.platform }}@${VERSION} already published"
else
echo "skip_opencode=false" >> $GITHUB_OUTPUT
echo "→ oh-my-opencode-${{ matrix.platform }}@${VERSION} needs publishing"
fi
if [ "$OA_STATUS" = "200" ]; then
echo "skip_openagent=true" >> $GITHUB_OUTPUT
echo "✓ oh-my-openagent-${{ matrix.platform }}@${VERSION} already published"
else
echo "skip_openagent=false" >> $GITHUB_OUTPUT
echo "→ oh-my-openagent-${{ matrix.platform }}@${VERSION} needs publishing"
fi
# Skip build only if BOTH are already published
if [ "$OC_STATUS" = "200" ] && [ "$OA_STATUS" = "200" ]; then
echo "skip=true" >> $GITHUB_OUTPUT
echo "skip_${PLATFORM_KEY}=true" >> $GITHUB_OUTPUT
echo "✓ ${PKG_NAME}@${VERSION} already published"
else
echo "skip=false" >> $GITHUB_OUTPUT
echo "skip_${PLATFORM_KEY}=false" >> $GITHUB_OUTPUT
echo "→ ${PKG_NAME}@${VERSION} needs publishing"
fi
- name: Update version in package.json
@@ -207,23 +226,38 @@ jobs:
matrix:
platform: [darwin-arm64, darwin-x64, darwin-x64-baseline, linux-x64, linux-x64-baseline, linux-arm64, linux-x64-musl, linux-x64-musl-baseline, linux-arm64-musl, windows-x64, windows-x64-baseline]
steps:
- name: Check if oh-my-opencode already published
- name: Check if already published
id: check
run: |
PKG_NAME="oh-my-opencode-${{ matrix.platform }}"
VERSION="${{ inputs.version }}"
STATUS=$(curl -s -o /dev/null -w "%{http_code}" "https://registry.npmjs.org/${PKG_NAME}/${VERSION}")
if [ "$STATUS" = "200" ]; then
echo "skip=true" >> $GITHUB_OUTPUT
echo "✓ ${PKG_NAME}@${VERSION} already published, skipping"
OC_STATUS=$(curl -s -o /dev/null -w "%{http_code}" "https://registry.npmjs.org/oh-my-opencode-${{ matrix.platform }}/${VERSION}")
OA_STATUS=$(curl -s -o /dev/null -w "%{http_code}" "https://registry.npmjs.org/oh-my-openagent-${{ matrix.platform }}/${VERSION}")
if [ "$OC_STATUS" = "200" ]; then
echo "skip_opencode=true" >> $GITHUB_OUTPUT
echo "✓ oh-my-opencode-${{ matrix.platform }}@${VERSION} already published"
else
echo "skip=false" >> $GITHUB_OUTPUT
echo "→ ${PKG_NAME}@${VERSION} will be published"
echo "skip_opencode=false" >> $GITHUB_OUTPUT
fi
if [ "$OA_STATUS" = "200" ]; then
echo "skip_openagent=true" >> $GITHUB_OUTPUT
echo "✓ oh-my-openagent-${{ matrix.platform }}@${VERSION} already published"
else
echo "skip_openagent=false" >> $GITHUB_OUTPUT
fi
# Need artifact if either package needs publishing
if [ "$OC_STATUS" = "200" ] && [ "$OA_STATUS" = "200" ]; then
echo "skip_all=true" >> $GITHUB_OUTPUT
else
echo "skip_all=false" >> $GITHUB_OUTPUT
fi
- name: Download artifact
id: download
if: steps.check.outputs.skip != 'true'
if: steps.check.outputs.skip_all != 'true'
continue-on-error: true
uses: actions/download-artifact@v4
with:
@@ -231,7 +265,7 @@ jobs:
path: .
- name: Extract artifact
if: steps.check.outputs.skip != 'true' && steps.download.outcome == 'success'
if: steps.check.outputs.skip_all != 'true' && steps.download.outcome == 'success'
run: |
PLATFORM="${{ matrix.platform }}"
mkdir -p packages/${PLATFORM}
@@ -247,13 +281,13 @@ jobs:
ls -la packages/${PLATFORM}/bin/
- uses: actions/setup-node@v4
if: steps.check.outputs.skip != 'true' && steps.download.outcome == 'success'
if: steps.check.outputs.skip_all != 'true' && steps.download.outcome == 'success'
with:
node-version: "24"
registry-url: "https://registry.npmjs.org"
- name: Publish ${{ matrix.platform }}
if: steps.check.outputs.skip != 'true' && steps.download.outcome == 'success'
- name: Publish oh-my-opencode-${{ matrix.platform }}
if: steps.check.outputs.skip_opencode != 'true' && steps.download.outcome == 'success'
run: |
cd packages/${{ matrix.platform }}
@@ -267,3 +301,25 @@ jobs:
NODE_AUTH_TOKEN: ${{ secrets.NODE_AUTH_TOKEN }}
NPM_CONFIG_PROVENANCE: true
timeout-minutes: 15
- name: Publish oh-my-openagent-${{ matrix.platform }}
if: steps.check.outputs.skip_openagent != 'true' && steps.download.outcome == 'success'
run: |
cd packages/${{ matrix.platform }}
# Rename package for oh-my-openagent
jq --arg name "oh-my-openagent-${{ matrix.platform }}" \
--arg desc "Platform-specific binary for oh-my-openagent (${{ matrix.platform }})" \
'.name = $name | .description = $desc | .bin = {"oh-my-openagent": (.bin | to_entries | .[0].value)}' \
package.json > tmp.json && mv tmp.json package.json
TAG_ARG=""
if [ -n "${{ inputs.dist_tag }}" ]; then
TAG_ARG="--tag ${{ inputs.dist_tag }}"
fi
npm publish --access public --provenance $TAG_ARG
env:
NODE_AUTH_TOKEN: ${{ secrets.NODE_AUTH_TOKEN }}
NPM_CONFIG_PROVENANCE: true
timeout-minutes: 15

View File

@@ -216,6 +216,48 @@ jobs:
NODE_AUTH_TOKEN: ${{ secrets.NODE_AUTH_TOKEN }}
NPM_CONFIG_PROVENANCE: true
- name: Check if oh-my-openagent already published
id: check-openagent
run: |
VERSION="${{ steps.version.outputs.version }}"
STATUS=$(curl -s -o /dev/null -w "%{http_code}" "https://registry.npmjs.org/oh-my-openagent/${VERSION}")
if [ "$STATUS" = "200" ]; then
echo "skip=true" >> $GITHUB_OUTPUT
echo "✓ oh-my-openagent@${VERSION} already published"
else
echo "skip=false" >> $GITHUB_OUTPUT
fi
- name: Publish oh-my-openagent
if: steps.check-openagent.outputs.skip != 'true'
run: |
VERSION="${{ steps.version.outputs.version }}"
# Update package name, version, and optionalDependencies for oh-my-openagent
jq --arg v "$VERSION" '
.name = "oh-my-openagent" |
.version = $v |
.optionalDependencies = (
.optionalDependencies | to_entries |
map(.key = (.key | sub("^oh-my-opencode-"; "oh-my-openagent-")) | .value = $v) |
from_entries
)
' package.json > tmp.json && mv tmp.json package.json
TAG_ARG=""
if [ -n "${{ steps.version.outputs.dist_tag }}" ]; then
TAG_ARG="--tag ${{ steps.version.outputs.dist_tag }}"
fi
npm publish --access public --provenance $TAG_ARG || echo "::warning::oh-my-openagent publish failed"
env:
NODE_AUTH_TOKEN: ${{ secrets.NODE_AUTH_TOKEN }}
NPM_CONFIG_PROVENANCE: true
- name: Restore package.json
if: steps.check-openagent.outputs.skip != 'true'
run: |
git checkout -- package.json
trigger-platform:
runs-on: ubuntu-latest
needs: publish-main

View File

@@ -3708,6 +3708,9 @@
"circuitBreaker": {
"type": "object",
"properties": {
"enabled": {
"type": "boolean"
},
"maxToolCalls": {
"type": "integer",
"minimum": 10,

View File

@@ -2223,6 +2223,22 @@
"created_at": "2026-03-17T08:27:45Z",
"repoId": 1108837393,
"pullRequestNo": 2640
},
{
"name": "tad-hq",
"id": 213478119,
"comment_id": 4077697128,
"created_at": "2026-03-17T20:07:09Z",
"repoId": 1108837393,
"pullRequestNo": 2655
},
{
"name": "ogormans-deptstack",
"id": 208788555,
"comment_id": 4077893096,
"created_at": "2026-03-17T20:42:42Z",
"repoId": 1108837393,
"pullRequestNo": 2656
}
]
}

View File

@@ -1,6 +1,7 @@
import { z } from "zod"
const CircuitBreakerConfigSchema = z.object({
enabled: z.boolean().optional(),
maxToolCalls: z.number().int().min(10).optional(),
windowSize: z.number().int().min(5).optional(),
repetitionThresholdPercent: z.number().gt(0).max(100).optional(),

View File

@@ -51,6 +51,7 @@ export const HookNameSchema = z.enum([
"anthropic-effort",
"hashline-read-enhancer",
"read-image-resizer",
"todo-description-override",
])
export type HookName = z.infer<typeof HookNameSchema>

View File

@@ -9,6 +9,7 @@ export const DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS = 1_800_000
export const DEFAULT_MAX_TOOL_CALLS = 200
export const DEFAULT_CIRCUIT_BREAKER_WINDOW_SIZE = 20
export const DEFAULT_CIRCUIT_BREAKER_REPETITION_THRESHOLD_PERCENT = 80
export const DEFAULT_CIRCUIT_BREAKER_ENABLED = true
export const MIN_RUNTIME_BEFORE_STALE_MS = 30_000
export const MIN_IDLE_TIME_MS = 5000
export const POLLING_INTERVAL_MS = 3000

View File

@@ -1,5 +1,6 @@
import { describe, expect, test } from "bun:test"
import {
createToolCallSignature,
detectRepetitiveToolUse,
recordToolCall,
resolveCircuitBreakerSettings,
@@ -17,6 +18,17 @@ function buildWindow(
)
}
function buildWindowWithInputs(
calls: Array<{ tool: string; input?: Record<string, unknown> }>,
override?: Parameters<typeof resolveCircuitBreakerSettings>[0]
) {
const settings = resolveCircuitBreakerSettings(override)
return calls.reduce(
(window, { tool, input }) => recordToolCall(window, tool, settings, input),
undefined as ReturnType<typeof recordToolCall> | undefined
)
}
describe("loop-detector", () => {
describe("resolveCircuitBreakerSettings", () => {
describe("#given nested circuit breaker config", () => {
@@ -31,12 +43,90 @@ describe("loop-detector", () => {
})
expect(result).toEqual({
enabled: true,
maxToolCalls: 120,
windowSize: 10,
repetitionThresholdPercent: 70,
})
})
})
describe("#given no enabled config", () => {
test("#when resolved #then enabled defaults to true", () => {
const result = resolveCircuitBreakerSettings({
circuitBreaker: {
maxToolCalls: 100,
windowSize: 5,
repetitionThresholdPercent: 60,
},
})
expect(result.enabled).toBe(true)
})
})
describe("#given enabled is false in config", () => {
test("#when resolved #then enabled is false", () => {
const result = resolveCircuitBreakerSettings({
circuitBreaker: {
enabled: false,
maxToolCalls: 100,
windowSize: 5,
repetitionThresholdPercent: 60,
},
})
expect(result.enabled).toBe(false)
})
})
describe("#given enabled is true in config", () => {
test("#when resolved #then enabled is true", () => {
const result = resolveCircuitBreakerSettings({
circuitBreaker: {
enabled: true,
maxToolCalls: 100,
windowSize: 5,
repetitionThresholdPercent: 60,
},
})
expect(result.enabled).toBe(true)
})
})
})
describe("createToolCallSignature", () => {
test("#given tool with input #when signature created #then includes tool and sorted input", () => {
const result = createToolCallSignature("read", { filePath: "/a.ts" })
expect(result).toBe('read::{"filePath":"/a.ts"}')
})
test("#given tool with undefined input #when signature created #then returns bare tool name", () => {
const result = createToolCallSignature("read", undefined)
expect(result).toBe("read")
})
test("#given tool with null input #when signature created #then returns bare tool name", () => {
const result = createToolCallSignature("read", null)
expect(result).toBe("read")
})
test("#given tool with empty object input #when signature created #then returns bare tool name", () => {
const result = createToolCallSignature("read", {})
expect(result).toBe("read")
})
test("#given same input different key order #when signatures compared #then they are equal", () => {
const first = createToolCallSignature("read", { filePath: "/a.ts", offset: 0 })
const second = createToolCallSignature("read", { offset: 0, filePath: "/a.ts" })
expect(first).toBe(second)
})
})
describe("detectRepetitiveToolUse", () => {
@@ -113,5 +203,56 @@ describe("loop-detector", () => {
})
})
})
describe("#given same tool with different file inputs", () => {
test("#when evaluated #then it does not trigger", () => {
const calls = Array.from({ length: 20 }, (_, i) => ({
tool: "read",
input: { filePath: `/src/file-${i}.ts` },
}))
const window = buildWindowWithInputs(calls, {
circuitBreaker: { windowSize: 20, repetitionThresholdPercent: 80 },
})
const result = detectRepetitiveToolUse(window)
expect(result.triggered).toBe(false)
})
})
describe("#given same tool with identical file inputs", () => {
test("#when evaluated #then it triggers with bare tool name", () => {
const calls = [
...Array.from({ length: 16 }, () => ({ tool: "read", input: { filePath: "/src/same.ts" } })),
{ tool: "grep", input: { pattern: "foo" } },
{ tool: "edit", input: { filePath: "/src/other.ts" } },
{ tool: "bash", input: { command: "ls" } },
{ tool: "glob", input: { pattern: "**/*.ts" } },
]
const window = buildWindowWithInputs(calls, {
circuitBreaker: { windowSize: 20, repetitionThresholdPercent: 80 },
})
const result = detectRepetitiveToolUse(window)
expect(result.triggered).toBe(true)
expect(result.toolName).toBe("read")
expect(result.repeatedCount).toBe(16)
})
})
describe("#given tool calls with no input", () => {
test("#when the same tool dominates #then falls back to name-only detection", () => {
const calls = [
...Array.from({ length: 16 }, () => ({ tool: "read" })),
{ tool: "grep" },
{ tool: "edit" },
{ tool: "bash" },
{ tool: "glob" },
]
const window = buildWindowWithInputs(calls, {
circuitBreaker: { windowSize: 20, repetitionThresholdPercent: 80 },
})
const result = detectRepetitiveToolUse(window)
expect(result.triggered).toBe(true)
expect(result.toolName).toBe("read")
})
})
})
})

View File

@@ -1,5 +1,6 @@
import type { BackgroundTaskConfig } from "../../config/schema"
import {
DEFAULT_CIRCUIT_BREAKER_ENABLED,
DEFAULT_CIRCUIT_BREAKER_REPETITION_THRESHOLD_PERCENT,
DEFAULT_CIRCUIT_BREAKER_WINDOW_SIZE,
DEFAULT_MAX_TOOL_CALLS,
@@ -7,6 +8,7 @@ import {
import type { ToolCallWindow } from "./types"
export interface CircuitBreakerSettings {
enabled: boolean
maxToolCalls: number
windowSize: number
repetitionThresholdPercent: number
@@ -24,6 +26,7 @@ export function resolveCircuitBreakerSettings(
config?: BackgroundTaskConfig
): CircuitBreakerSettings {
return {
enabled: config?.circuitBreaker?.enabled ?? DEFAULT_CIRCUIT_BREAKER_ENABLED,
maxToolCalls:
config?.circuitBreaker?.maxToolCalls ?? config?.maxToolCalls ?? DEFAULT_MAX_TOOL_CALLS,
windowSize: config?.circuitBreaker?.windowSize ?? DEFAULT_CIRCUIT_BREAKER_WINDOW_SIZE,
@@ -36,28 +39,56 @@ export function resolveCircuitBreakerSettings(
export function recordToolCall(
window: ToolCallWindow | undefined,
toolName: string,
settings: CircuitBreakerSettings
settings: CircuitBreakerSettings,
toolInput?: Record<string, unknown> | null
): ToolCallWindow {
const previous = window?.toolNames ?? []
const toolNames = [...previous, toolName].slice(-settings.windowSize)
const previous = window?.toolSignatures ?? []
const signature = createToolCallSignature(toolName, toolInput)
const toolSignatures = [...previous, signature].slice(-settings.windowSize)
return {
toolNames,
toolSignatures,
windowSize: settings.windowSize,
thresholdPercent: settings.repetitionThresholdPercent,
}
}
function sortObject(obj: unknown): unknown {
if (obj === null || obj === undefined) return obj
if (typeof obj !== "object") return obj
if (Array.isArray(obj)) return obj.map(sortObject)
const sorted: Record<string, unknown> = {}
const keys = Object.keys(obj as Record<string, unknown>).sort()
for (const key of keys) {
sorted[key] = sortObject((obj as Record<string, unknown>)[key])
}
return sorted
}
export function createToolCallSignature(
toolName: string,
toolInput?: Record<string, unknown> | null
): string {
if (toolInput === undefined || toolInput === null) {
return toolName
}
if (Object.keys(toolInput).length === 0) {
return toolName
}
return `${toolName}::${JSON.stringify(sortObject(toolInput))}`
}
export function detectRepetitiveToolUse(
window: ToolCallWindow | undefined
): ToolLoopDetectionResult {
if (!window || window.toolNames.length === 0) {
if (!window || window.toolSignatures.length === 0) {
return { triggered: false }
}
const counts = new Map<string, number>()
for (const toolName of window.toolNames) {
counts.set(toolName, (counts.get(toolName) ?? 0) + 1)
for (const signature of window.toolSignatures) {
counts.set(signature, (counts.get(signature) ?? 0) + 1)
}
let repeatedTool: string | undefined
@@ -70,7 +101,7 @@ export function detectRepetitiveToolUse(
}
}
const sampleSize = window.toolNames.length
const sampleSize = window.toolSignatures.length
const minimumSampleSize = Math.min(
window.windowSize,
Math.ceil((window.windowSize * window.thresholdPercent) / 100)
@@ -88,7 +119,7 @@ export function detectRepetitiveToolUse(
return {
triggered: true,
toolName: repeatedTool,
toolName: repeatedTool.split("::")[0],
repeatedCount,
sampleSize,
thresholdPercent: window.thresholdPercent,

View File

@@ -236,4 +236,181 @@ describe("BackgroundManager circuit breaker", () => {
expect(task.progress?.countedToolPartIDs).toEqual(["tool-1"])
})
})
describe("#given same tool reading different files", () => {
test("#when tool events arrive with state.input #then task keeps running", async () => {
const manager = createManager({
circuitBreaker: {
windowSize: 20,
repetitionThresholdPercent: 80,
},
})
const task: BackgroundTask = {
id: "task-diff-files-1",
sessionID: "session-diff-files-1",
parentSessionID: "parent-1",
parentMessageID: "msg-1",
description: "Reading different files",
prompt: "work",
agent: "explore",
status: "running",
startedAt: new Date(Date.now() - 60_000),
progress: {
toolCalls: 0,
lastUpdate: new Date(Date.now() - 60_000),
},
}
getTaskMap(manager).set(task.id, task)
for (let i = 0; i < 20; i++) {
manager.handleEvent({
type: "message.part.updated",
properties: {
part: {
sessionID: task.sessionID,
type: "tool",
tool: "read",
state: { status: "running", input: { filePath: `/src/file-${i}.ts` } },
},
},
})
}
await flushAsyncWork()
expect(task.status).toBe("running")
expect(task.progress?.toolCalls).toBe(20)
})
})
describe("#given same tool reading same file repeatedly", () => {
test("#when tool events arrive with state.input #then task is cancelled with bare tool name in error", async () => {
const manager = createManager({
circuitBreaker: {
windowSize: 20,
repetitionThresholdPercent: 80,
},
})
const task: BackgroundTask = {
id: "task-same-file-1",
sessionID: "session-same-file-1",
parentSessionID: "parent-1",
parentMessageID: "msg-1",
description: "Reading same file repeatedly",
prompt: "work",
agent: "explore",
status: "running",
startedAt: new Date(Date.now() - 60_000),
progress: {
toolCalls: 0,
lastUpdate: new Date(Date.now() - 60_000),
},
}
getTaskMap(manager).set(task.id, task)
for (let i = 0; i < 20; i++) {
manager.handleEvent({
type: "message.part.updated",
properties: {
part: {
sessionID: task.sessionID,
type: "tool",
tool: "read",
state: { status: "running", input: { filePath: "/src/same.ts" } },
},
},
})
}
await flushAsyncWork()
expect(task.status).toBe("cancelled")
expect(task.error).toContain("repeatedly called read")
expect(task.error).not.toContain("::")
})
})
describe("#given circuit breaker enabled is false", () => {
test("#when repetitive tools arrive #then task keeps running", async () => {
const manager = createManager({
circuitBreaker: {
enabled: false,
windowSize: 20,
repetitionThresholdPercent: 80,
},
})
const task: BackgroundTask = {
id: "task-disabled-1",
sessionID: "session-disabled-1",
parentSessionID: "parent-1",
parentMessageID: "msg-1",
description: "Disabled circuit breaker task",
prompt: "work",
agent: "explore",
status: "running",
startedAt: new Date(Date.now() - 60_000),
progress: {
toolCalls: 0,
lastUpdate: new Date(Date.now() - 60_000),
},
}
getTaskMap(manager).set(task.id, task)
for (let i = 0; i < 20; i++) {
manager.handleEvent({
type: "message.part.updated",
properties: {
sessionID: task.sessionID,
type: "tool",
tool: "read",
},
})
}
await flushAsyncWork()
expect(task.status).toBe("running")
})
})
describe("#given circuit breaker enabled is false but absolute cap is low", () => {
test("#when max tool calls exceeded #then task is still cancelled by absolute cap", async () => {
const manager = createManager({
maxToolCalls: 3,
circuitBreaker: {
enabled: false,
windowSize: 10,
repetitionThresholdPercent: 95,
},
})
const task: BackgroundTask = {
id: "task-cap-disabled-1",
sessionID: "session-cap-disabled-1",
parentSessionID: "parent-1",
parentMessageID: "msg-1",
description: "Backstop task with disabled circuit breaker",
prompt: "work",
agent: "explore",
status: "running",
startedAt: new Date(Date.now() - 60_000),
progress: {
toolCalls: 0,
lastUpdate: new Date(Date.now() - 60_000),
},
}
getTaskMap(manager).set(task.id, task)
for (const toolName of ["read", "grep", "edit"]) {
manager.handleEvent({
type: "message.part.updated",
properties: { sessionID: task.sessionID, type: "tool", tool: toolName },
})
}
await flushAsyncWork()
expect(task.status).toBe("cancelled")
expect(task.error).toContain("maximum tool call limit (3)")
})
})
})

View File

@@ -74,7 +74,7 @@ interface MessagePartInfo {
sessionID?: string
type?: string
tool?: string
state?: { status?: string }
state?: { status?: string; input?: Record<string, unknown> }
}
interface EventProperties {
@@ -918,29 +918,32 @@ export class BackgroundManager {
task.progress.lastTool = partInfo.tool
const circuitBreaker = resolveCircuitBreakerSettings(this.config)
if (partInfo.tool) {
task.progress.toolCallWindow = recordToolCall(
task.progress.toolCallWindow,
partInfo.tool,
circuitBreaker
)
task.progress.toolCallWindow = recordToolCall(
task.progress.toolCallWindow,
partInfo.tool,
circuitBreaker,
partInfo.state?.input
)
const loopDetection = detectRepetitiveToolUse(task.progress.toolCallWindow)
if (loopDetection.triggered) {
log("[background-agent] Circuit breaker: repetitive tool usage detected", {
taskId: task.id,
agent: task.agent,
sessionID,
toolName: loopDetection.toolName,
repeatedCount: loopDetection.repeatedCount,
sampleSize: loopDetection.sampleSize,
thresholdPercent: loopDetection.thresholdPercent,
})
void this.cancelTask(task.id, {
source: "circuit-breaker",
reason: `Subagent repeatedly called ${loopDetection.toolName} ${loopDetection.repeatedCount}/${loopDetection.sampleSize} times in the recent tool-call window (${loopDetection.thresholdPercent}% threshold). This usually indicates an infinite loop. The task was automatically cancelled to prevent excessive token usage.`,
})
return
}
if (circuitBreaker.enabled) {
const loopDetection = detectRepetitiveToolUse(task.progress.toolCallWindow)
if (loopDetection.triggered) {
log("[background-agent] Circuit breaker: repetitive tool usage detected", {
taskId: task.id,
agent: task.agent,
sessionID,
toolName: loopDetection.toolName,
repeatedCount: loopDetection.repeatedCount,
sampleSize: loopDetection.sampleSize,
thresholdPercent: loopDetection.thresholdPercent,
})
void this.cancelTask(task.id, {
source: "circuit-breaker",
reason: `Subagent repeatedly called ${loopDetection.toolName} ${loopDetection.repeatedCount}/${loopDetection.sampleSize} times in the recent tool-call window (${loopDetection.thresholdPercent}% threshold). This usually indicates an infinite loop. The task was automatically cancelled to prevent excessive token usage.`,
})
return
}
}
}
const maxToolCalls = circuitBreaker.maxToolCalls

View File

@@ -10,7 +10,7 @@ export type BackgroundTaskStatus =
| "interrupt"
export interface ToolCallWindow {
toolNames: string[]
toolSignatures: string[]
windowSize: number
thresholdPercent: number
}

View File

@@ -7,7 +7,7 @@ export const START_WORK_TEMPLATE = `You are starting a Sisyphus work session.
- \`--worktree <path>\` (optional): absolute path to an existing git worktree to work in
- If specified and valid: hook pre-sets worktree_path in boulder.json
- If specified but invalid: you must run \`git worktree add <path> <branch>\` first
- If omitted: you MUST choose or create a worktree (see Worktree Setup below)
- If omitted: work directly in the current project directory (no worktree)
## WHAT TO DO
@@ -24,7 +24,7 @@ export const START_WORK_TEMPLATE = `You are starting a Sisyphus work session.
- If ONE plan: auto-select it
- If MULTIPLE plans: show list with timestamps, ask user to select
4. **Worktree Setup** (when \`worktree_path\` not already set in boulder.json):
4. **Worktree Setup** (ONLY when \`--worktree\` was explicitly specified and \`worktree_path\` not already set in boulder.json):
1. \`git worktree list --porcelain\` — see available worktrees
2. Create: \`git worktree add <absolute-path> <branch-or-HEAD>\`
3. Update boulder.json to add \`"worktree_path": "<absolute-path>"\`
@@ -86,6 +86,38 @@ Reading plan and beginning execution...
- The session_id is injected by the hook - use it directly
- Always update boulder.json BEFORE starting work
- Always set worktree_path in boulder.json before executing any tasks
- If worktree_path is set in boulder.json, all work happens inside that worktree directory
- Read the FULL plan file before delegating any tasks
- Follow atlas delegation protocols (7-section format)`
- Follow atlas delegation protocols (7-section format)
## TASK BREAKDOWN (MANDATORY)
After reading the plan file, you MUST decompose every plan task into granular, implementation-level sub-steps and register ALL of them as task/todo items BEFORE starting any work.
**How to break down**:
- Each plan checkbox item (e.g., \`- [ ] Add user authentication\`) must be split into concrete, actionable sub-tasks
- Sub-tasks should be specific enough that each one touches a clear set of files/functions
- Include: file to modify, what to change, expected behavior, and how to verify
- Do NOT leave any task vague — "implement feature X" is NOT acceptable; "add validateToken() to src/auth/middleware.ts that checks JWT expiry and returns 401" IS acceptable
**Example breakdown**:
Plan task: \`- [ ] Add rate limiting to API\`
→ Todo items:
1. Create \`src/middleware/rate-limiter.ts\` with sliding window algorithm (max 100 req/min per IP)
2. Add RateLimiter middleware to \`src/app.ts\` router chain, before auth middleware
3. Add rate limit headers (X-RateLimit-Limit, X-RateLimit-Remaining) to response in \`rate-limiter.ts\`
4. Add test: verify 429 response after exceeding limit in \`src/middleware/rate-limiter.test.ts\`
5. Add test: verify headers are present on normal responses
Register these as task/todo items so progress is tracked and visible throughout the session.
## WORKTREE COMPLETION
When working in a worktree (\`worktree_path\` is set in boulder.json) and ALL plan tasks are complete:
1. Commit all remaining changes in the worktree
2. Switch to the main working directory (the original repo, NOT the worktree)
3. Merge the worktree branch into the current branch: \`git merge <worktree-branch>\`
4. If merge succeeds, clean up: \`git worktree remove <worktree-path>\`
5. Remove the boulder.json state
This is the DEFAULT behavior when \`--worktree\` was used. Skip merge only if the user explicitly instructs otherwise (e.g., asks to create a PR instead).`

View File

@@ -52,3 +52,4 @@ export { createWriteExistingFileGuardHook } from "./write-existing-file-guard";
export { createHashlineReadEnhancerHook } from "./hashline-read-enhancer";
export { createJsonErrorRecoveryHook, JSON_ERROR_TOOL_EXCLUDE_LIST, JSON_ERROR_PATTERNS, JSON_ERROR_REMINDER } from "./json-error-recovery";
export { createReadImageResizerHook } from "./read-image-resizer"
export { createTodoDescriptionOverrideHook } from "./todo-description-override"

View File

@@ -101,7 +101,13 @@ export function createAutoRetryHelpers(deps: HookDeps) {
return
}
const retryModelPayload = buildRetryModelPayload(newModel)
const agentSettings = resolvedAgent
? pluginConfig?.agents?.[resolvedAgent as keyof typeof pluginConfig.agents]
: undefined
const retryModelPayload = buildRetryModelPayload(newModel, agentSettings ? {
variant: agentSettings.variant,
reasoningEffort: agentSettings.reasoningEffort,
} : undefined)
if (!retryModelPayload) {
log(`[${HOOK_NAME}] Invalid model format (missing provider prefix): ${newModel}`)
const state = sessionStates.get(sessionID)

View File

@@ -0,0 +1,114 @@
import { describe, test, expect } from "bun:test"
import { buildRetryModelPayload } from "./retry-model-payload"
describe("buildRetryModelPayload", () => {
test("should return undefined for empty model string", () => {
// given
const model = ""
// when
const result = buildRetryModelPayload(model)
// then
expect(result).toBeUndefined()
})
test("should return undefined for model without provider prefix", () => {
// given
const model = "kimi-k2.5"
// when
const result = buildRetryModelPayload(model)
// then
expect(result).toBeUndefined()
})
test("should parse provider and model ID", () => {
// given
const model = "chutes/kimi-k2.5"
// when
const result = buildRetryModelPayload(model)
// then
expect(result).toEqual({
model: { providerID: "chutes", modelID: "kimi-k2.5" },
})
})
test("should include variant from model string", () => {
// given
const model = "anthropic/claude-sonnet-4-5 high"
// when
const result = buildRetryModelPayload(model)
// then
expect(result).toEqual({
model: { providerID: "anthropic", modelID: "claude-sonnet-4-5" },
variant: "high",
})
})
test("should use agent variant when model string has no variant", () => {
// given
const model = "chutes/kimi-k2.5"
const agentSettings = { variant: "max" }
// when
const result = buildRetryModelPayload(model, agentSettings)
// then
expect(result).toEqual({
model: { providerID: "chutes", modelID: "kimi-k2.5" },
variant: "max",
})
})
test("should prefer model string variant over agent variant", () => {
// given
const model = "anthropic/claude-sonnet-4-5 high"
const agentSettings = { variant: "max" }
// when
const result = buildRetryModelPayload(model, agentSettings)
// then
expect(result).toEqual({
model: { providerID: "anthropic", modelID: "claude-sonnet-4-5" },
variant: "high",
})
})
test("should include reasoningEffort from agent settings", () => {
// given
const model = "openai/gpt-5.4"
const agentSettings = { variant: "high", reasoningEffort: "xhigh" }
// when
const result = buildRetryModelPayload(model, agentSettings)
// then
expect(result).toEqual({
model: { providerID: "openai", modelID: "gpt-5.4" },
variant: "high",
reasoningEffort: "xhigh",
})
})
test("should not include reasoningEffort when agent settings has none", () => {
// given
const model = "chutes/kimi-k2.5"
const agentSettings = { variant: "medium" }
// when
const result = buildRetryModelPayload(model, agentSettings)
// then
expect(result).toEqual({
model: { providerID: "chutes", modelID: "kimi-k2.5" },
variant: "medium",
})
})
})

View File

@@ -2,24 +2,29 @@ import { parseModelString } from "../../tools/delegate-task/model-string-parser"
export function buildRetryModelPayload(
model: string,
): { model: { providerID: string; modelID: string }; variant?: string } | undefined {
agentSettings?: { variant?: string; reasoningEffort?: string },
): { model: { providerID: string; modelID: string }; variant?: string; reasoningEffort?: string } | undefined {
const parsedModel = parseModelString(model)
if (!parsedModel) {
return undefined
}
return parsedModel.variant
? {
model: {
providerID: parsedModel.providerID,
modelID: parsedModel.modelID,
},
variant: parsedModel.variant,
}
: {
model: {
providerID: parsedModel.providerID,
modelID: parsedModel.modelID,
},
}
const variant = parsedModel.variant ?? agentSettings?.variant
const reasoningEffort = agentSettings?.reasoningEffort
const payload: { model: { providerID: string; modelID: string }; variant?: string; reasoningEffort?: string } = {
model: {
providerID: parsedModel.providerID,
modelID: parsedModel.modelID,
},
}
if (variant) {
payload.variant = variant
}
if (reasoningEffort) {
payload.reasoningEffort = reasoningEffort
}
return payload
}

View File

@@ -0,0 +1,28 @@
export const TODOWRITE_DESCRIPTION = `Use this tool to create and manage a structured task list for tracking progress on multi-step work.
## Todo Format (MANDATORY)
Each todo title MUST encode four elements: WHERE, WHY, HOW, and EXPECTED RESULT.
Format: "[WHERE] [HOW] to [WHY] — expect [RESULT]"
GOOD:
- "src/utils/validation.ts: Add validateEmail() for input sanitization — returns boolean"
- "UserService.create(): Call validateEmail() before DB insert — rejects invalid emails with 400"
- "validation.test.ts: Add test for missing @ sign — expect validateEmail('foo') to return false"
BAD:
- "Implement email validation" (where? how? what result?)
- "Add dark mode" (this is a feature, not a todo)
- "Fix auth" (what file? what changes? what's expected?)
## Granularity Rules
Each todo MUST be a single atomic action completable in 1-3 tool calls. If it needs more, split it.
**Size test**: Can you complete this todo by editing one file or running one command? If not, it's too big.
## Task Management
- One in_progress at a time. Complete it before starting the next.
- Mark completed immediately after finishing each item.
- Skip this tool for single trivial tasks (one-step, obvious action).`

View File

@@ -0,0 +1,14 @@
import { TODOWRITE_DESCRIPTION } from "./description"
export function createTodoDescriptionOverrideHook() {
return {
"tool.definition": async (
input: { toolID: string },
output: { description: string; parameters: unknown },
) => {
if (input.toolID === "todowrite") {
output.description = TODOWRITE_DESCRIPTION
}
},
}
}

View File

@@ -0,0 +1,40 @@
import { describe, it, expect } from "bun:test"
import { createTodoDescriptionOverrideHook } from "./hook"
import { TODOWRITE_DESCRIPTION } from "./description"
describe("createTodoDescriptionOverrideHook", () => {
describe("#given hook is created", () => {
describe("#when tool.definition is called with todowrite", () => {
it("#then should override the description", async () => {
const hook = createTodoDescriptionOverrideHook()
const output = { description: "original description", parameters: {} }
await hook["tool.definition"]({ toolID: "todowrite" }, output)
expect(output.description).toBe(TODOWRITE_DESCRIPTION)
})
})
describe("#when tool.definition is called with non-todowrite tool", () => {
it("#then should not modify the description", async () => {
const hook = createTodoDescriptionOverrideHook()
const output = { description: "original description", parameters: {} }
await hook["tool.definition"]({ toolID: "bash" }, output)
expect(output.description).toBe("original description")
})
})
describe("#when tool.definition is called with TodoWrite (case-insensitive)", () => {
it("#then should not override for different casing since OpenCode sends lowercase", async () => {
const hook = createTodoDescriptionOverrideHook()
const output = { description: "original description", parameters: {} }
await hook["tool.definition"]({ toolID: "TodoWrite" }, output)
expect(output.description).toBe("original description")
})
})
})
})

View File

@@ -0,0 +1 @@
export { createTodoDescriptionOverrideHook } from "./hook"

View File

@@ -71,5 +71,9 @@ export function createPluginInterface(args: {
ctx,
hooks,
}),
"tool.definition": async (input, output) => {
await hooks.todoDescriptionOverride?.["tool.definition"]?.(input, output)
},
}
}

View File

@@ -14,6 +14,7 @@ import {
createHashlineReadEnhancerHook,
createReadImageResizerHook,
createJsonErrorRecoveryHook,
createTodoDescriptionOverrideHook,
} from "../../hooks"
import {
getOpenCodeVersion,
@@ -35,6 +36,7 @@ export type ToolGuardHooks = {
hashlineReadEnhancer: ReturnType<typeof createHashlineReadEnhancerHook> | null
jsonErrorRecovery: ReturnType<typeof createJsonErrorRecoveryHook> | null
readImageResizer: ReturnType<typeof createReadImageResizerHook> | null
todoDescriptionOverride: ReturnType<typeof createTodoDescriptionOverrideHook> | null
}
export function createToolGuardHooks(args: {
@@ -111,6 +113,10 @@ export function createToolGuardHooks(args: {
? safeHook("read-image-resizer", () => createReadImageResizerHook(ctx))
: null
const todoDescriptionOverride = isHookEnabled("todo-description-override")
? safeHook("todo-description-override", () => createTodoDescriptionOverrideHook())
: null
return {
commentChecker,
toolOutputTruncator,
@@ -123,5 +129,6 @@ export function createToolGuardHooks(args: {
hashlineReadEnhancer,
jsonErrorRecovery,
readImageResizer,
todoDescriptionOverride,
}
}