Compare commits
14 Commits
v3.12.0
...
fix/plan-p
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ef8f22caba | ||
|
|
55ac653eaa | ||
|
|
1d5652dfa9 | ||
|
|
76c460536d | ||
|
|
b067d4a284 | ||
|
|
94838ec039 | ||
|
|
224ecea8c7 | ||
|
|
5d5755f29d | ||
|
|
1fdce01fd2 | ||
|
|
c8213c970e | ||
|
|
576ff453e5 | ||
|
|
9b8aca45f9 | ||
|
|
f1f20f5a79 | ||
|
|
de40caf76d |
98
.github/workflows/publish-platform.yml
vendored
98
.github/workflows/publish-platform.yml
vendored
@@ -59,20 +59,39 @@ jobs:
|
|||||||
- name: Check if already published
|
- name: Check if already published
|
||||||
id: check
|
id: check
|
||||||
run: |
|
run: |
|
||||||
PKG_NAME="oh-my-opencode-${{ matrix.platform }}"
|
|
||||||
VERSION="${{ inputs.version }}"
|
VERSION="${{ inputs.version }}"
|
||||||
STATUS=$(curl -s -o /dev/null -w "%{http_code}" "https://registry.npmjs.org/${PKG_NAME}/${VERSION}")
|
|
||||||
# Convert platform name for output (replace - with _)
|
|
||||||
PLATFORM_KEY="${{ matrix.platform }}"
|
PLATFORM_KEY="${{ matrix.platform }}"
|
||||||
PLATFORM_KEY="${PLATFORM_KEY//-/_}"
|
PLATFORM_KEY="${PLATFORM_KEY//-/_}"
|
||||||
if [ "$STATUS" = "200" ]; then
|
|
||||||
|
# Check oh-my-opencode
|
||||||
|
OC_STATUS=$(curl -s -o /dev/null -w "%{http_code}" "https://registry.npmjs.org/oh-my-opencode-${{ matrix.platform }}/${VERSION}")
|
||||||
|
# Check oh-my-openagent
|
||||||
|
OA_STATUS=$(curl -s -o /dev/null -w "%{http_code}" "https://registry.npmjs.org/oh-my-openagent-${{ matrix.platform }}/${VERSION}")
|
||||||
|
|
||||||
|
echo "oh-my-opencode-${{ matrix.platform }}@${VERSION}: ${OC_STATUS}"
|
||||||
|
echo "oh-my-openagent-${{ matrix.platform }}@${VERSION}: ${OA_STATUS}"
|
||||||
|
|
||||||
|
if [ "$OC_STATUS" = "200" ]; then
|
||||||
|
echo "skip_opencode=true" >> $GITHUB_OUTPUT
|
||||||
|
echo "✓ oh-my-opencode-${{ matrix.platform }}@${VERSION} already published"
|
||||||
|
else
|
||||||
|
echo "skip_opencode=false" >> $GITHUB_OUTPUT
|
||||||
|
echo "→ oh-my-opencode-${{ matrix.platform }}@${VERSION} needs publishing"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ "$OA_STATUS" = "200" ]; then
|
||||||
|
echo "skip_openagent=true" >> $GITHUB_OUTPUT
|
||||||
|
echo "✓ oh-my-openagent-${{ matrix.platform }}@${VERSION} already published"
|
||||||
|
else
|
||||||
|
echo "skip_openagent=false" >> $GITHUB_OUTPUT
|
||||||
|
echo "→ oh-my-openagent-${{ matrix.platform }}@${VERSION} needs publishing"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Skip build only if BOTH are already published
|
||||||
|
if [ "$OC_STATUS" = "200" ] && [ "$OA_STATUS" = "200" ]; then
|
||||||
echo "skip=true" >> $GITHUB_OUTPUT
|
echo "skip=true" >> $GITHUB_OUTPUT
|
||||||
echo "skip_${PLATFORM_KEY}=true" >> $GITHUB_OUTPUT
|
|
||||||
echo "✓ ${PKG_NAME}@${VERSION} already published"
|
|
||||||
else
|
else
|
||||||
echo "skip=false" >> $GITHUB_OUTPUT
|
echo "skip=false" >> $GITHUB_OUTPUT
|
||||||
echo "skip_${PLATFORM_KEY}=false" >> $GITHUB_OUTPUT
|
|
||||||
echo "→ ${PKG_NAME}@${VERSION} needs publishing"
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
- name: Update version in package.json
|
- name: Update version in package.json
|
||||||
@@ -207,23 +226,38 @@ jobs:
|
|||||||
matrix:
|
matrix:
|
||||||
platform: [darwin-arm64, darwin-x64, darwin-x64-baseline, linux-x64, linux-x64-baseline, linux-arm64, linux-x64-musl, linux-x64-musl-baseline, linux-arm64-musl, windows-x64, windows-x64-baseline]
|
platform: [darwin-arm64, darwin-x64, darwin-x64-baseline, linux-x64, linux-x64-baseline, linux-arm64, linux-x64-musl, linux-x64-musl-baseline, linux-arm64-musl, windows-x64, windows-x64-baseline]
|
||||||
steps:
|
steps:
|
||||||
- name: Check if oh-my-opencode already published
|
- name: Check if already published
|
||||||
id: check
|
id: check
|
||||||
run: |
|
run: |
|
||||||
PKG_NAME="oh-my-opencode-${{ matrix.platform }}"
|
|
||||||
VERSION="${{ inputs.version }}"
|
VERSION="${{ inputs.version }}"
|
||||||
STATUS=$(curl -s -o /dev/null -w "%{http_code}" "https://registry.npmjs.org/${PKG_NAME}/${VERSION}")
|
|
||||||
if [ "$STATUS" = "200" ]; then
|
OC_STATUS=$(curl -s -o /dev/null -w "%{http_code}" "https://registry.npmjs.org/oh-my-opencode-${{ matrix.platform }}/${VERSION}")
|
||||||
echo "skip=true" >> $GITHUB_OUTPUT
|
OA_STATUS=$(curl -s -o /dev/null -w "%{http_code}" "https://registry.npmjs.org/oh-my-openagent-${{ matrix.platform }}/${VERSION}")
|
||||||
echo "✓ ${PKG_NAME}@${VERSION} already published, skipping"
|
|
||||||
|
if [ "$OC_STATUS" = "200" ]; then
|
||||||
|
echo "skip_opencode=true" >> $GITHUB_OUTPUT
|
||||||
|
echo "✓ oh-my-opencode-${{ matrix.platform }}@${VERSION} already published"
|
||||||
else
|
else
|
||||||
echo "skip=false" >> $GITHUB_OUTPUT
|
echo "skip_opencode=false" >> $GITHUB_OUTPUT
|
||||||
echo "→ ${PKG_NAME}@${VERSION} will be published"
|
fi
|
||||||
|
|
||||||
|
if [ "$OA_STATUS" = "200" ]; then
|
||||||
|
echo "skip_openagent=true" >> $GITHUB_OUTPUT
|
||||||
|
echo "✓ oh-my-openagent-${{ matrix.platform }}@${VERSION} already published"
|
||||||
|
else
|
||||||
|
echo "skip_openagent=false" >> $GITHUB_OUTPUT
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Need artifact if either package needs publishing
|
||||||
|
if [ "$OC_STATUS" = "200" ] && [ "$OA_STATUS" = "200" ]; then
|
||||||
|
echo "skip_all=true" >> $GITHUB_OUTPUT
|
||||||
|
else
|
||||||
|
echo "skip_all=false" >> $GITHUB_OUTPUT
|
||||||
fi
|
fi
|
||||||
|
|
||||||
- name: Download artifact
|
- name: Download artifact
|
||||||
id: download
|
id: download
|
||||||
if: steps.check.outputs.skip != 'true'
|
if: steps.check.outputs.skip_all != 'true'
|
||||||
continue-on-error: true
|
continue-on-error: true
|
||||||
uses: actions/download-artifact@v4
|
uses: actions/download-artifact@v4
|
||||||
with:
|
with:
|
||||||
@@ -231,7 +265,7 @@ jobs:
|
|||||||
path: .
|
path: .
|
||||||
|
|
||||||
- name: Extract artifact
|
- name: Extract artifact
|
||||||
if: steps.check.outputs.skip != 'true' && steps.download.outcome == 'success'
|
if: steps.check.outputs.skip_all != 'true' && steps.download.outcome == 'success'
|
||||||
run: |
|
run: |
|
||||||
PLATFORM="${{ matrix.platform }}"
|
PLATFORM="${{ matrix.platform }}"
|
||||||
mkdir -p packages/${PLATFORM}
|
mkdir -p packages/${PLATFORM}
|
||||||
@@ -247,13 +281,13 @@ jobs:
|
|||||||
ls -la packages/${PLATFORM}/bin/
|
ls -la packages/${PLATFORM}/bin/
|
||||||
|
|
||||||
- uses: actions/setup-node@v4
|
- uses: actions/setup-node@v4
|
||||||
if: steps.check.outputs.skip != 'true' && steps.download.outcome == 'success'
|
if: steps.check.outputs.skip_all != 'true' && steps.download.outcome == 'success'
|
||||||
with:
|
with:
|
||||||
node-version: "24"
|
node-version: "24"
|
||||||
registry-url: "https://registry.npmjs.org"
|
registry-url: "https://registry.npmjs.org"
|
||||||
|
|
||||||
- name: Publish ${{ matrix.platform }}
|
- name: Publish oh-my-opencode-${{ matrix.platform }}
|
||||||
if: steps.check.outputs.skip != 'true' && steps.download.outcome == 'success'
|
if: steps.check.outputs.skip_opencode != 'true' && steps.download.outcome == 'success'
|
||||||
run: |
|
run: |
|
||||||
cd packages/${{ matrix.platform }}
|
cd packages/${{ matrix.platform }}
|
||||||
|
|
||||||
@@ -267,3 +301,25 @@ jobs:
|
|||||||
NODE_AUTH_TOKEN: ${{ secrets.NODE_AUTH_TOKEN }}
|
NODE_AUTH_TOKEN: ${{ secrets.NODE_AUTH_TOKEN }}
|
||||||
NPM_CONFIG_PROVENANCE: true
|
NPM_CONFIG_PROVENANCE: true
|
||||||
timeout-minutes: 15
|
timeout-minutes: 15
|
||||||
|
|
||||||
|
- name: Publish oh-my-openagent-${{ matrix.platform }}
|
||||||
|
if: steps.check.outputs.skip_openagent != 'true' && steps.download.outcome == 'success'
|
||||||
|
run: |
|
||||||
|
cd packages/${{ matrix.platform }}
|
||||||
|
|
||||||
|
# Rename package for oh-my-openagent
|
||||||
|
jq --arg name "oh-my-openagent-${{ matrix.platform }}" \
|
||||||
|
--arg desc "Platform-specific binary for oh-my-openagent (${{ matrix.platform }})" \
|
||||||
|
'.name = $name | .description = $desc | .bin = {"oh-my-openagent": (.bin | to_entries | .[0].value)}' \
|
||||||
|
package.json > tmp.json && mv tmp.json package.json
|
||||||
|
|
||||||
|
TAG_ARG=""
|
||||||
|
if [ -n "${{ inputs.dist_tag }}" ]; then
|
||||||
|
TAG_ARG="--tag ${{ inputs.dist_tag }}"
|
||||||
|
fi
|
||||||
|
|
||||||
|
npm publish --access public --provenance $TAG_ARG
|
||||||
|
env:
|
||||||
|
NODE_AUTH_TOKEN: ${{ secrets.NODE_AUTH_TOKEN }}
|
||||||
|
NPM_CONFIG_PROVENANCE: true
|
||||||
|
timeout-minutes: 15
|
||||||
|
|||||||
42
.github/workflows/publish.yml
vendored
42
.github/workflows/publish.yml
vendored
@@ -216,6 +216,48 @@ jobs:
|
|||||||
NODE_AUTH_TOKEN: ${{ secrets.NODE_AUTH_TOKEN }}
|
NODE_AUTH_TOKEN: ${{ secrets.NODE_AUTH_TOKEN }}
|
||||||
NPM_CONFIG_PROVENANCE: true
|
NPM_CONFIG_PROVENANCE: true
|
||||||
|
|
||||||
|
- name: Check if oh-my-openagent already published
|
||||||
|
id: check-openagent
|
||||||
|
run: |
|
||||||
|
VERSION="${{ steps.version.outputs.version }}"
|
||||||
|
STATUS=$(curl -s -o /dev/null -w "%{http_code}" "https://registry.npmjs.org/oh-my-openagent/${VERSION}")
|
||||||
|
if [ "$STATUS" = "200" ]; then
|
||||||
|
echo "skip=true" >> $GITHUB_OUTPUT
|
||||||
|
echo "✓ oh-my-openagent@${VERSION} already published"
|
||||||
|
else
|
||||||
|
echo "skip=false" >> $GITHUB_OUTPUT
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Publish oh-my-openagent
|
||||||
|
if: steps.check-openagent.outputs.skip != 'true'
|
||||||
|
run: |
|
||||||
|
VERSION="${{ steps.version.outputs.version }}"
|
||||||
|
|
||||||
|
# Update package name, version, and optionalDependencies for oh-my-openagent
|
||||||
|
jq --arg v "$VERSION" '
|
||||||
|
.name = "oh-my-openagent" |
|
||||||
|
.version = $v |
|
||||||
|
.optionalDependencies = (
|
||||||
|
.optionalDependencies | to_entries |
|
||||||
|
map(.key = (.key | sub("^oh-my-opencode-"; "oh-my-openagent-")) | .value = $v) |
|
||||||
|
from_entries
|
||||||
|
)
|
||||||
|
' package.json > tmp.json && mv tmp.json package.json
|
||||||
|
|
||||||
|
TAG_ARG=""
|
||||||
|
if [ -n "${{ steps.version.outputs.dist_tag }}" ]; then
|
||||||
|
TAG_ARG="--tag ${{ steps.version.outputs.dist_tag }}"
|
||||||
|
fi
|
||||||
|
npm publish --access public --provenance $TAG_ARG || echo "::warning::oh-my-openagent publish failed"
|
||||||
|
env:
|
||||||
|
NODE_AUTH_TOKEN: ${{ secrets.NODE_AUTH_TOKEN }}
|
||||||
|
NPM_CONFIG_PROVENANCE: true
|
||||||
|
|
||||||
|
- name: Restore package.json
|
||||||
|
if: steps.check-openagent.outputs.skip != 'true'
|
||||||
|
run: |
|
||||||
|
git checkout -- package.json
|
||||||
|
|
||||||
trigger-platform:
|
trigger-platform:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
needs: publish-main
|
needs: publish-main
|
||||||
|
|||||||
@@ -3708,6 +3708,9 @@
|
|||||||
"circuitBreaker": {
|
"circuitBreaker": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
"enabled": {
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
"maxToolCalls": {
|
"maxToolCalls": {
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"minimum": 10,
|
"minimum": 10,
|
||||||
|
|||||||
@@ -2223,6 +2223,22 @@
|
|||||||
"created_at": "2026-03-17T08:27:45Z",
|
"created_at": "2026-03-17T08:27:45Z",
|
||||||
"repoId": 1108837393,
|
"repoId": 1108837393,
|
||||||
"pullRequestNo": 2640
|
"pullRequestNo": 2640
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "tad-hq",
|
||||||
|
"id": 213478119,
|
||||||
|
"comment_id": 4077697128,
|
||||||
|
"created_at": "2026-03-17T20:07:09Z",
|
||||||
|
"repoId": 1108837393,
|
||||||
|
"pullRequestNo": 2655
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "ogormans-deptstack",
|
||||||
|
"id": 208788555,
|
||||||
|
"comment_id": 4077893096,
|
||||||
|
"created_at": "2026-03-17T20:42:42Z",
|
||||||
|
"repoId": 1108837393,
|
||||||
|
"pullRequestNo": 2656
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@@ -1,6 +1,7 @@
|
|||||||
import { z } from "zod"
|
import { z } from "zod"
|
||||||
|
|
||||||
const CircuitBreakerConfigSchema = z.object({
|
const CircuitBreakerConfigSchema = z.object({
|
||||||
|
enabled: z.boolean().optional(),
|
||||||
maxToolCalls: z.number().int().min(10).optional(),
|
maxToolCalls: z.number().int().min(10).optional(),
|
||||||
windowSize: z.number().int().min(5).optional(),
|
windowSize: z.number().int().min(5).optional(),
|
||||||
repetitionThresholdPercent: z.number().gt(0).max(100).optional(),
|
repetitionThresholdPercent: z.number().gt(0).max(100).optional(),
|
||||||
|
|||||||
@@ -51,6 +51,7 @@ export const HookNameSchema = z.enum([
|
|||||||
"anthropic-effort",
|
"anthropic-effort",
|
||||||
"hashline-read-enhancer",
|
"hashline-read-enhancer",
|
||||||
"read-image-resizer",
|
"read-image-resizer",
|
||||||
|
"todo-description-override",
|
||||||
])
|
])
|
||||||
|
|
||||||
export type HookName = z.infer<typeof HookNameSchema>
|
export type HookName = z.infer<typeof HookNameSchema>
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ export const DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS = 1_800_000
|
|||||||
export const DEFAULT_MAX_TOOL_CALLS = 200
|
export const DEFAULT_MAX_TOOL_CALLS = 200
|
||||||
export const DEFAULT_CIRCUIT_BREAKER_WINDOW_SIZE = 20
|
export const DEFAULT_CIRCUIT_BREAKER_WINDOW_SIZE = 20
|
||||||
export const DEFAULT_CIRCUIT_BREAKER_REPETITION_THRESHOLD_PERCENT = 80
|
export const DEFAULT_CIRCUIT_BREAKER_REPETITION_THRESHOLD_PERCENT = 80
|
||||||
|
export const DEFAULT_CIRCUIT_BREAKER_ENABLED = true
|
||||||
export const MIN_RUNTIME_BEFORE_STALE_MS = 30_000
|
export const MIN_RUNTIME_BEFORE_STALE_MS = 30_000
|
||||||
export const MIN_IDLE_TIME_MS = 5000
|
export const MIN_IDLE_TIME_MS = 5000
|
||||||
export const POLLING_INTERVAL_MS = 3000
|
export const POLLING_INTERVAL_MS = 3000
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import { describe, expect, test } from "bun:test"
|
import { describe, expect, test } from "bun:test"
|
||||||
import {
|
import {
|
||||||
|
createToolCallSignature,
|
||||||
detectRepetitiveToolUse,
|
detectRepetitiveToolUse,
|
||||||
recordToolCall,
|
recordToolCall,
|
||||||
resolveCircuitBreakerSettings,
|
resolveCircuitBreakerSettings,
|
||||||
@@ -17,6 +18,17 @@ function buildWindow(
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function buildWindowWithInputs(
|
||||||
|
calls: Array<{ tool: string; input?: Record<string, unknown> }>,
|
||||||
|
override?: Parameters<typeof resolveCircuitBreakerSettings>[0]
|
||||||
|
) {
|
||||||
|
const settings = resolveCircuitBreakerSettings(override)
|
||||||
|
return calls.reduce(
|
||||||
|
(window, { tool, input }) => recordToolCall(window, tool, settings, input),
|
||||||
|
undefined as ReturnType<typeof recordToolCall> | undefined
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
describe("loop-detector", () => {
|
describe("loop-detector", () => {
|
||||||
describe("resolveCircuitBreakerSettings", () => {
|
describe("resolveCircuitBreakerSettings", () => {
|
||||||
describe("#given nested circuit breaker config", () => {
|
describe("#given nested circuit breaker config", () => {
|
||||||
@@ -31,12 +43,90 @@ describe("loop-detector", () => {
|
|||||||
})
|
})
|
||||||
|
|
||||||
expect(result).toEqual({
|
expect(result).toEqual({
|
||||||
|
enabled: true,
|
||||||
maxToolCalls: 120,
|
maxToolCalls: 120,
|
||||||
windowSize: 10,
|
windowSize: 10,
|
||||||
repetitionThresholdPercent: 70,
|
repetitionThresholdPercent: 70,
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
describe("#given no enabled config", () => {
|
||||||
|
test("#when resolved #then enabled defaults to true", () => {
|
||||||
|
const result = resolveCircuitBreakerSettings({
|
||||||
|
circuitBreaker: {
|
||||||
|
maxToolCalls: 100,
|
||||||
|
windowSize: 5,
|
||||||
|
repetitionThresholdPercent: 60,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
expect(result.enabled).toBe(true)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe("#given enabled is false in config", () => {
|
||||||
|
test("#when resolved #then enabled is false", () => {
|
||||||
|
const result = resolveCircuitBreakerSettings({
|
||||||
|
circuitBreaker: {
|
||||||
|
enabled: false,
|
||||||
|
maxToolCalls: 100,
|
||||||
|
windowSize: 5,
|
||||||
|
repetitionThresholdPercent: 60,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
expect(result.enabled).toBe(false)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe("#given enabled is true in config", () => {
|
||||||
|
test("#when resolved #then enabled is true", () => {
|
||||||
|
const result = resolveCircuitBreakerSettings({
|
||||||
|
circuitBreaker: {
|
||||||
|
enabled: true,
|
||||||
|
maxToolCalls: 100,
|
||||||
|
windowSize: 5,
|
||||||
|
repetitionThresholdPercent: 60,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
expect(result.enabled).toBe(true)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe("createToolCallSignature", () => {
|
||||||
|
test("#given tool with input #when signature created #then includes tool and sorted input", () => {
|
||||||
|
const result = createToolCallSignature("read", { filePath: "/a.ts" })
|
||||||
|
|
||||||
|
expect(result).toBe('read::{"filePath":"/a.ts"}')
|
||||||
|
})
|
||||||
|
|
||||||
|
test("#given tool with undefined input #when signature created #then returns bare tool name", () => {
|
||||||
|
const result = createToolCallSignature("read", undefined)
|
||||||
|
|
||||||
|
expect(result).toBe("read")
|
||||||
|
})
|
||||||
|
|
||||||
|
test("#given tool with null input #when signature created #then returns bare tool name", () => {
|
||||||
|
const result = createToolCallSignature("read", null)
|
||||||
|
|
||||||
|
expect(result).toBe("read")
|
||||||
|
})
|
||||||
|
|
||||||
|
test("#given tool with empty object input #when signature created #then returns bare tool name", () => {
|
||||||
|
const result = createToolCallSignature("read", {})
|
||||||
|
|
||||||
|
expect(result).toBe("read")
|
||||||
|
})
|
||||||
|
|
||||||
|
test("#given same input different key order #when signatures compared #then they are equal", () => {
|
||||||
|
const first = createToolCallSignature("read", { filePath: "/a.ts", offset: 0 })
|
||||||
|
const second = createToolCallSignature("read", { offset: 0, filePath: "/a.ts" })
|
||||||
|
|
||||||
|
expect(first).toBe(second)
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
describe("detectRepetitiveToolUse", () => {
|
describe("detectRepetitiveToolUse", () => {
|
||||||
@@ -113,5 +203,56 @@ describe("loop-detector", () => {
|
|||||||
})
|
})
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
describe("#given same tool with different file inputs", () => {
|
||||||
|
test("#when evaluated #then it does not trigger", () => {
|
||||||
|
const calls = Array.from({ length: 20 }, (_, i) => ({
|
||||||
|
tool: "read",
|
||||||
|
input: { filePath: `/src/file-${i}.ts` },
|
||||||
|
}))
|
||||||
|
const window = buildWindowWithInputs(calls, {
|
||||||
|
circuitBreaker: { windowSize: 20, repetitionThresholdPercent: 80 },
|
||||||
|
})
|
||||||
|
const result = detectRepetitiveToolUse(window)
|
||||||
|
expect(result.triggered).toBe(false)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe("#given same tool with identical file inputs", () => {
|
||||||
|
test("#when evaluated #then it triggers with bare tool name", () => {
|
||||||
|
const calls = [
|
||||||
|
...Array.from({ length: 16 }, () => ({ tool: "read", input: { filePath: "/src/same.ts" } })),
|
||||||
|
{ tool: "grep", input: { pattern: "foo" } },
|
||||||
|
{ tool: "edit", input: { filePath: "/src/other.ts" } },
|
||||||
|
{ tool: "bash", input: { command: "ls" } },
|
||||||
|
{ tool: "glob", input: { pattern: "**/*.ts" } },
|
||||||
|
]
|
||||||
|
const window = buildWindowWithInputs(calls, {
|
||||||
|
circuitBreaker: { windowSize: 20, repetitionThresholdPercent: 80 },
|
||||||
|
})
|
||||||
|
const result = detectRepetitiveToolUse(window)
|
||||||
|
expect(result.triggered).toBe(true)
|
||||||
|
expect(result.toolName).toBe("read")
|
||||||
|
expect(result.repeatedCount).toBe(16)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe("#given tool calls with no input", () => {
|
||||||
|
test("#when the same tool dominates #then falls back to name-only detection", () => {
|
||||||
|
const calls = [
|
||||||
|
...Array.from({ length: 16 }, () => ({ tool: "read" })),
|
||||||
|
{ tool: "grep" },
|
||||||
|
{ tool: "edit" },
|
||||||
|
{ tool: "bash" },
|
||||||
|
{ tool: "glob" },
|
||||||
|
]
|
||||||
|
const window = buildWindowWithInputs(calls, {
|
||||||
|
circuitBreaker: { windowSize: 20, repetitionThresholdPercent: 80 },
|
||||||
|
})
|
||||||
|
const result = detectRepetitiveToolUse(window)
|
||||||
|
expect(result.triggered).toBe(true)
|
||||||
|
expect(result.toolName).toBe("read")
|
||||||
|
})
|
||||||
|
})
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import type { BackgroundTaskConfig } from "../../config/schema"
|
import type { BackgroundTaskConfig } from "../../config/schema"
|
||||||
import {
|
import {
|
||||||
|
DEFAULT_CIRCUIT_BREAKER_ENABLED,
|
||||||
DEFAULT_CIRCUIT_BREAKER_REPETITION_THRESHOLD_PERCENT,
|
DEFAULT_CIRCUIT_BREAKER_REPETITION_THRESHOLD_PERCENT,
|
||||||
DEFAULT_CIRCUIT_BREAKER_WINDOW_SIZE,
|
DEFAULT_CIRCUIT_BREAKER_WINDOW_SIZE,
|
||||||
DEFAULT_MAX_TOOL_CALLS,
|
DEFAULT_MAX_TOOL_CALLS,
|
||||||
@@ -7,6 +8,7 @@ import {
|
|||||||
import type { ToolCallWindow } from "./types"
|
import type { ToolCallWindow } from "./types"
|
||||||
|
|
||||||
export interface CircuitBreakerSettings {
|
export interface CircuitBreakerSettings {
|
||||||
|
enabled: boolean
|
||||||
maxToolCalls: number
|
maxToolCalls: number
|
||||||
windowSize: number
|
windowSize: number
|
||||||
repetitionThresholdPercent: number
|
repetitionThresholdPercent: number
|
||||||
@@ -24,6 +26,7 @@ export function resolveCircuitBreakerSettings(
|
|||||||
config?: BackgroundTaskConfig
|
config?: BackgroundTaskConfig
|
||||||
): CircuitBreakerSettings {
|
): CircuitBreakerSettings {
|
||||||
return {
|
return {
|
||||||
|
enabled: config?.circuitBreaker?.enabled ?? DEFAULT_CIRCUIT_BREAKER_ENABLED,
|
||||||
maxToolCalls:
|
maxToolCalls:
|
||||||
config?.circuitBreaker?.maxToolCalls ?? config?.maxToolCalls ?? DEFAULT_MAX_TOOL_CALLS,
|
config?.circuitBreaker?.maxToolCalls ?? config?.maxToolCalls ?? DEFAULT_MAX_TOOL_CALLS,
|
||||||
windowSize: config?.circuitBreaker?.windowSize ?? DEFAULT_CIRCUIT_BREAKER_WINDOW_SIZE,
|
windowSize: config?.circuitBreaker?.windowSize ?? DEFAULT_CIRCUIT_BREAKER_WINDOW_SIZE,
|
||||||
@@ -36,28 +39,56 @@ export function resolveCircuitBreakerSettings(
|
|||||||
export function recordToolCall(
|
export function recordToolCall(
|
||||||
window: ToolCallWindow | undefined,
|
window: ToolCallWindow | undefined,
|
||||||
toolName: string,
|
toolName: string,
|
||||||
settings: CircuitBreakerSettings
|
settings: CircuitBreakerSettings,
|
||||||
|
toolInput?: Record<string, unknown> | null
|
||||||
): ToolCallWindow {
|
): ToolCallWindow {
|
||||||
const previous = window?.toolNames ?? []
|
const previous = window?.toolSignatures ?? []
|
||||||
const toolNames = [...previous, toolName].slice(-settings.windowSize)
|
const signature = createToolCallSignature(toolName, toolInput)
|
||||||
|
const toolSignatures = [...previous, signature].slice(-settings.windowSize)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
toolNames,
|
toolSignatures,
|
||||||
windowSize: settings.windowSize,
|
windowSize: settings.windowSize,
|
||||||
thresholdPercent: settings.repetitionThresholdPercent,
|
thresholdPercent: settings.repetitionThresholdPercent,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function sortObject(obj: unknown): unknown {
|
||||||
|
if (obj === null || obj === undefined) return obj
|
||||||
|
if (typeof obj !== "object") return obj
|
||||||
|
if (Array.isArray(obj)) return obj.map(sortObject)
|
||||||
|
|
||||||
|
const sorted: Record<string, unknown> = {}
|
||||||
|
const keys = Object.keys(obj as Record<string, unknown>).sort()
|
||||||
|
for (const key of keys) {
|
||||||
|
sorted[key] = sortObject((obj as Record<string, unknown>)[key])
|
||||||
|
}
|
||||||
|
return sorted
|
||||||
|
}
|
||||||
|
|
||||||
|
export function createToolCallSignature(
|
||||||
|
toolName: string,
|
||||||
|
toolInput?: Record<string, unknown> | null
|
||||||
|
): string {
|
||||||
|
if (toolInput === undefined || toolInput === null) {
|
||||||
|
return toolName
|
||||||
|
}
|
||||||
|
if (Object.keys(toolInput).length === 0) {
|
||||||
|
return toolName
|
||||||
|
}
|
||||||
|
return `${toolName}::${JSON.stringify(sortObject(toolInput))}`
|
||||||
|
}
|
||||||
|
|
||||||
export function detectRepetitiveToolUse(
|
export function detectRepetitiveToolUse(
|
||||||
window: ToolCallWindow | undefined
|
window: ToolCallWindow | undefined
|
||||||
): ToolLoopDetectionResult {
|
): ToolLoopDetectionResult {
|
||||||
if (!window || window.toolNames.length === 0) {
|
if (!window || window.toolSignatures.length === 0) {
|
||||||
return { triggered: false }
|
return { triggered: false }
|
||||||
}
|
}
|
||||||
|
|
||||||
const counts = new Map<string, number>()
|
const counts = new Map<string, number>()
|
||||||
for (const toolName of window.toolNames) {
|
for (const signature of window.toolSignatures) {
|
||||||
counts.set(toolName, (counts.get(toolName) ?? 0) + 1)
|
counts.set(signature, (counts.get(signature) ?? 0) + 1)
|
||||||
}
|
}
|
||||||
|
|
||||||
let repeatedTool: string | undefined
|
let repeatedTool: string | undefined
|
||||||
@@ -70,7 +101,7 @@ export function detectRepetitiveToolUse(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const sampleSize = window.toolNames.length
|
const sampleSize = window.toolSignatures.length
|
||||||
const minimumSampleSize = Math.min(
|
const minimumSampleSize = Math.min(
|
||||||
window.windowSize,
|
window.windowSize,
|
||||||
Math.ceil((window.windowSize * window.thresholdPercent) / 100)
|
Math.ceil((window.windowSize * window.thresholdPercent) / 100)
|
||||||
@@ -88,7 +119,7 @@ export function detectRepetitiveToolUse(
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
triggered: true,
|
triggered: true,
|
||||||
toolName: repeatedTool,
|
toolName: repeatedTool.split("::")[0],
|
||||||
repeatedCount,
|
repeatedCount,
|
||||||
sampleSize,
|
sampleSize,
|
||||||
thresholdPercent: window.thresholdPercent,
|
thresholdPercent: window.thresholdPercent,
|
||||||
|
|||||||
@@ -236,4 +236,181 @@ describe("BackgroundManager circuit breaker", () => {
|
|||||||
expect(task.progress?.countedToolPartIDs).toEqual(["tool-1"])
|
expect(task.progress?.countedToolPartIDs).toEqual(["tool-1"])
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
describe("#given same tool reading different files", () => {
|
||||||
|
test("#when tool events arrive with state.input #then task keeps running", async () => {
|
||||||
|
const manager = createManager({
|
||||||
|
circuitBreaker: {
|
||||||
|
windowSize: 20,
|
||||||
|
repetitionThresholdPercent: 80,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
const task: BackgroundTask = {
|
||||||
|
id: "task-diff-files-1",
|
||||||
|
sessionID: "session-diff-files-1",
|
||||||
|
parentSessionID: "parent-1",
|
||||||
|
parentMessageID: "msg-1",
|
||||||
|
description: "Reading different files",
|
||||||
|
prompt: "work",
|
||||||
|
agent: "explore",
|
||||||
|
status: "running",
|
||||||
|
startedAt: new Date(Date.now() - 60_000),
|
||||||
|
progress: {
|
||||||
|
toolCalls: 0,
|
||||||
|
lastUpdate: new Date(Date.now() - 60_000),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
getTaskMap(manager).set(task.id, task)
|
||||||
|
|
||||||
|
for (let i = 0; i < 20; i++) {
|
||||||
|
manager.handleEvent({
|
||||||
|
type: "message.part.updated",
|
||||||
|
properties: {
|
||||||
|
part: {
|
||||||
|
sessionID: task.sessionID,
|
||||||
|
type: "tool",
|
||||||
|
tool: "read",
|
||||||
|
state: { status: "running", input: { filePath: `/src/file-${i}.ts` } },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
await flushAsyncWork()
|
||||||
|
|
||||||
|
expect(task.status).toBe("running")
|
||||||
|
expect(task.progress?.toolCalls).toBe(20)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe("#given same tool reading same file repeatedly", () => {
|
||||||
|
test("#when tool events arrive with state.input #then task is cancelled with bare tool name in error", async () => {
|
||||||
|
const manager = createManager({
|
||||||
|
circuitBreaker: {
|
||||||
|
windowSize: 20,
|
||||||
|
repetitionThresholdPercent: 80,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
const task: BackgroundTask = {
|
||||||
|
id: "task-same-file-1",
|
||||||
|
sessionID: "session-same-file-1",
|
||||||
|
parentSessionID: "parent-1",
|
||||||
|
parentMessageID: "msg-1",
|
||||||
|
description: "Reading same file repeatedly",
|
||||||
|
prompt: "work",
|
||||||
|
agent: "explore",
|
||||||
|
status: "running",
|
||||||
|
startedAt: new Date(Date.now() - 60_000),
|
||||||
|
progress: {
|
||||||
|
toolCalls: 0,
|
||||||
|
lastUpdate: new Date(Date.now() - 60_000),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
getTaskMap(manager).set(task.id, task)
|
||||||
|
|
||||||
|
for (let i = 0; i < 20; i++) {
|
||||||
|
manager.handleEvent({
|
||||||
|
type: "message.part.updated",
|
||||||
|
properties: {
|
||||||
|
part: {
|
||||||
|
sessionID: task.sessionID,
|
||||||
|
type: "tool",
|
||||||
|
tool: "read",
|
||||||
|
state: { status: "running", input: { filePath: "/src/same.ts" } },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
await flushAsyncWork()
|
||||||
|
|
||||||
|
expect(task.status).toBe("cancelled")
|
||||||
|
expect(task.error).toContain("repeatedly called read")
|
||||||
|
expect(task.error).not.toContain("::")
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe("#given circuit breaker enabled is false", () => {
|
||||||
|
test("#when repetitive tools arrive #then task keeps running", async () => {
|
||||||
|
const manager = createManager({
|
||||||
|
circuitBreaker: {
|
||||||
|
enabled: false,
|
||||||
|
windowSize: 20,
|
||||||
|
repetitionThresholdPercent: 80,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
const task: BackgroundTask = {
|
||||||
|
id: "task-disabled-1",
|
||||||
|
sessionID: "session-disabled-1",
|
||||||
|
parentSessionID: "parent-1",
|
||||||
|
parentMessageID: "msg-1",
|
||||||
|
description: "Disabled circuit breaker task",
|
||||||
|
prompt: "work",
|
||||||
|
agent: "explore",
|
||||||
|
status: "running",
|
||||||
|
startedAt: new Date(Date.now() - 60_000),
|
||||||
|
progress: {
|
||||||
|
toolCalls: 0,
|
||||||
|
lastUpdate: new Date(Date.now() - 60_000),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
getTaskMap(manager).set(task.id, task)
|
||||||
|
|
||||||
|
for (let i = 0; i < 20; i++) {
|
||||||
|
manager.handleEvent({
|
||||||
|
type: "message.part.updated",
|
||||||
|
properties: {
|
||||||
|
sessionID: task.sessionID,
|
||||||
|
type: "tool",
|
||||||
|
tool: "read",
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
await flushAsyncWork()
|
||||||
|
|
||||||
|
expect(task.status).toBe("running")
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe("#given circuit breaker enabled is false but absolute cap is low", () => {
|
||||||
|
test("#when max tool calls exceeded #then task is still cancelled by absolute cap", async () => {
|
||||||
|
const manager = createManager({
|
||||||
|
maxToolCalls: 3,
|
||||||
|
circuitBreaker: {
|
||||||
|
enabled: false,
|
||||||
|
windowSize: 10,
|
||||||
|
repetitionThresholdPercent: 95,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
const task: BackgroundTask = {
|
||||||
|
id: "task-cap-disabled-1",
|
||||||
|
sessionID: "session-cap-disabled-1",
|
||||||
|
parentSessionID: "parent-1",
|
||||||
|
parentMessageID: "msg-1",
|
||||||
|
description: "Backstop task with disabled circuit breaker",
|
||||||
|
prompt: "work",
|
||||||
|
agent: "explore",
|
||||||
|
status: "running",
|
||||||
|
startedAt: new Date(Date.now() - 60_000),
|
||||||
|
progress: {
|
||||||
|
toolCalls: 0,
|
||||||
|
lastUpdate: new Date(Date.now() - 60_000),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
getTaskMap(manager).set(task.id, task)
|
||||||
|
|
||||||
|
for (const toolName of ["read", "grep", "edit"]) {
|
||||||
|
manager.handleEvent({
|
||||||
|
type: "message.part.updated",
|
||||||
|
properties: { sessionID: task.sessionID, type: "tool", tool: toolName },
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
await flushAsyncWork()
|
||||||
|
|
||||||
|
expect(task.status).toBe("cancelled")
|
||||||
|
expect(task.error).toContain("maximum tool call limit (3)")
|
||||||
|
})
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -74,7 +74,7 @@ interface MessagePartInfo {
|
|||||||
sessionID?: string
|
sessionID?: string
|
||||||
type?: string
|
type?: string
|
||||||
tool?: string
|
tool?: string
|
||||||
state?: { status?: string }
|
state?: { status?: string; input?: Record<string, unknown> }
|
||||||
}
|
}
|
||||||
|
|
||||||
interface EventProperties {
|
interface EventProperties {
|
||||||
@@ -918,29 +918,32 @@ export class BackgroundManager {
|
|||||||
task.progress.lastTool = partInfo.tool
|
task.progress.lastTool = partInfo.tool
|
||||||
const circuitBreaker = resolveCircuitBreakerSettings(this.config)
|
const circuitBreaker = resolveCircuitBreakerSettings(this.config)
|
||||||
if (partInfo.tool) {
|
if (partInfo.tool) {
|
||||||
task.progress.toolCallWindow = recordToolCall(
|
task.progress.toolCallWindow = recordToolCall(
|
||||||
task.progress.toolCallWindow,
|
task.progress.toolCallWindow,
|
||||||
partInfo.tool,
|
partInfo.tool,
|
||||||
circuitBreaker
|
circuitBreaker,
|
||||||
)
|
partInfo.state?.input
|
||||||
|
)
|
||||||
|
|
||||||
const loopDetection = detectRepetitiveToolUse(task.progress.toolCallWindow)
|
if (circuitBreaker.enabled) {
|
||||||
if (loopDetection.triggered) {
|
const loopDetection = detectRepetitiveToolUse(task.progress.toolCallWindow)
|
||||||
log("[background-agent] Circuit breaker: repetitive tool usage detected", {
|
if (loopDetection.triggered) {
|
||||||
taskId: task.id,
|
log("[background-agent] Circuit breaker: repetitive tool usage detected", {
|
||||||
agent: task.agent,
|
taskId: task.id,
|
||||||
sessionID,
|
agent: task.agent,
|
||||||
toolName: loopDetection.toolName,
|
sessionID,
|
||||||
repeatedCount: loopDetection.repeatedCount,
|
toolName: loopDetection.toolName,
|
||||||
sampleSize: loopDetection.sampleSize,
|
repeatedCount: loopDetection.repeatedCount,
|
||||||
thresholdPercent: loopDetection.thresholdPercent,
|
sampleSize: loopDetection.sampleSize,
|
||||||
})
|
thresholdPercent: loopDetection.thresholdPercent,
|
||||||
void this.cancelTask(task.id, {
|
})
|
||||||
source: "circuit-breaker",
|
void this.cancelTask(task.id, {
|
||||||
reason: `Subagent repeatedly called ${loopDetection.toolName} ${loopDetection.repeatedCount}/${loopDetection.sampleSize} times in the recent tool-call window (${loopDetection.thresholdPercent}% threshold). This usually indicates an infinite loop. The task was automatically cancelled to prevent excessive token usage.`,
|
source: "circuit-breaker",
|
||||||
})
|
reason: `Subagent repeatedly called ${loopDetection.toolName} ${loopDetection.repeatedCount}/${loopDetection.sampleSize} times in the recent tool-call window (${loopDetection.thresholdPercent}% threshold). This usually indicates an infinite loop. The task was automatically cancelled to prevent excessive token usage.`,
|
||||||
return
|
})
|
||||||
}
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const maxToolCalls = circuitBreaker.maxToolCalls
|
const maxToolCalls = circuitBreaker.maxToolCalls
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ export type BackgroundTaskStatus =
|
|||||||
| "interrupt"
|
| "interrupt"
|
||||||
|
|
||||||
export interface ToolCallWindow {
|
export interface ToolCallWindow {
|
||||||
toolNames: string[]
|
toolSignatures: string[]
|
||||||
windowSize: number
|
windowSize: number
|
||||||
thresholdPercent: number
|
thresholdPercent: number
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -351,7 +351,7 @@ describe("boulder-state", () => {
|
|||||||
expect(progress.isComplete).toBe(true)
|
expect(progress.isComplete).toBe(true)
|
||||||
})
|
})
|
||||||
|
|
||||||
test("should return isComplete true for empty plan", () => {
|
test("should return isComplete false for plan with content but no checkboxes", () => {
|
||||||
// given - plan with no checkboxes
|
// given - plan with no checkboxes
|
||||||
const planPath = join(TEST_DIR, "empty-plan.md")
|
const planPath = join(TEST_DIR, "empty-plan.md")
|
||||||
writeFileSync(planPath, "# Plan\nNo tasks here")
|
writeFileSync(planPath, "# Plan\nNo tasks here")
|
||||||
@@ -361,7 +361,7 @@ describe("boulder-state", () => {
|
|||||||
|
|
||||||
// then
|
// then
|
||||||
expect(progress.total).toBe(0)
|
expect(progress.total).toBe(0)
|
||||||
expect(progress.isComplete).toBe(true)
|
expect(progress.isComplete).toBe(false)
|
||||||
})
|
})
|
||||||
|
|
||||||
test("should handle non-existent file", () => {
|
test("should handle non-existent file", () => {
|
||||||
|
|||||||
@@ -133,7 +133,7 @@ export function getPlanProgress(planPath: string): PlanProgress {
|
|||||||
return {
|
return {
|
||||||
total,
|
total,
|
||||||
completed,
|
completed,
|
||||||
isComplete: total === 0 || completed === total,
|
isComplete: total > 0 && completed === total,
|
||||||
}
|
}
|
||||||
} catch {
|
} catch {
|
||||||
return { total: 0, completed: 0, isComplete: true }
|
return { total: 0, completed: 0, isComplete: true }
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ export const START_WORK_TEMPLATE = `You are starting a Sisyphus work session.
|
|||||||
- \`--worktree <path>\` (optional): absolute path to an existing git worktree to work in
|
- \`--worktree <path>\` (optional): absolute path to an existing git worktree to work in
|
||||||
- If specified and valid: hook pre-sets worktree_path in boulder.json
|
- If specified and valid: hook pre-sets worktree_path in boulder.json
|
||||||
- If specified but invalid: you must run \`git worktree add <path> <branch>\` first
|
- If specified but invalid: you must run \`git worktree add <path> <branch>\` first
|
||||||
- If omitted: you MUST choose or create a worktree (see Worktree Setup below)
|
- If omitted: work directly in the current project directory (no worktree)
|
||||||
|
|
||||||
## WHAT TO DO
|
## WHAT TO DO
|
||||||
|
|
||||||
@@ -24,7 +24,7 @@ export const START_WORK_TEMPLATE = `You are starting a Sisyphus work session.
|
|||||||
- If ONE plan: auto-select it
|
- If ONE plan: auto-select it
|
||||||
- If MULTIPLE plans: show list with timestamps, ask user to select
|
- If MULTIPLE plans: show list with timestamps, ask user to select
|
||||||
|
|
||||||
4. **Worktree Setup** (when \`worktree_path\` not already set in boulder.json):
|
4. **Worktree Setup** (ONLY when \`--worktree\` was explicitly specified and \`worktree_path\` not already set in boulder.json):
|
||||||
1. \`git worktree list --porcelain\` — see available worktrees
|
1. \`git worktree list --porcelain\` — see available worktrees
|
||||||
2. Create: \`git worktree add <absolute-path> <branch-or-HEAD>\`
|
2. Create: \`git worktree add <absolute-path> <branch-or-HEAD>\`
|
||||||
3. Update boulder.json to add \`"worktree_path": "<absolute-path>"\`
|
3. Update boulder.json to add \`"worktree_path": "<absolute-path>"\`
|
||||||
@@ -86,6 +86,38 @@ Reading plan and beginning execution...
|
|||||||
|
|
||||||
- The session_id is injected by the hook - use it directly
|
- The session_id is injected by the hook - use it directly
|
||||||
- Always update boulder.json BEFORE starting work
|
- Always update boulder.json BEFORE starting work
|
||||||
- Always set worktree_path in boulder.json before executing any tasks
|
- If worktree_path is set in boulder.json, all work happens inside that worktree directory
|
||||||
- Read the FULL plan file before delegating any tasks
|
- Read the FULL plan file before delegating any tasks
|
||||||
- Follow atlas delegation protocols (7-section format)`
|
- Follow atlas delegation protocols (7-section format)
|
||||||
|
|
||||||
|
## TASK BREAKDOWN (MANDATORY)
|
||||||
|
|
||||||
|
After reading the plan file, you MUST decompose every plan task into granular, implementation-level sub-steps and register ALL of them as task/todo items BEFORE starting any work.
|
||||||
|
|
||||||
|
**How to break down**:
|
||||||
|
- Each plan checkbox item (e.g., \`- [ ] Add user authentication\`) must be split into concrete, actionable sub-tasks
|
||||||
|
- Sub-tasks should be specific enough that each one touches a clear set of files/functions
|
||||||
|
- Include: file to modify, what to change, expected behavior, and how to verify
|
||||||
|
- Do NOT leave any task vague — "implement feature X" is NOT acceptable; "add validateToken() to src/auth/middleware.ts that checks JWT expiry and returns 401" IS acceptable
|
||||||
|
|
||||||
|
**Example breakdown**:
|
||||||
|
Plan task: \`- [ ] Add rate limiting to API\`
|
||||||
|
→ Todo items:
|
||||||
|
1. Create \`src/middleware/rate-limiter.ts\` with sliding window algorithm (max 100 req/min per IP)
|
||||||
|
2. Add RateLimiter middleware to \`src/app.ts\` router chain, before auth middleware
|
||||||
|
3. Add rate limit headers (X-RateLimit-Limit, X-RateLimit-Remaining) to response in \`rate-limiter.ts\`
|
||||||
|
4. Add test: verify 429 response after exceeding limit in \`src/middleware/rate-limiter.test.ts\`
|
||||||
|
5. Add test: verify headers are present on normal responses
|
||||||
|
|
||||||
|
Register these as task/todo items so progress is tracked and visible throughout the session.
|
||||||
|
|
||||||
|
## WORKTREE COMPLETION
|
||||||
|
|
||||||
|
When working in a worktree (\`worktree_path\` is set in boulder.json) and ALL plan tasks are complete:
|
||||||
|
1. Commit all remaining changes in the worktree
|
||||||
|
2. Switch to the main working directory (the original repo, NOT the worktree)
|
||||||
|
3. Merge the worktree branch into the current branch: \`git merge <worktree-branch>\`
|
||||||
|
4. If merge succeeds, clean up: \`git worktree remove <worktree-path>\`
|
||||||
|
5. Remove the boulder.json state
|
||||||
|
|
||||||
|
This is the DEFAULT behavior when \`--worktree\` was used. Skip merge only if the user explicitly instructs otherwise (e.g., asks to create a PR instead).`
|
||||||
|
|||||||
@@ -52,3 +52,4 @@ export { createWriteExistingFileGuardHook } from "./write-existing-file-guard";
|
|||||||
export { createHashlineReadEnhancerHook } from "./hashline-read-enhancer";
|
export { createHashlineReadEnhancerHook } from "./hashline-read-enhancer";
|
||||||
export { createJsonErrorRecoveryHook, JSON_ERROR_TOOL_EXCLUDE_LIST, JSON_ERROR_PATTERNS, JSON_ERROR_REMINDER } from "./json-error-recovery";
|
export { createJsonErrorRecoveryHook, JSON_ERROR_TOOL_EXCLUDE_LIST, JSON_ERROR_PATTERNS, JSON_ERROR_REMINDER } from "./json-error-recovery";
|
||||||
export { createReadImageResizerHook } from "./read-image-resizer"
|
export { createReadImageResizerHook } from "./read-image-resizer"
|
||||||
|
export { createTodoDescriptionOverrideHook } from "./todo-description-override"
|
||||||
|
|||||||
28
src/hooks/todo-description-override/description.ts
Normal file
28
src/hooks/todo-description-override/description.ts
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
export const TODOWRITE_DESCRIPTION = `Use this tool to create and manage a structured task list for tracking progress on multi-step work.
|
||||||
|
|
||||||
|
## Todo Format (MANDATORY)
|
||||||
|
|
||||||
|
Each todo title MUST encode four elements: WHERE, WHY, HOW, and EXPECTED RESULT.
|
||||||
|
|
||||||
|
Format: "[WHERE] [HOW] to [WHY] — expect [RESULT]"
|
||||||
|
|
||||||
|
GOOD:
|
||||||
|
- "src/utils/validation.ts: Add validateEmail() for input sanitization — returns boolean"
|
||||||
|
- "UserService.create(): Call validateEmail() before DB insert — rejects invalid emails with 400"
|
||||||
|
- "validation.test.ts: Add test for missing @ sign — expect validateEmail('foo') to return false"
|
||||||
|
|
||||||
|
BAD:
|
||||||
|
- "Implement email validation" (where? how? what result?)
|
||||||
|
- "Add dark mode" (this is a feature, not a todo)
|
||||||
|
- "Fix auth" (what file? what changes? what's expected?)
|
||||||
|
|
||||||
|
## Granularity Rules
|
||||||
|
|
||||||
|
Each todo MUST be a single atomic action completable in 1-3 tool calls. If it needs more, split it.
|
||||||
|
|
||||||
|
**Size test**: Can you complete this todo by editing one file or running one command? If not, it's too big.
|
||||||
|
|
||||||
|
## Task Management
|
||||||
|
- One in_progress at a time. Complete it before starting the next.
|
||||||
|
- Mark completed immediately after finishing each item.
|
||||||
|
- Skip this tool for single trivial tasks (one-step, obvious action).`
|
||||||
14
src/hooks/todo-description-override/hook.ts
Normal file
14
src/hooks/todo-description-override/hook.ts
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
import { TODOWRITE_DESCRIPTION } from "./description"
|
||||||
|
|
||||||
|
export function createTodoDescriptionOverrideHook() {
|
||||||
|
return {
|
||||||
|
"tool.definition": async (
|
||||||
|
input: { toolID: string },
|
||||||
|
output: { description: string; parameters: unknown },
|
||||||
|
) => {
|
||||||
|
if (input.toolID === "todowrite") {
|
||||||
|
output.description = TODOWRITE_DESCRIPTION
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
40
src/hooks/todo-description-override/index.test.ts
Normal file
40
src/hooks/todo-description-override/index.test.ts
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
import { describe, it, expect } from "bun:test"
|
||||||
|
import { createTodoDescriptionOverrideHook } from "./hook"
|
||||||
|
import { TODOWRITE_DESCRIPTION } from "./description"
|
||||||
|
|
||||||
|
describe("createTodoDescriptionOverrideHook", () => {
|
||||||
|
describe("#given hook is created", () => {
|
||||||
|
describe("#when tool.definition is called with todowrite", () => {
|
||||||
|
it("#then should override the description", async () => {
|
||||||
|
const hook = createTodoDescriptionOverrideHook()
|
||||||
|
const output = { description: "original description", parameters: {} }
|
||||||
|
|
||||||
|
await hook["tool.definition"]({ toolID: "todowrite" }, output)
|
||||||
|
|
||||||
|
expect(output.description).toBe(TODOWRITE_DESCRIPTION)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe("#when tool.definition is called with non-todowrite tool", () => {
|
||||||
|
it("#then should not modify the description", async () => {
|
||||||
|
const hook = createTodoDescriptionOverrideHook()
|
||||||
|
const output = { description: "original description", parameters: {} }
|
||||||
|
|
||||||
|
await hook["tool.definition"]({ toolID: "bash" }, output)
|
||||||
|
|
||||||
|
expect(output.description).toBe("original description")
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe("#when tool.definition is called with TodoWrite (case-insensitive)", () => {
|
||||||
|
it("#then should not override for different casing since OpenCode sends lowercase", async () => {
|
||||||
|
const hook = createTodoDescriptionOverrideHook()
|
||||||
|
const output = { description: "original description", parameters: {} }
|
||||||
|
|
||||||
|
await hook["tool.definition"]({ toolID: "TodoWrite" }, output)
|
||||||
|
|
||||||
|
expect(output.description).toBe("original description")
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
1
src/hooks/todo-description-override/index.ts
Normal file
1
src/hooks/todo-description-override/index.ts
Normal file
@@ -0,0 +1 @@
|
|||||||
|
export { createTodoDescriptionOverrideHook } from "./hook"
|
||||||
@@ -71,5 +71,9 @@ export function createPluginInterface(args: {
|
|||||||
ctx,
|
ctx,
|
||||||
hooks,
|
hooks,
|
||||||
}),
|
}),
|
||||||
|
|
||||||
|
"tool.definition": async (input, output) => {
|
||||||
|
await hooks.todoDescriptionOverride?.["tool.definition"]?.(input, output)
|
||||||
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ import {
|
|||||||
createHashlineReadEnhancerHook,
|
createHashlineReadEnhancerHook,
|
||||||
createReadImageResizerHook,
|
createReadImageResizerHook,
|
||||||
createJsonErrorRecoveryHook,
|
createJsonErrorRecoveryHook,
|
||||||
|
createTodoDescriptionOverrideHook,
|
||||||
} from "../../hooks"
|
} from "../../hooks"
|
||||||
import {
|
import {
|
||||||
getOpenCodeVersion,
|
getOpenCodeVersion,
|
||||||
@@ -35,6 +36,7 @@ export type ToolGuardHooks = {
|
|||||||
hashlineReadEnhancer: ReturnType<typeof createHashlineReadEnhancerHook> | null
|
hashlineReadEnhancer: ReturnType<typeof createHashlineReadEnhancerHook> | null
|
||||||
jsonErrorRecovery: ReturnType<typeof createJsonErrorRecoveryHook> | null
|
jsonErrorRecovery: ReturnType<typeof createJsonErrorRecoveryHook> | null
|
||||||
readImageResizer: ReturnType<typeof createReadImageResizerHook> | null
|
readImageResizer: ReturnType<typeof createReadImageResizerHook> | null
|
||||||
|
todoDescriptionOverride: ReturnType<typeof createTodoDescriptionOverrideHook> | null
|
||||||
}
|
}
|
||||||
|
|
||||||
export function createToolGuardHooks(args: {
|
export function createToolGuardHooks(args: {
|
||||||
@@ -111,6 +113,10 @@ export function createToolGuardHooks(args: {
|
|||||||
? safeHook("read-image-resizer", () => createReadImageResizerHook(ctx))
|
? safeHook("read-image-resizer", () => createReadImageResizerHook(ctx))
|
||||||
: null
|
: null
|
||||||
|
|
||||||
|
const todoDescriptionOverride = isHookEnabled("todo-description-override")
|
||||||
|
? safeHook("todo-description-override", () => createTodoDescriptionOverrideHook())
|
||||||
|
: null
|
||||||
|
|
||||||
return {
|
return {
|
||||||
commentChecker,
|
commentChecker,
|
||||||
toolOutputTruncator,
|
toolOutputTruncator,
|
||||||
@@ -123,5 +129,6 @@ export function createToolGuardHooks(args: {
|
|||||||
hashlineReadEnhancer,
|
hashlineReadEnhancer,
|
||||||
jsonErrorRecovery,
|
jsonErrorRecovery,
|
||||||
readImageResizer,
|
readImageResizer,
|
||||||
|
todoDescriptionOverride,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user