feat(hooks): add resume session_id to verification reminders for orchestrator subagent work

When subagent work fails verification, show exact sisyphus_task(resume="...")
command with session_id for immediate retry. Consolidates verification workflow
across boulder and standalone modes.
This commit is contained in:
YeonGyu-Kim
2026-01-06 14:01:58 +09:00
parent 9eafe6b5f9
commit 3b89a35a13
2 changed files with 172 additions and 47 deletions

View File

@@ -77,7 +77,7 @@ describe("sisyphus-orchestrator hook", () => {
// #when
await hook["tool.execute.after"](
{ tool: "other_tool", sessionID: "session-123", agent: "orchestrator-sisyphus" },
{ tool: "other_tool", sessionID: "session-123" },
output
)
@@ -140,8 +140,8 @@ describe("sisyphus-orchestrator hook", () => {
// #then - standalone verification reminder appended
expect(output.output).toContain("Task completed successfully")
expect(output.output).toContain("VERIFICATION REQUIRED")
expect(output.output).toContain("SUBAGENTS LIE")
expect(output.output).toContain("MANDATORY VERIFICATION")
expect(output.output).toContain("sisyphus_task(resume=")
cleanupMessageStorage(sessionID)
})
@@ -180,6 +180,7 @@ describe("sisyphus-orchestrator hook", () => {
expect(output.output).toContain("SUBAGENT WORK COMPLETED")
expect(output.output).toContain("test-plan")
expect(output.output).toContain("SUBAGENTS LIE")
expect(output.output).toContain("sisyphus_task(resume=")
cleanupMessageStorage(sessionID)
})
@@ -323,9 +324,8 @@ describe("sisyphus-orchestrator hook", () => {
output
)
// #then - output should contain boulder.json path and notepad path format
expect(output.output).toContain(".sisyphus/boulder.json")
expect(output.output).toContain(".sisyphus/notepads/my-feature/{category}.md")
// #then - output should contain plan name and progress
expect(output.output).toContain("my-feature")
expect(output.output).toContain("1/3 done")
expect(output.output).toContain("2 left")
@@ -361,14 +361,152 @@ describe("sisyphus-orchestrator hook", () => {
output
)
// #then - should include resume and checkbox instructions
// #then - should include resume instructions and verification
expect(output.output).toContain("sisyphus_task(resume=")
expect(output.output).toContain("- [ ]")
expect(output.output).toContain("- [x]")
expect(output.output).toContain("VERIFY")
expect(output.output).toContain("[x]")
expect(output.output).toContain("MANDATORY VERIFICATION")
cleanupMessageStorage(sessionID)
})
describe("Write/Edit tool direct work reminder", () => {
const ORCHESTRATOR_SESSION = "orchestrator-write-test"
beforeEach(() => {
setupMessageStorage(ORCHESTRATOR_SESSION, "orchestrator-sisyphus")
})
afterEach(() => {
cleanupMessageStorage(ORCHESTRATOR_SESSION)
})
test("should append delegation reminder when orchestrator writes outside .sisyphus/", async () => {
// #given
const hook = createSisyphusOrchestratorHook(createMockPluginInput())
const output = {
title: "Write",
output: "File written successfully",
metadata: { filePath: "/path/to/code.ts" },
}
// #when
await hook["tool.execute.after"](
{ tool: "Write", sessionID: ORCHESTRATOR_SESSION },
output
)
// #then
expect(output.output).toContain("DELEGATION REQUIRED")
expect(output.output).toContain("ORCHESTRATOR, not an IMPLEMENTER")
expect(output.output).toContain("sisyphus_task")
})
test("should append delegation reminder when orchestrator edits outside .sisyphus/", async () => {
// #given
const hook = createSisyphusOrchestratorHook(createMockPluginInput())
const output = {
title: "Edit",
output: "File edited successfully",
metadata: { filePath: "/src/components/button.tsx" },
}
// #when
await hook["tool.execute.after"](
{ tool: "Edit", sessionID: ORCHESTRATOR_SESSION },
output
)
// #then
expect(output.output).toContain("DELEGATION REQUIRED")
})
test("should NOT append reminder when orchestrator writes inside .sisyphus/", async () => {
// #given
const hook = createSisyphusOrchestratorHook(createMockPluginInput())
const originalOutput = "File written successfully"
const output = {
title: "Write",
output: originalOutput,
metadata: { filePath: "/project/.sisyphus/plans/work-plan.md" },
}
// #when
await hook["tool.execute.after"](
{ tool: "Write", sessionID: ORCHESTRATOR_SESSION },
output
)
// #then
expect(output.output).toBe(originalOutput)
expect(output.output).not.toContain("DELEGATION REQUIRED")
})
test("should NOT append reminder when non-orchestrator writes outside .sisyphus/", async () => {
// #given
const nonOrchestratorSession = "non-orchestrator-session"
setupMessageStorage(nonOrchestratorSession, "Sisyphus-Junior")
const hook = createSisyphusOrchestratorHook(createMockPluginInput())
const originalOutput = "File written successfully"
const output = {
title: "Write",
output: originalOutput,
metadata: { filePath: "/path/to/code.ts" },
}
// #when
await hook["tool.execute.after"](
{ tool: "Write", sessionID: nonOrchestratorSession },
output
)
// #then
expect(output.output).toBe(originalOutput)
expect(output.output).not.toContain("DELEGATION REQUIRED")
cleanupMessageStorage(nonOrchestratorSession)
})
test("should NOT append reminder for read-only tools", async () => {
// #given
const hook = createSisyphusOrchestratorHook(createMockPluginInput())
const originalOutput = "File content"
const output = {
title: "Read",
output: originalOutput,
metadata: { filePath: "/path/to/code.ts" },
}
// #when
await hook["tool.execute.after"](
{ tool: "Read", sessionID: ORCHESTRATOR_SESSION },
output
)
// #then
expect(output.output).toBe(originalOutput)
})
test("should handle missing filePath gracefully", async () => {
// #given
const hook = createSisyphusOrchestratorHook(createMockPluginInput())
const originalOutput = "File written successfully"
const output = {
title: "Write",
output: originalOutput,
metadata: {},
}
// #when
await hook["tool.execute.after"](
{ tool: "Write", sessionID: ORCHESTRATOR_SESSION },
output
)
// #then
expect(output.output).toBe(originalOutput)
})
})
})
describe("session.idle handler (boulder continuation)", () => {

View File

@@ -70,10 +70,8 @@ Subagents FREQUENTLY claim completion when:
2. Run tests yourself - Must PASS (not "agent said it passed")
3. Read the actual code - Must match requirements
4. Check build/typecheck - Must succeed
5. Verify notepad was updated - Must have substantive content
DO NOT TRUST THE AGENT'S SELF-REPORT.
They are non-deterministic and not exceptional - they CANNOT distinguish between completed and incomplete states.
VERIFY EACH CLAIM WITH YOUR OWN TOOL CALLS.
**HANDS-ON QA REQUIRED (after ALL tasks complete):**
@@ -87,45 +85,41 @@ VERIFY EACH CLAIM WITH YOUR OWN TOOL CALLS.
Static analysis CANNOT catch: visual bugs, animation issues, user flow breakages, integration problems.
**FAILURE TO DO HANDS-ON QA = INCOMPLETE WORK.**`
function buildOrchestratorReminder(planName: string, progress: { total: number; completed: number }): string {
function buildVerificationReminder(sessionId: string): string {
return `${VERIFICATION_REMINDER}
---
**If ANY verification fails, use this immediately:**
\`\`\`
sisyphus_task(resume="${sessionId}", prompt="fix: [describe the specific failure]")
\`\`\``
}
function buildOrchestratorReminder(planName: string, progress: { total: number; completed: number }, sessionId: string): string {
const remaining = progress.total - progress.completed
return `
---
**State:** \`.sisyphus/boulder.json\` | Plan: ${planName} | ${progress.completed}/${progress.total} done, ${remaining} left
**Notepad:** \`.sisyphus/notepads/${planName}/{category}.md\`
**State:** Plan: ${planName} | ${progress.completed}/${progress.total} done, ${remaining} left
---
${VERIFICATION_REMINDER}
${buildVerificationReminder(sessionId)}
**COMMIT FREQUENTLY:**
- Commit after each verified task unit - one logical change per commit
- Do NOT accumulate multiple tasks into one big commit
- Atomic commits make rollback and review easier
- If verification passes, commit immediately before moving on
**THEN:**
- Broken? \`sisyphus_task(resume="<session_id>", prompt="fix: ...")\`
- Verified? Commit atomic unit, mark \`- [ ]\` to \`- [x]\`, next task`
ALL pass? → commit atomic unit, mark \`[x]\`, next task.`
}
function buildStandaloneVerificationReminder(): string {
function buildStandaloneVerificationReminder(sessionId: string): string {
return `
---
## SISYPHUS_TASK COMPLETED - VERIFICATION REQUIRED
${buildVerificationReminder(sessionId)}`
}
${VERIFICATION_REMINDER}
**VERIFICATION CHECKLIST:**
- [ ] lsp_diagnostics on changed files - Run it yourself
- [ ] Tests pass - Run the test command yourself
- [ ] Code correct - Read the files yourself
- [ ] No regressions - Check related functionality
**REMEMBER:** Agent's "done" does NOT mean actually done.`
function extractSessionIdFromOutput(output: string): string {
const match = output.match(/Session ID:\s*(ses_[a-zA-Z0-9]+)/)
return match?.[1] ?? "<session_id>"
}
interface GitFileStat {
@@ -233,12 +227,6 @@ function formatFileChanges(stats: GitFileStat[], notepadPath?: string): string {
return lines.join("\n")
}
interface ToolExecuteInput {
tool: string
sessionID?: string
agent?: string
}
interface ToolExecuteAfterInput {
tool: string
sessionID?: string
@@ -251,8 +239,6 @@ interface ToolExecuteAfterOutput {
metadata: Record<string, unknown>
}
type ToolExecuteOutput = ToolExecuteAfterOutput
function getMessageDir(sessionID: string): string | null {
if (!existsSync(MESSAGE_STORAGE)) return null
@@ -500,6 +486,7 @@ export function createSisyphusOrchestratorHook(
if (output.output && typeof output.output === "string") {
const gitStats = getGitDiffStats(ctx.directory)
const fileChanges = formatFileChanges(gitStats)
const subagentSessionId = extractSessionIdFromOutput(output.output)
const boulderState = readBoulderState(ctx.directory)
@@ -518,7 +505,7 @@ export function createSisyphusOrchestratorHook(
## SUBAGENT WORK COMPLETED
${fileChanges}
${buildOrchestratorReminder(boulderState.plan_name, progress)}`
${buildOrchestratorReminder(boulderState.plan_name, progress, subagentSessionId)}`
log(`[${HOOK_NAME}] Output transformed for orchestrator mode (boulder)`, {
plan: boulderState.plan_name,
@@ -526,7 +513,7 @@ ${buildOrchestratorReminder(boulderState.plan_name, progress)}`
fileCount: gitStats.length,
})
} else {
output.output += `\n${buildStandaloneVerificationReminder()}`
output.output += `\n${buildStandaloneVerificationReminder(subagentSessionId)}`
log(`[${HOOK_NAME}] Verification reminder appended for orchestrator`, {
sessionID: input.sessionID,