feat(hooks): add resume session_id to verification reminders for orchestrator subagent work
When subagent work fails verification, show exact sisyphus_task(resume="...") command with session_id for immediate retry. Consolidates verification workflow across boulder and standalone modes.
This commit is contained in:
@@ -77,7 +77,7 @@ describe("sisyphus-orchestrator hook", () => {
|
||||
|
||||
// #when
|
||||
await hook["tool.execute.after"](
|
||||
{ tool: "other_tool", sessionID: "session-123", agent: "orchestrator-sisyphus" },
|
||||
{ tool: "other_tool", sessionID: "session-123" },
|
||||
output
|
||||
)
|
||||
|
||||
@@ -140,8 +140,8 @@ describe("sisyphus-orchestrator hook", () => {
|
||||
|
||||
// #then - standalone verification reminder appended
|
||||
expect(output.output).toContain("Task completed successfully")
|
||||
expect(output.output).toContain("VERIFICATION REQUIRED")
|
||||
expect(output.output).toContain("SUBAGENTS LIE")
|
||||
expect(output.output).toContain("MANDATORY VERIFICATION")
|
||||
expect(output.output).toContain("sisyphus_task(resume=")
|
||||
|
||||
cleanupMessageStorage(sessionID)
|
||||
})
|
||||
@@ -180,6 +180,7 @@ describe("sisyphus-orchestrator hook", () => {
|
||||
expect(output.output).toContain("SUBAGENT WORK COMPLETED")
|
||||
expect(output.output).toContain("test-plan")
|
||||
expect(output.output).toContain("SUBAGENTS LIE")
|
||||
expect(output.output).toContain("sisyphus_task(resume=")
|
||||
|
||||
cleanupMessageStorage(sessionID)
|
||||
})
|
||||
@@ -323,9 +324,8 @@ describe("sisyphus-orchestrator hook", () => {
|
||||
output
|
||||
)
|
||||
|
||||
// #then - output should contain boulder.json path and notepad path format
|
||||
expect(output.output).toContain(".sisyphus/boulder.json")
|
||||
expect(output.output).toContain(".sisyphus/notepads/my-feature/{category}.md")
|
||||
// #then - output should contain plan name and progress
|
||||
expect(output.output).toContain("my-feature")
|
||||
expect(output.output).toContain("1/3 done")
|
||||
expect(output.output).toContain("2 left")
|
||||
|
||||
@@ -361,14 +361,152 @@ describe("sisyphus-orchestrator hook", () => {
|
||||
output
|
||||
)
|
||||
|
||||
// #then - should include resume and checkbox instructions
|
||||
// #then - should include resume instructions and verification
|
||||
expect(output.output).toContain("sisyphus_task(resume=")
|
||||
expect(output.output).toContain("- [ ]")
|
||||
expect(output.output).toContain("- [x]")
|
||||
expect(output.output).toContain("VERIFY")
|
||||
expect(output.output).toContain("[x]")
|
||||
expect(output.output).toContain("MANDATORY VERIFICATION")
|
||||
|
||||
cleanupMessageStorage(sessionID)
|
||||
})
|
||||
|
||||
describe("Write/Edit tool direct work reminder", () => {
|
||||
const ORCHESTRATOR_SESSION = "orchestrator-write-test"
|
||||
|
||||
beforeEach(() => {
|
||||
setupMessageStorage(ORCHESTRATOR_SESSION, "orchestrator-sisyphus")
|
||||
})
|
||||
|
||||
afterEach(() => {
|
||||
cleanupMessageStorage(ORCHESTRATOR_SESSION)
|
||||
})
|
||||
|
||||
test("should append delegation reminder when orchestrator writes outside .sisyphus/", async () => {
|
||||
// #given
|
||||
const hook = createSisyphusOrchestratorHook(createMockPluginInput())
|
||||
const output = {
|
||||
title: "Write",
|
||||
output: "File written successfully",
|
||||
metadata: { filePath: "/path/to/code.ts" },
|
||||
}
|
||||
|
||||
// #when
|
||||
await hook["tool.execute.after"](
|
||||
{ tool: "Write", sessionID: ORCHESTRATOR_SESSION },
|
||||
output
|
||||
)
|
||||
|
||||
// #then
|
||||
expect(output.output).toContain("DELEGATION REQUIRED")
|
||||
expect(output.output).toContain("ORCHESTRATOR, not an IMPLEMENTER")
|
||||
expect(output.output).toContain("sisyphus_task")
|
||||
})
|
||||
|
||||
test("should append delegation reminder when orchestrator edits outside .sisyphus/", async () => {
|
||||
// #given
|
||||
const hook = createSisyphusOrchestratorHook(createMockPluginInput())
|
||||
const output = {
|
||||
title: "Edit",
|
||||
output: "File edited successfully",
|
||||
metadata: { filePath: "/src/components/button.tsx" },
|
||||
}
|
||||
|
||||
// #when
|
||||
await hook["tool.execute.after"](
|
||||
{ tool: "Edit", sessionID: ORCHESTRATOR_SESSION },
|
||||
output
|
||||
)
|
||||
|
||||
// #then
|
||||
expect(output.output).toContain("DELEGATION REQUIRED")
|
||||
})
|
||||
|
||||
test("should NOT append reminder when orchestrator writes inside .sisyphus/", async () => {
|
||||
// #given
|
||||
const hook = createSisyphusOrchestratorHook(createMockPluginInput())
|
||||
const originalOutput = "File written successfully"
|
||||
const output = {
|
||||
title: "Write",
|
||||
output: originalOutput,
|
||||
metadata: { filePath: "/project/.sisyphus/plans/work-plan.md" },
|
||||
}
|
||||
|
||||
// #when
|
||||
await hook["tool.execute.after"](
|
||||
{ tool: "Write", sessionID: ORCHESTRATOR_SESSION },
|
||||
output
|
||||
)
|
||||
|
||||
// #then
|
||||
expect(output.output).toBe(originalOutput)
|
||||
expect(output.output).not.toContain("DELEGATION REQUIRED")
|
||||
})
|
||||
|
||||
test("should NOT append reminder when non-orchestrator writes outside .sisyphus/", async () => {
|
||||
// #given
|
||||
const nonOrchestratorSession = "non-orchestrator-session"
|
||||
setupMessageStorage(nonOrchestratorSession, "Sisyphus-Junior")
|
||||
|
||||
const hook = createSisyphusOrchestratorHook(createMockPluginInput())
|
||||
const originalOutput = "File written successfully"
|
||||
const output = {
|
||||
title: "Write",
|
||||
output: originalOutput,
|
||||
metadata: { filePath: "/path/to/code.ts" },
|
||||
}
|
||||
|
||||
// #when
|
||||
await hook["tool.execute.after"](
|
||||
{ tool: "Write", sessionID: nonOrchestratorSession },
|
||||
output
|
||||
)
|
||||
|
||||
// #then
|
||||
expect(output.output).toBe(originalOutput)
|
||||
expect(output.output).not.toContain("DELEGATION REQUIRED")
|
||||
|
||||
cleanupMessageStorage(nonOrchestratorSession)
|
||||
})
|
||||
|
||||
test("should NOT append reminder for read-only tools", async () => {
|
||||
// #given
|
||||
const hook = createSisyphusOrchestratorHook(createMockPluginInput())
|
||||
const originalOutput = "File content"
|
||||
const output = {
|
||||
title: "Read",
|
||||
output: originalOutput,
|
||||
metadata: { filePath: "/path/to/code.ts" },
|
||||
}
|
||||
|
||||
// #when
|
||||
await hook["tool.execute.after"](
|
||||
{ tool: "Read", sessionID: ORCHESTRATOR_SESSION },
|
||||
output
|
||||
)
|
||||
|
||||
// #then
|
||||
expect(output.output).toBe(originalOutput)
|
||||
})
|
||||
|
||||
test("should handle missing filePath gracefully", async () => {
|
||||
// #given
|
||||
const hook = createSisyphusOrchestratorHook(createMockPluginInput())
|
||||
const originalOutput = "File written successfully"
|
||||
const output = {
|
||||
title: "Write",
|
||||
output: originalOutput,
|
||||
metadata: {},
|
||||
}
|
||||
|
||||
// #when
|
||||
await hook["tool.execute.after"](
|
||||
{ tool: "Write", sessionID: ORCHESTRATOR_SESSION },
|
||||
output
|
||||
)
|
||||
|
||||
// #then
|
||||
expect(output.output).toBe(originalOutput)
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe("session.idle handler (boulder continuation)", () => {
|
||||
|
||||
@@ -70,10 +70,8 @@ Subagents FREQUENTLY claim completion when:
|
||||
2. Run tests yourself - Must PASS (not "agent said it passed")
|
||||
3. Read the actual code - Must match requirements
|
||||
4. Check build/typecheck - Must succeed
|
||||
5. Verify notepad was updated - Must have substantive content
|
||||
|
||||
DO NOT TRUST THE AGENT'S SELF-REPORT.
|
||||
They are non-deterministic and not exceptional - they CANNOT distinguish between completed and incomplete states.
|
||||
VERIFY EACH CLAIM WITH YOUR OWN TOOL CALLS.
|
||||
|
||||
**HANDS-ON QA REQUIRED (after ALL tasks complete):**
|
||||
@@ -87,45 +85,41 @@ VERIFY EACH CLAIM WITH YOUR OWN TOOL CALLS.
|
||||
Static analysis CANNOT catch: visual bugs, animation issues, user flow breakages, integration problems.
|
||||
**FAILURE TO DO HANDS-ON QA = INCOMPLETE WORK.**`
|
||||
|
||||
function buildOrchestratorReminder(planName: string, progress: { total: number; completed: number }): string {
|
||||
function buildVerificationReminder(sessionId: string): string {
|
||||
return `${VERIFICATION_REMINDER}
|
||||
|
||||
---
|
||||
|
||||
**If ANY verification fails, use this immediately:**
|
||||
\`\`\`
|
||||
sisyphus_task(resume="${sessionId}", prompt="fix: [describe the specific failure]")
|
||||
\`\`\``
|
||||
}
|
||||
|
||||
function buildOrchestratorReminder(planName: string, progress: { total: number; completed: number }, sessionId: string): string {
|
||||
const remaining = progress.total - progress.completed
|
||||
return `
|
||||
---
|
||||
|
||||
**State:** \`.sisyphus/boulder.json\` | Plan: ${planName} | ${progress.completed}/${progress.total} done, ${remaining} left
|
||||
|
||||
**Notepad:** \`.sisyphus/notepads/${planName}/{category}.md\`
|
||||
**State:** Plan: ${planName} | ${progress.completed}/${progress.total} done, ${remaining} left
|
||||
|
||||
---
|
||||
|
||||
${VERIFICATION_REMINDER}
|
||||
${buildVerificationReminder(sessionId)}
|
||||
|
||||
**COMMIT FREQUENTLY:**
|
||||
- Commit after each verified task unit - one logical change per commit
|
||||
- Do NOT accumulate multiple tasks into one big commit
|
||||
- Atomic commits make rollback and review easier
|
||||
- If verification passes, commit immediately before moving on
|
||||
|
||||
**THEN:**
|
||||
- Broken? \`sisyphus_task(resume="<session_id>", prompt="fix: ...")\`
|
||||
- Verified? Commit atomic unit, mark \`- [ ]\` to \`- [x]\`, next task`
|
||||
ALL pass? → commit atomic unit, mark \`[x]\`, next task.`
|
||||
}
|
||||
|
||||
function buildStandaloneVerificationReminder(): string {
|
||||
function buildStandaloneVerificationReminder(sessionId: string): string {
|
||||
return `
|
||||
---
|
||||
|
||||
## SISYPHUS_TASK COMPLETED - VERIFICATION REQUIRED
|
||||
${buildVerificationReminder(sessionId)}`
|
||||
}
|
||||
|
||||
${VERIFICATION_REMINDER}
|
||||
|
||||
**VERIFICATION CHECKLIST:**
|
||||
- [ ] lsp_diagnostics on changed files - Run it yourself
|
||||
- [ ] Tests pass - Run the test command yourself
|
||||
- [ ] Code correct - Read the files yourself
|
||||
- [ ] No regressions - Check related functionality
|
||||
|
||||
**REMEMBER:** Agent's "done" does NOT mean actually done.`
|
||||
function extractSessionIdFromOutput(output: string): string {
|
||||
const match = output.match(/Session ID:\s*(ses_[a-zA-Z0-9]+)/)
|
||||
return match?.[1] ?? "<session_id>"
|
||||
}
|
||||
|
||||
interface GitFileStat {
|
||||
@@ -233,12 +227,6 @@ function formatFileChanges(stats: GitFileStat[], notepadPath?: string): string {
|
||||
return lines.join("\n")
|
||||
}
|
||||
|
||||
interface ToolExecuteInput {
|
||||
tool: string
|
||||
sessionID?: string
|
||||
agent?: string
|
||||
}
|
||||
|
||||
interface ToolExecuteAfterInput {
|
||||
tool: string
|
||||
sessionID?: string
|
||||
@@ -251,8 +239,6 @@ interface ToolExecuteAfterOutput {
|
||||
metadata: Record<string, unknown>
|
||||
}
|
||||
|
||||
type ToolExecuteOutput = ToolExecuteAfterOutput
|
||||
|
||||
function getMessageDir(sessionID: string): string | null {
|
||||
if (!existsSync(MESSAGE_STORAGE)) return null
|
||||
|
||||
@@ -500,6 +486,7 @@ export function createSisyphusOrchestratorHook(
|
||||
if (output.output && typeof output.output === "string") {
|
||||
const gitStats = getGitDiffStats(ctx.directory)
|
||||
const fileChanges = formatFileChanges(gitStats)
|
||||
const subagentSessionId = extractSessionIdFromOutput(output.output)
|
||||
|
||||
const boulderState = readBoulderState(ctx.directory)
|
||||
|
||||
@@ -518,7 +505,7 @@ export function createSisyphusOrchestratorHook(
|
||||
## SUBAGENT WORK COMPLETED
|
||||
|
||||
${fileChanges}
|
||||
${buildOrchestratorReminder(boulderState.plan_name, progress)}`
|
||||
${buildOrchestratorReminder(boulderState.plan_name, progress, subagentSessionId)}`
|
||||
|
||||
log(`[${HOOK_NAME}] Output transformed for orchestrator mode (boulder)`, {
|
||||
plan: boulderState.plan_name,
|
||||
@@ -526,7 +513,7 @@ ${buildOrchestratorReminder(boulderState.plan_name, progress)}`
|
||||
fileCount: gitStats.length,
|
||||
})
|
||||
} else {
|
||||
output.output += `\n${buildStandaloneVerificationReminder()}`
|
||||
output.output += `\n${buildStandaloneVerificationReminder(subagentSessionId)}`
|
||||
|
||||
log(`[${HOOK_NAME}] Verification reminder appended for orchestrator`, {
|
||||
sessionID: input.sessionID,
|
||||
|
||||
Reference in New Issue
Block a user