feat(hooks): add sisyphus-orchestrator hook
Add hook for orchestrating Sisyphus agent workflows:

- Coordinates task execution between agents
- Manages workflow state persistence
- Handles agent handoffs

Includes comprehensive test coverage.

🤖 GENERATED WITH ASSISTANCE OF [OhMyOpenCode](https://github.com/code-yeongyu/oh-my-opencode)
This commit is contained in:
654
src/hooks/sisyphus-orchestrator/index.test.ts
Normal file
654
src/hooks/sisyphus-orchestrator/index.test.ts
Normal file
@@ -0,0 +1,654 @@
|
||||
import { describe, expect, test, beforeEach, afterEach, mock } from "bun:test"
|
||||
import { existsSync, mkdirSync, rmSync, writeFileSync } from "node:fs"
|
||||
import { join } from "node:path"
|
||||
import { tmpdir } from "node:os"
|
||||
import { createSisyphusOrchestratorHook } from "./index"
|
||||
import {
|
||||
writeBoulderState,
|
||||
clearBoulderState,
|
||||
readBoulderState,
|
||||
} from "../../features/boulder-state"
|
||||
import type { BoulderState } from "../../features/boulder-state"
|
||||
|
||||
import { MESSAGE_STORAGE } from "../../features/hook-message-injector"
|
||||
|
||||
describe("sisyphus-orchestrator hook", () => {
  const TEST_DIR = join(tmpdir(), "sisyphus-orchestrator-test-" + Date.now())
  const SISYPHUS_DIR = join(TEST_DIR, ".sisyphus")
  const STARTED_AT = "2026-01-02T10:00:00Z"
  const ORCHESTRATOR_AGENT = "orchestrator-sisyphus"

  type HookOptions = NonNullable<Parameters<typeof createSisyphusOrchestratorHook>[1]>

  // Session IDs that received a message-storage fixture during the current test.
  // They are removed in afterEach so a failing assertion cannot leak fixtures.
  const seededSessionIDs = new Set<string>()

  /**
   * Builds a minimal stand-in for the plugin input: the test directory plus a
   * `client.session.prompt` mock. The mock is also exposed as `_promptMock` so
   * tests can inspect the calls made by the hook.
   */
  function createMockPluginInput(overrides?: { promptMock?: ReturnType<typeof mock> }) {
    const promptMock = overrides?.promptMock ?? mock(() => Promise.resolve())
    return {
      directory: TEST_DIR,
      client: {
        session: {
          prompt: promptMock,
        },
      },
      _promptMock: promptMock,
    } as unknown as Parameters<typeof createSisyphusOrchestratorHook>[0] & { _promptMock: ReturnType<typeof mock> }
  }

  /** Writes one message file under MESSAGE_STORAGE/<sessionID> recording `agent` as the author. */
  function setupMessageStorage(sessionID: string, agent: string): void {
    seededSessionIDs.add(sessionID)
    const messageDir = join(MESSAGE_STORAGE, sessionID)
    mkdirSync(messageDir, { recursive: true })
    const messageData = {
      agent,
      model: { providerID: "anthropic", modelID: "claude-opus-4-5" },
    }
    writeFileSync(join(messageDir, "msg_test001.json"), JSON.stringify(messageData))
  }

  /** Removes the message-storage fixture for `sessionID`, if any. */
  function cleanupMessageStorage(sessionID: string): void {
    rmSync(join(MESSAGE_STORAGE, sessionID), { recursive: true, force: true })
  }

  /**
   * Writes `<planName>.md` into TEST_DIR with the given markdown and persists a
   * boulder state pointing at it. Returns the state so tests can re-write it.
   */
  function seedBoulderState(planName: string, planMarkdown: string, sessionIDs: string[]): BoulderState {
    const planPath = join(TEST_DIR, `${planName}.md`)
    writeFileSync(planPath, planMarkdown)
    const state: BoulderState = {
      active_plan: planPath,
      started_at: STARTED_AT,
      session_ids: sessionIDs,
      plan_name: planName,
    }
    writeBoulderState(TEST_DIR, state)
    return state
  }

  /** Creates the mutable tool-output object handed to `tool.execute.after`. */
  function createToolOutput(output: string, title = "Sisyphus Task") {
    return { title, output, metadata: {} }
  }

  /** Builds a `session.idle` event payload for `sessionID`. */
  function idleEvent(sessionID: string) {
    return { event: { type: "session.idle", properties: { sessionID } } }
  }

  /** Builds a `session.error` event payload carrying `error` for `sessionID`. */
  function errorEvent(sessionID: string, error: { name: string; message?: string }) {
    return { event: { type: "session.error", properties: { sessionID, error } } }
  }

  beforeEach(() => {
    mkdirSync(SISYPHUS_DIR, { recursive: true })
    clearBoulderState(TEST_DIR)
  })

  afterEach(() => {
    // MESSAGE_STORAGE is a module-level location that is not derived from
    // TEST_DIR, so its fixtures need their own cleanup. Doing it here (instead
    // of on the last line of each test) guarantees it runs when a test fails.
    for (const sessionID of seededSessionIDs) {
      cleanupMessageStorage(sessionID)
    }
    seededSessionIDs.clear()

    clearBoulderState(TEST_DIR)
    rmSync(TEST_DIR, { recursive: true, force: true })
  })

  describe("tool.execute.after handler", () => {
    test("should ignore non-sisyphus_task tools", async () => {
      // #given - hook and non-sisyphus_task tool
      const hook = createSisyphusOrchestratorHook(createMockPluginInput())
      const output = createToolOutput("Original output", "Test Tool")

      // #when
      await hook["tool.execute.after"](
        { tool: "other_tool", sessionID: "session-123", agent: ORCHESTRATOR_AGENT },
        output
      )

      // #then - output unchanged
      expect(output.output).toBe("Original output")
    })

    test("should not transform when caller is not orchestrator-sisyphus", async () => {
      // #given - boulder state exists but caller agent in message storage is not orchestrator
      const sessionID = "session-non-orchestrator-test"
      setupMessageStorage(sessionID, "other-agent")
      seedBoulderState("test-plan", "# Plan\n- [ ] Task 1", ["session-1"])

      const hook = createSisyphusOrchestratorHook(createMockPluginInput())
      const output = createToolOutput("Task completed successfully")

      // #when
      await hook["tool.execute.after"]({ tool: "sisyphus_task", sessionID }, output)

      // #then - output unchanged because caller is not orchestrator
      expect(output.output).toBe("Task completed successfully")
    })

    test("should append standalone verification when no boulder state but caller is orchestrator", async () => {
      // #given - no boulder state, but caller is orchestrator
      const sessionID = "session-no-boulder-test"
      setupMessageStorage(sessionID, ORCHESTRATOR_AGENT)

      const hook = createSisyphusOrchestratorHook(createMockPluginInput())
      const output = createToolOutput("Task completed successfully")

      // #when
      await hook["tool.execute.after"]({ tool: "sisyphus_task", sessionID }, output)

      // #then - standalone verification reminder appended
      expect(output.output).toContain("Task completed successfully")
      expect(output.output).toContain("VERIFICATION REQUIRED")
      expect(output.output).toContain("SUBAGENTS LIE")
    })

    test("should transform output when caller is orchestrator-sisyphus with boulder state", async () => {
      // #given - orchestrator-sisyphus caller with boulder state
      const sessionID = "session-transform-test"
      setupMessageStorage(sessionID, ORCHESTRATOR_AGENT)
      seedBoulderState("test-plan", "# Plan\n- [ ] Task 1\n- [x] Task 2", ["session-1"])

      const hook = createSisyphusOrchestratorHook(createMockPluginInput())
      const output = createToolOutput("Task completed successfully")

      // #when
      await hook["tool.execute.after"]({ tool: "sisyphus_task", sessionID }, output)

      // #then - output should be transformed (original output replaced)
      expect(output.output).not.toContain("Task completed successfully")
      expect(output.output).toContain("SUBAGENT WORK COMPLETED")
      expect(output.output).toContain("test-plan")
      expect(output.output).toContain("SUBAGENTS LIE")
    })

    test("should still transform when plan is complete (shows progress)", async () => {
      // #given - boulder state with complete plan, orchestrator caller
      const sessionID = "session-complete-plan-test"
      setupMessageStorage(sessionID, ORCHESTRATOR_AGENT)
      seedBoulderState("complete-plan", "# Plan\n- [x] Task 1\n- [x] Task 2", ["session-1"])

      const hook = createSisyphusOrchestratorHook(createMockPluginInput())
      const output = createToolOutput("Original output")

      // #when
      await hook["tool.execute.after"]({ tool: "sisyphus_task", sessionID }, output)

      // #then - output transformed even when complete (shows 2/2 done)
      expect(output.output).toContain("SUBAGENT WORK COMPLETED")
      expect(output.output).toContain("2/2 done")
      expect(output.output).toContain("0 left")
    })

    test("should append session ID to boulder state if not present", async () => {
      // #given - boulder state without session-append-test, orchestrator caller
      const sessionID = "session-append-test"
      setupMessageStorage(sessionID, ORCHESTRATOR_AGENT)
      seedBoulderState("test-plan", "# Plan\n- [ ] Task 1", ["session-1"])

      const hook = createSisyphusOrchestratorHook(createMockPluginInput())
      const output = createToolOutput("Task output")

      // #when
      await hook["tool.execute.after"]({ tool: "sisyphus_task", sessionID }, output)

      // #then - sessionID should be appended
      const updatedState = readBoulderState(TEST_DIR)
      expect(updatedState?.session_ids).toContain(sessionID)
    })

    test("should not duplicate existing session ID", async () => {
      // #given - boulder state already has session-dup-test, orchestrator caller
      const sessionID = "session-dup-test"
      setupMessageStorage(sessionID, ORCHESTRATOR_AGENT)
      seedBoulderState("test-plan", "# Plan\n- [ ] Task 1", [sessionID])

      const hook = createSisyphusOrchestratorHook(createMockPluginInput())
      const output = createToolOutput("Task output")

      // #when
      await hook["tool.execute.after"]({ tool: "sisyphus_task", sessionID }, output)

      // #then - should still have only one sessionID
      const updatedState = readBoulderState(TEST_DIR)
      const count = updatedState?.session_ids.filter((id) => id === sessionID).length
      expect(count).toBe(1)
    })

    test("should include boulder.json path and notepad path in transformed output", async () => {
      // #given - boulder state, orchestrator caller
      const sessionID = "session-path-test"
      setupMessageStorage(sessionID, ORCHESTRATOR_AGENT)
      seedBoulderState("my-feature", "# Plan\n- [ ] Task 1\n- [ ] Task 2\n- [x] Task 3", ["session-1"])

      const hook = createSisyphusOrchestratorHook(createMockPluginInput())
      const output = createToolOutput("Task completed")

      // #when
      await hook["tool.execute.after"]({ tool: "sisyphus_task", sessionID }, output)

      // #then - output should contain boulder.json path and notepad path format
      expect(output.output).toContain(".sisyphus/boulder.json")
      expect(output.output).toContain(".sisyphus/notepads/my-feature/{category}.md")
      expect(output.output).toContain("1/3 done")
      expect(output.output).toContain("2 left")
    })

    test("should include resume and checkbox instructions in reminder", async () => {
      // #given - boulder state, orchestrator caller
      const sessionID = "session-resume-test"
      setupMessageStorage(sessionID, ORCHESTRATOR_AGENT)
      seedBoulderState("test-plan", "# Plan\n- [ ] Task 1", ["session-1"])

      const hook = createSisyphusOrchestratorHook(createMockPluginInput())
      const output = createToolOutput("Task completed")

      // #when
      await hook["tool.execute.after"]({ tool: "sisyphus_task", sessionID }, output)

      // #then - should include resume and checkbox instructions
      expect(output.output).toContain("sisyphus_task(resume=")
      expect(output.output).toContain("- [ ]")
      expect(output.output).toContain("- [x]")
      expect(output.output).toContain("VERIFY")
    })
  })

  describe("session.idle handler (boulder continuation)", () => {
    const MAIN_SESSION_ID = "main-session-123"

    beforeEach(() => {
      mock.module("../../features/claude-code-session-state", () => ({
        getMainSessionID: () => MAIN_SESSION_ID,
        subagentSessions: new Set<string>(),
      }))
    })

    test("should inject continuation when boulder has incomplete tasks", async () => {
      // #given - boulder state with incomplete plan
      seedBoulderState("test-plan", "# Plan\n- [ ] Task 1\n- [x] Task 2\n- [ ] Task 3", [MAIN_SESSION_ID])

      const mockInput = createMockPluginInput()
      const hook = createSisyphusOrchestratorHook(mockInput)

      // #when
      await hook.handler(idleEvent(MAIN_SESSION_ID))

      // #then - should call prompt with continuation
      expect(mockInput._promptMock).toHaveBeenCalled()
      const callArgs = mockInput._promptMock.mock.calls[0][0]
      expect(callArgs.path.id).toBe(MAIN_SESSION_ID)
      expect(callArgs.body.parts[0].text).toContain("BOULDER CONTINUATION")
      expect(callArgs.body.parts[0].text).toContain("2 remaining")
    })

    test("should not inject when no boulder state exists", async () => {
      // #given - no boulder state
      const mockInput = createMockPluginInput()
      const hook = createSisyphusOrchestratorHook(mockInput)

      // #when
      await hook.handler(idleEvent(MAIN_SESSION_ID))

      // #then - should not call prompt
      expect(mockInput._promptMock).not.toHaveBeenCalled()
    })

    test("should not inject when boulder plan is complete", async () => {
      // #given - boulder state with complete plan
      seedBoulderState("complete-plan", "# Plan\n- [x] Task 1\n- [x] Task 2", [MAIN_SESSION_ID])

      const mockInput = createMockPluginInput()
      const hook = createSisyphusOrchestratorHook(mockInput)

      // #when
      await hook.handler(idleEvent(MAIN_SESSION_ID))

      // #then - should not call prompt
      expect(mockInput._promptMock).not.toHaveBeenCalled()
    })

    test("should skip when abort error occurred before idle", async () => {
      // #given - boulder state with incomplete plan
      seedBoulderState("test-plan", "# Plan\n- [ ] Task 1", [MAIN_SESSION_ID])

      const mockInput = createMockPluginInput()
      const hook = createSisyphusOrchestratorHook(mockInput)

      // #when - send abort error then idle
      await hook.handler(errorEvent(MAIN_SESSION_ID, { name: "AbortError", message: "aborted" }))
      await hook.handler(idleEvent(MAIN_SESSION_ID))

      // #then - should not call prompt
      expect(mockInput._promptMock).not.toHaveBeenCalled()
    })

    test("should skip when background tasks are running", async () => {
      // #given - boulder state with incomplete plan
      seedBoulderState("test-plan", "# Plan\n- [ ] Task 1", [MAIN_SESSION_ID])

      const mockBackgroundManager = {
        getTasksByParentSession: () => [{ status: "running" }],
      }

      const mockInput = createMockPluginInput()
      const hook = createSisyphusOrchestratorHook(mockInput, {
        directory: TEST_DIR,
        backgroundManager: mockBackgroundManager as unknown as HookOptions["backgroundManager"],
      })

      // #when
      await hook.handler(idleEvent(MAIN_SESSION_ID))

      // #then - should not call prompt
      expect(mockInput._promptMock).not.toHaveBeenCalled()
    })

    test("should clear abort state on message.updated", async () => {
      // #given - boulder with incomplete plan
      seedBoulderState("test-plan", "# Plan\n- [ ] Task 1", [MAIN_SESSION_ID])

      const mockInput = createMockPluginInput()
      const hook = createSisyphusOrchestratorHook(mockInput)

      // #when - abort error, then message update, then idle
      await hook.handler(errorEvent(MAIN_SESSION_ID, { name: "AbortError" }))
      await hook.handler({
        event: {
          type: "message.updated",
          properties: { info: { sessionID: MAIN_SESSION_ID, role: "user" } },
        },
      })
      await hook.handler(idleEvent(MAIN_SESSION_ID))

      // #then - should call prompt because abort state was cleared
      expect(mockInput._promptMock).toHaveBeenCalled()
    })

    test("should include plan progress in continuation prompt", async () => {
      // #given - boulder state with specific progress
      seedBoulderState(
        "progress-plan",
        "# Plan\n- [x] Task 1\n- [x] Task 2\n- [ ] Task 3\n- [ ] Task 4",
        [MAIN_SESSION_ID]
      )

      const mockInput = createMockPluginInput()
      const hook = createSisyphusOrchestratorHook(mockInput)

      // #when
      await hook.handler(idleEvent(MAIN_SESSION_ID))

      // #then - should include progress
      const callArgs = mockInput._promptMock.mock.calls[0][0]
      expect(callArgs.body.parts[0].text).toContain("2/4 completed")
      expect(callArgs.body.parts[0].text).toContain("2 remaining")
    })

    test("should cleanup on session.deleted", async () => {
      // #given - boulder state
      const state = seedBoulderState("test-plan", "# Plan\n- [ ] Task 1", [MAIN_SESSION_ID])

      const mockInput = createMockPluginInput()
      const hook = createSisyphusOrchestratorHook(mockInput)

      // #when - create abort state then delete
      await hook.handler(errorEvent(MAIN_SESSION_ID, { name: "AbortError" }))
      await hook.handler({
        event: {
          type: "session.deleted",
          properties: { info: { id: MAIN_SESSION_ID } },
        },
      })

      // Re-create boulder after deletion
      writeBoulderState(TEST_DIR, state)

      // Trigger idle - should inject because state was cleaned up
      await hook.handler(idleEvent(MAIN_SESSION_ID))

      // #then - should call prompt because session state was cleaned
      expect(mockInput._promptMock).toHaveBeenCalled()
    })
  })
})
|
||||
475
src/hooks/sisyphus-orchestrator/index.ts
Normal file
475
src/hooks/sisyphus-orchestrator/index.ts
Normal file
@@ -0,0 +1,475 @@
|
||||
import type { PluginInput } from "@opencode-ai/plugin"
|
||||
import { execSync } from "node:child_process"
|
||||
import { existsSync, readdirSync } from "node:fs"
|
||||
import { join } from "node:path"
|
||||
import {
|
||||
readBoulderState,
|
||||
appendSessionId,
|
||||
getPlanProgress,
|
||||
} from "../../features/boulder-state"
|
||||
import { getMainSessionID, subagentSessions } from "../../features/claude-code-session-state"
|
||||
import { findNearestMessageWithFields, MESSAGE_STORAGE } from "../../features/hook-message-injector"
|
||||
import { log } from "../../shared/logger"
|
||||
import type { BackgroundManager } from "../../features/background-agent"
|
||||
|
||||
/** Identifier of this hook; also used as the bracketed prefix of its log messages. */
export const HOOK_NAME = "sisyphus-orchestrator"
|
||||
|
||||
/**
 * Prompt injected into an idle session that still has unchecked plan tasks.
 * The `{PLAN_NAME}` placeholder is substituted with the active plan's name
 * before the prompt is sent.
 */
const BOULDER_CONTINUATION_PROMPT = `[SYSTEM REMINDER - BOULDER CONTINUATION]

You have an active work plan with incomplete tasks. Continue working.

RULES:
- Proceed without asking for permission
- Mark each checkbox [x] in the plan file when done
- Use the notepad at .sisyphus/notepads/{PLAN_NAME}/ to record learnings
- Do not stop until all tasks are complete
- If blocked, document the blocker and move to the next task`
|
||||
|
||||
/**
 * Shared verification instructions embedded in both reminder builders below.
 * Tells the orchestrator to re-check a subagent's claims with its own tool calls.
 */
const VERIFICATION_REMINDER = `**MANDATORY VERIFICATION - SUBAGENTS LIE**

Subagents FREQUENTLY claim completion when:
- Tests are actually FAILING
- Code has type/lint ERRORS
- Implementation is INCOMPLETE
- Patterns were NOT followed

**YOU MUST VERIFY EVERYTHING YOURSELF:**

1. Run \`lsp_diagnostics\` on changed files - Must be CLEAN
2. Run tests yourself - Must PASS (not "agent said it passed")
3. Read the actual code - Must match requirements
4. Check build/typecheck - Must succeed
5. Verify notepad was updated - Must have substantive content

DO NOT TRUST THE AGENT'S SELF-REPORT.
VERIFY EACH CLAIM WITH YOUR OWN TOOL CALLS.`
|
||||
|
||||
/**
 * Builds the reminder text shown to the orchestrator while a plan is active.
 *
 * The text contains the boulder state path, plan name and progress counts, the
 * per-plan notepad path pattern, the shared verification instructions, commit
 * guidance, and the next step to take (resume the subagent or tick the checkbox).
 *
 * @param planName - Name of the active plan; interpolated into the state line and notepad path.
 * @param progress - Checkbox totals for the plan; remaining is `total - completed`.
 * @returns The reminder text, starting with a newline and a `---` separator.
 */
function buildOrchestratorReminder(planName: string, progress: { total: number; completed: number }): string {
  const remaining = progress.total - progress.completed
  return `
---

**State:** \`.sisyphus/boulder.json\` | Plan: ${planName} | ${progress.completed}/${progress.total} done, ${remaining} left

**Notepad:** \`.sisyphus/notepads/${planName}/{category}.md\`

---

${VERIFICATION_REMINDER}

**COMMIT FREQUENTLY:**
- Commit after each verified task unit - one logical change per commit
- Do NOT accumulate multiple tasks into one big commit
- Atomic commits make rollback and review easier
- If verification passes, commit immediately before moving on

**THEN:**
- Broken? \`sisyphus_task(resume="<session_id>", prompt="fix: ...")\`
- Verified? Commit atomic unit, mark \`- [ ]\` to \`- [x]\`, next task`
}
|
||||
|
||||
/**
 * Builds the verification reminder that does not reference any plan: a heading,
 * the shared verification instructions, and a short checklist.
 *
 * @returns The reminder text, starting with a newline and a `---` separator.
 */
function buildStandaloneVerificationReminder(): string {
  return `
---

## SISYPHUS_TASK COMPLETED - VERIFICATION REQUIRED

${VERIFICATION_REMINDER}

**VERIFICATION CHECKLIST:**
- [ ] lsp_diagnostics on changed files - Run it yourself
- [ ] Tests pass - Run the test command yourself
- [ ] Code correct - Read the files yourself
- [ ] No regressions - Check related functionality

**REMEMBER:** Agent's "done" does NOT mean actually done.`
}
|
||||
|
||||
/** Line-change summary for one file, as parsed from `git diff --numstat HEAD`. */
interface GitFileStat {
  /** File path exactly as git printed it. */
  path: string
  /** Lines added; 0 when git reports `-` instead of a count. */
  added: number
  /** Lines removed; 0 when git reports `-` instead of a count. */
  removed: number
  /** Change category derived from `git status --porcelain`. */
  status: "modified" | "added" | "deleted"
}
|
||||
|
||||
/**
 * Collects per-file line counts for everything that differs from `HEAD` in
 * `directory`, and labels each file as modified, added, or deleted.
 *
 * Line counts come from `git diff --numstat HEAD`; the label comes from
 * `git status --porcelain` and defaults to "modified" when a path has no
 * status entry.
 *
 * @param directory - Working directory in which the git commands run.
 * @returns One entry per numstat line, or an empty array when nothing changed
 *          or when git fails for any reason (not a repository, git missing,
 *          timeout). This function never throws.
 */
function getGitDiffStats(directory: string): GitFileStat[] {
  // stderr is captured rather than inherited so git's diagnostics (for example
  // "fatal: not a git repository") never reach the host process's terminal.
  const runGit = (command: string): string =>
    execSync(command, {
      cwd: directory,
      encoding: "utf-8",
      timeout: 5000,
      stdio: ["ignore", "pipe", "pipe"],
    })

  try {
    const numstatOutput = runGit("git diff --numstat HEAD").trim()
    if (!numstatOutput) return []

    // Porcelain lines are "XY <path>" where X or Y may be a space. The output
    // must NOT be trimmed as a whole: that would strip the leading space of the
    // first line (" D file") and shift the path slice below by one character.
    const statusOutput = runGit("git status --porcelain")

    const statusByPath = new Map<string, GitFileStat["status"]>()
    for (const line of statusOutput.split("\n")) {
      if (!line) continue
      const code = line.substring(0, 2).trim()
      const filePath = line.substring(3)
      if (code === "A" || code === "??") {
        statusByPath.set(filePath, "added")
      } else if (code === "D") {
        statusByPath.set(filePath, "deleted")
      } else {
        statusByPath.set(filePath, "modified")
      }
    }

    const stats: GitFileStat[] = []
    for (const line of numstatOutput.split("\n")) {
      const parts = line.split("\t")
      if (parts.length < 3) continue

      const [addedStr, removedStr, path] = parts
      stats.push({
        path,
        // git prints "-" for both counts when it cannot count lines
        added: addedStr === "-" ? 0 : parseInt(addedStr, 10),
        removed: removedStr === "-" ? 0 : parseInt(removedStr, 10),
        status: statusByPath.get(path) ?? "modified",
      })
    }

    return stats
  } catch {
    // Best effort: the change summary is optional, so any git failure yields "no changes".
    return []
  }
}
|
||||
|
||||
/**
 * Renders git change statistics as a plain-text "[FILE CHANGES SUMMARY]" block
 * with optional "Modified files:", "Created files:" and "Deleted files:"
 * sections, each followed by a blank line.
 *
 * @param stats - Per-file statistics to render.
 * @param notepadPath - When truthy, a "[NOTEPAD UPDATED]" section is appended for
 *        the first changed file whose path contains "notepad" or ".sisyphus".
 *        NOTE(review): only the truthiness of this argument is used; the path
 *        value itself is not compared against the changed files.
 * @returns The summary text; a fixed "No file changes detected." message when
 *          `stats` is empty.
 */
function formatFileChanges(stats: GitFileStat[], notepadPath?: string): string {
  if (stats.length === 0) return "[FILE CHANGES SUMMARY]\nNo file changes detected.\n"

  const lines: string[] = ["[FILE CHANGES SUMMARY]"]

  // Appends one titled section (plus a trailing blank line) for every file with
  // the given status; emits nothing when no file has that status.
  const pushSection = (
    title: string,
    status: GitFileStat["status"],
    describeCounts: (file: GitFileStat) => string
  ): void => {
    const group = stats.filter((s) => s.status === status)
    if (group.length === 0) return
    lines.push(title)
    for (const file of group) {
      lines.push(` ${file.path} ${describeCounts(file)}`)
    }
    lines.push("")
  }

  pushSection("Modified files:", "modified", (f) => `(+${f.added}, -${f.removed})`)
  pushSection("Created files:", "added", (f) => `(+${f.added})`)
  pushSection("Deleted files:", "deleted", (f) => `(-${f.removed})`)

  if (notepadPath) {
    const notepadStat = stats.find((s) => s.path.includes("notepad") || s.path.includes(".sisyphus"))
    if (notepadStat) {
      lines.push("[NOTEPAD UPDATED]")
      lines.push(` ${notepadStat.path} (+${notepadStat.added})`)
      lines.push("")
    }
  }

  return lines.join("\n")
}
|
||||
|
||||
/** Describes the tool call passed to the `tool.execute.after` handler. */
interface ToolExecuteInput {
  /** Name of the tool that was executed (for example "sisyphus_task"). */
  tool: string
  /** Session in which the tool ran, when known. */
  sessionID?: string
  /** Agent name supplied with the call, when present. */
  agent?: string
}
|
||||
|
||||
/**
 * Result object of a tool call as passed to the `tool.execute.after` handler.
 * The hook's tests expect `output` to be rewritten in place on this object.
 */
interface ToolExecuteOutput {
  /** Display title of the tool result. */
  title: string
  /** Text body of the tool result. */
  output: string
  /** Tool-specific metadata; not given a type here. */
  metadata: unknown
}
|
||||
|
||||
/**
 * Locates the message directory of a session under `MESSAGE_STORAGE`.
 *
 * Looks for `MESSAGE_STORAGE/<sessionID>` first, then for
 * `MESSAGE_STORAGE/<entry>/<sessionID>` for every entry directly under
 * `MESSAGE_STORAGE`.
 *
 * @param sessionID - Session whose message directory is wanted.
 * @returns The first existing candidate path, or `null` when `MESSAGE_STORAGE`
 *          does not exist or no candidate exists.
 */
function getMessageDir(sessionID: string): string | null {
  if (!existsSync(MESSAGE_STORAGE)) return null

  const directPath = join(MESSAGE_STORAGE, sessionID)
  if (existsSync(directPath)) return directPath

  // Fall back to one level of nesting: MESSAGE_STORAGE/<entry>/<sessionID>.
  for (const entry of readdirSync(MESSAGE_STORAGE)) {
    const nestedPath = join(MESSAGE_STORAGE, entry, sessionID)
    if (existsSync(nestedPath)) return nestedPath
  }

  return null
}
|
||||
|
||||
/**
 * Reports whether the session's stored messages attribute it to the
 * "orchestrator-sisyphus" agent.
 *
 * @param sessionID - Session to inspect; a missing or empty ID yields `false`.
 * @returns `true` only when a message directory exists for the session and the
 *          message returned by `findNearestMessageWithFields` names the
 *          orchestrator agent.
 */
function isCallerOrchestrator(sessionID?: string): boolean {
  if (!sessionID) return false

  const messageDir = getMessageDir(sessionID)
  if (!messageDir) return false

  return findNearestMessageWithFields(messageDir)?.agent === "orchestrator-sisyphus"
}
|
||||
|
||||
/** Per-session bookkeeping kept in memory by the hook. */
interface SessionState {
  /** Set from the most recent `session.error` event: `true` when that error was classified as an abort. */
  lastEventWasAbortError?: boolean
}
|
||||
|
||||
/** Optional settings accepted by `createSisyphusOrchestratorHook`. */
export interface SisyphusOrchestratorHookOptions {
  /** Project directory. NOTE(review): confirm how this relates to `ctx.directory`, which the hook also reads. */
  directory: string
  /** When provided, continuation is skipped while this manager reports running tasks for the session. */
  backgroundManager?: BackgroundManager
}
|
||||
|
||||
/**
 * Heuristically decides whether an error value represents a user- or
 * system-initiated abort, cancellation, or interruption.
 *
 * Accepted shapes:
 * - an object whose `name` is "MessageAbortedError" or "AbortError";
 * - an object named "DOMException" whose message mentions "abort";
 * - an object whose message mentions "aborted", "cancelled", "canceled" or "interrupted";
 * - a string mentioning "abort", "cancel" or "interrupt".
 *
 * All message matching is case-insensitive. Non-string `name` / `message`
 * properties are ignored rather than trusted.
 *
 * @param error - Arbitrary error payload (typically from a `session.error` event).
 * @returns `true` when the value looks like an abort; `false` otherwise,
 *          including for `null`, `undefined`, and other falsy values.
 */
function isAbortError(error: unknown): boolean {
  if (!error) return false

  if (typeof error === "string") {
    const lower = error.toLowerCase()
    return lower.includes("abort") || lower.includes("cancel") || lower.includes("interrupt")
  }

  if (typeof error !== "object") return false

  const errObj = error as Record<string, unknown>
  // Event payloads are untyped, so verify the property types before using them:
  // calling toLowerCase() on a non-string message would throw.
  const name = typeof errObj.name === "string" ? errObj.name : undefined
  const message = typeof errObj.message === "string" ? errObj.message.toLowerCase() : ""

  if (name === "MessageAbortedError" || name === "AbortError") return true
  if (name === "DOMException" && message.includes("abort")) return true

  // "canceled" covers the US spelling, matching what the string branch already accepts via "cancel".
  return (
    message.includes("aborted") ||
    message.includes("cancelled") ||
    message.includes("canceled") ||
    message.includes("interrupted")
  )
}
|
||||
|
||||
/**
 * Builds the Sisyphus orchestrator hook.
 *
 * The returned object exposes two entry points:
 *
 * - `handler` reacts to session events. On `session.idle` it reads the boulder
 *   state for `ctx.directory` and, when the active plan still has unfinished
 *   work, prompts the session to continue. Continuation is skipped when the
 *   idle session is neither the main session nor a tracked subagent session,
 *   when the previous `session.error` was an abort, when background tasks of
 *   the session are still running, when no boulder is active, or when the plan
 *   is complete.
 * - `"tool.execute.after"` post-processes the output of the `sisyphus_task`
 *   tool when the calling session belongs to the orchestrator agent: with an
 *   active boulder the output is replaced by a file-change summary plus an
 *   orchestrator reminder; without one a verification reminder is appended.
 *
 * @param ctx - Plugin input; `ctx.directory` and `ctx.client.session.prompt`
 *   are used.
 * @param options - Optional settings; only `backgroundManager` is read here.
 * @returns The hook object with `handler` and `"tool.execute.after"`.
 */
export function createSisyphusOrchestratorHook(
  ctx: PluginInput,
  options?: SisyphusOrchestratorHookOptions
) {
  const backgroundManager = options?.backgroundManager
  const sessions = new Map<string, SessionState>()

  /** Returns the state for a session, creating an empty one on first use. */
  function getState(sessionID: string): SessionState {
    let state = sessions.get(sessionID)
    if (!state) {
      state = {}
      sessions.set(sessionID, state)
    }
    return state
  }

  /** Resets the abort marker of an already-tracked session; never creates state. */
  function clearAbortFlag(sessionID: string): void {
    const state = sessions.get(sessionID)
    if (state) {
      state.lastEventWasAbortError = false
    }
  }

  /** True when a background manager exists and reports a running task for the session. */
  function hasRunningBackgroundTasks(sessionID: string): boolean {
    if (!backgroundManager) return false
    return backgroundManager.getTasksByParentSession(sessionID).some(t => t.status === "running")
  }

  /**
   * Sends the boulder continuation prompt to a session.
   *
   * Never rejects: every failure (including one raised while checking
   * background tasks) is logged, which makes it safe to call without awaiting.
   */
  async function injectContinuation(sessionID: string, planName: string, remaining: number, total: number): Promise<void> {
    try {
      if (hasRunningBackgroundTasks(sessionID)) {
        log(`[${HOOK_NAME}] Skipped injection: background tasks running`, { sessionID })
        return
      }

      // A function replacer inserts the plan name verbatim; a plain string
      // replacement would interpret "$&", "$1", "$$" etc. inside the name.
      const prompt =
        BOULDER_CONTINUATION_PROMPT.replace(/{PLAN_NAME}/g, () => planName) +
        `\n\n[Status: ${total - remaining}/${total} completed, ${remaining} remaining]`

      log(`[${HOOK_NAME}] Injecting boulder continuation`, { sessionID, planName, remaining })

      await ctx.client.session.prompt({
        path: { id: sessionID },
        body: {
          parts: [{ type: "text", text: prompt }],
        },
        query: { directory: ctx.directory },
      })

      log(`[${HOOK_NAME}] Boulder continuation injected`, { sessionID })
    } catch (err) {
      log(`[${HOOK_NAME}] Boulder continuation failed`, { sessionID, error: String(err) })
    }
  }

  return {
    handler: async ({ event }: { event: { type: string; properties?: unknown } }): Promise<void> => {
      const props = event.properties as Record<string, unknown> | undefined

      if (event.type === "session.error") {
        const sessionID = props?.sessionID as string | undefined
        if (!sessionID) return

        // Remember whether this error was an abort so the next idle can be skipped.
        const state = getState(sessionID)
        const isAbort = isAbortError(props?.error)
        state.lastEventWasAbortError = isAbort

        log(`[${HOOK_NAME}] session.error`, { sessionID, isAbort })
        return
      }

      if (event.type === "session.idle") {
        const sessionID = props?.sessionID as string | undefined
        if (!sessionID) return

        log(`[${HOOK_NAME}] session.idle`, { sessionID })

        const mainSessionID = getMainSessionID()
        const isMainSession = sessionID === mainSessionID
        const isBackgroundTaskSession = subagentSessions.has(sessionID)

        // Once a main session is known, only it and tracked subagent sessions qualify.
        if (mainSessionID && !isMainSession && !isBackgroundTaskSession) {
          log(`[${HOOK_NAME}] Skipped: not main or background task session`, { sessionID })
          return
        }

        const state = getState(sessionID)

        if (state.lastEventWasAbortError) {
          // The abort marker is one-shot: consume it and skip this idle only.
          state.lastEventWasAbortError = false
          log(`[${HOOK_NAME}] Skipped: abort error immediately before idle`, { sessionID })
          return
        }

        if (hasRunningBackgroundTasks(sessionID)) {
          log(`[${HOOK_NAME}] Skipped: background tasks running`, { sessionID })
          return
        }

        const boulderState = readBoulderState(ctx.directory)
        if (!boulderState) {
          log(`[${HOOK_NAME}] No active boulder`, { sessionID })
          return
        }

        const progress = getPlanProgress(boulderState.active_plan)
        if (progress.isComplete) {
          log(`[${HOOK_NAME}] Boulder complete`, { sessionID, plan: boulderState.plan_name })
          return
        }

        const remaining = progress.total - progress.completed
        // Intentionally not awaited so event handling is not held up by the
        // prompt call; injectContinuation handles its own errors.
        void injectContinuation(sessionID, boulderState.plan_name, remaining, progress.total)
        return
      }

      if (event.type === "message.updated") {
        const info = props?.info as Record<string, unknown> | undefined
        const sessionID = info?.sessionID as string | undefined
        if (!sessionID) return

        clearAbortFlag(sessionID)
        return
      }

      if (event.type === "message.part.updated") {
        // NOTE(review): this reads `properties.info`; confirm the event payload
        // really carries `info` (with `sessionID` and `role`), otherwise this
        // branch never clears the flag.
        const info = props?.info as Record<string, unknown> | undefined
        const sessionID = info?.sessionID as string | undefined
        const role = info?.role as string | undefined

        if (sessionID && role === "assistant") {
          clearAbortFlag(sessionID)
        }
        return
      }

      if (event.type === "tool.execute.before" || event.type === "tool.execute.after") {
        const sessionID = props?.sessionID as string | undefined
        if (sessionID) {
          clearAbortFlag(sessionID)
        }
        return
      }

      if (event.type === "session.deleted") {
        const sessionInfo = props?.info as { id?: string } | undefined
        if (sessionInfo?.id) {
          sessions.delete(sessionInfo.id)
          log(`[${HOOK_NAME}] Session deleted: cleaned up`, { sessionID: sessionInfo.id })
        }
        return
      }
    },

    "tool.execute.after": async (
      input: ToolExecuteInput,
      output: ToolExecuteOutput
    ): Promise<void> => {
      if (input.tool !== "sisyphus_task") {
        return
      }

      const outputStr = output.output && typeof output.output === "string" ? output.output : ""
      const isBackgroundLaunch = outputStr.includes("Background task launched") || outputStr.includes("Background task resumed")

      // Launch/resume notices are not completed work, so they are left untouched.
      if (isBackgroundLaunch) {
        return
      }

      if (!isCallerOrchestrator(input.sessionID)) {
        return
      }

      // Only a non-empty string output is transformed.
      if (!(output.output && typeof output.output === "string")) {
        return
      }

      const gitStats = getGitDiffStats(ctx.directory)
      const fileChanges = formatFileChanges(gitStats)

      const boulderState = readBoulderState(ctx.directory)

      if (boulderState) {
        const progress = getPlanProgress(boulderState.active_plan)

        if (input.sessionID && !boulderState.session_ids.includes(input.sessionID)) {
          appendSessionId(ctx.directory, input.sessionID)
          log(`[${HOOK_NAME}] Appended session to boulder`, {
            sessionID: input.sessionID,
            plan: boulderState.plan_name,
          })
        }

        // The tool output is replaced (not appended to) in boulder mode.
        output.output = `
## SUBAGENT WORK COMPLETED

${fileChanges}
${buildOrchestratorReminder(boulderState.plan_name, progress)}`

        log(`[${HOOK_NAME}] Output transformed for orchestrator mode (boulder)`, {
          plan: boulderState.plan_name,
          progress: `${progress.completed}/${progress.total}`,
          fileCount: gitStats.length,
        })
      } else {
        output.output += `\n${buildStandaloneVerificationReminder()}`

        log(`[${HOOK_NAME}] Verification reminder appended for orchestrator`, {
          sessionID: input.sessionID,
          fileCount: gitStats.length,
        })
      }
    },
  }
}
|
||||
Reference in New Issue
Block a user