diff --git a/src/features/boulder-state/index.ts b/src/features/boulder-state/index.ts index f404e4e0e..17618996b 100644 --- a/src/features/boulder-state/index.ts +++ b/src/features/boulder-state/index.ts @@ -1,3 +1,4 @@ export * from "./types" export * from "./constants" export * from "./storage" +export * from "./top-level-task" diff --git a/src/features/boulder-state/storage.test.ts b/src/features/boulder-state/storage.test.ts index e52174cef..a8740662d 100644 --- a/src/features/boulder-state/storage.test.ts +++ b/src/features/boulder-state/storage.test.ts @@ -11,8 +11,11 @@ import { getPlanName, createBoulderState, findPrometheusPlans, + getTaskSessionState, + upsertTaskSessionState, } from "./storage" import type { BoulderState } from "./types" +import { readCurrentTopLevelTask } from "./top-level-task" describe("boulder-state", () => { const TEST_DIR = join(tmpdir(), "boulder-state-test-" + Date.now()) @@ -134,6 +137,24 @@ describe("boulder-state", () => { expect(result?.session_ids).toEqual(["session-1", "session-2"]) expect(result?.plan_name).toBe("my-plan") }) + + test("should default task_sessions to empty object when missing from JSON", () => { + // given - boulder.json without task_sessions field + const boulderFile = join(SISYPHUS_DIR, "boulder.json") + writeFileSync(boulderFile, JSON.stringify({ + active_plan: "/path/to/plan.md", + started_at: "2026-01-01T00:00:00Z", + session_ids: ["session-1"], + plan_name: "plan", + })) + + // when + const result = readBoulderState(TEST_DIR) + + // then + expect(result).not.toBeNull() + expect(result!.task_sessions).toEqual({}) + }) }) describe("writeBoulderState", () => { @@ -249,6 +270,115 @@ describe("boulder-state", () => { }) }) + describe("task session state", () => { + test("should persist and read preferred session for a top-level plan task", () => { + // given - existing boulder state + const state: BoulderState = { + active_plan: "/plan.md", + started_at: "2026-01-02T10:00:00Z", + session_ids: ["session-1"], + plan_name: "plan", + } + writeBoulderState(TEST_DIR, state) + + // when + upsertTaskSessionState(TEST_DIR, { + taskKey: "todo:1", + taskLabel: "1", + taskTitle: "Implement auth flow", + sessionId: "ses_task_123", + agent: "sisyphus-junior", + category: "deep", + }) + const result = getTaskSessionState(TEST_DIR, "todo:1") + + // then + expect(result).not.toBeNull() + expect(result?.session_id).toBe("ses_task_123") + expect(result?.task_title).toBe("Implement auth flow") + expect(result?.agent).toBe("sisyphus-junior") + expect(result?.category).toBe("deep") + }) + + test("should overwrite preferred session for the same top-level plan task", () => { + // given - existing boulder state with prior preferred session + const state: BoulderState = { + active_plan: "/plan.md", + started_at: "2026-01-02T10:00:00Z", + session_ids: ["session-1"], + plan_name: "plan", + task_sessions: { + "todo:1": { + task_key: "todo:1", + task_label: "1", + task_title: "Implement auth flow", + session_id: "ses_old", + updated_at: "2026-01-02T10:00:00Z", + }, + }, + } + writeBoulderState(TEST_DIR, state) + + // when + upsertTaskSessionState(TEST_DIR, { + taskKey: "todo:1", + taskLabel: "1", + taskTitle: "Implement auth flow", + sessionId: "ses_new", + }) + const result = getTaskSessionState(TEST_DIR, "todo:1") + + // then + expect(result?.session_id).toBe("ses_new") + }) + }) + + describe("readCurrentTopLevelTask", () => { + test("should return the first unchecked top-level task in TODOs", () => { + // given - plan with nested and top-level unchecked tasks + const planPath = join(TEST_DIR, "current-task-plan.md") + writeFileSync(planPath, `# Plan + +## TODOs +- [x] 1. Finished task + - [ ] nested acceptance checkbox +- [ ] 2. Current task + +## Final Verification Wave +- [ ] F1. Final review +`) + + // when + const result = readCurrentTopLevelTask(planPath) + + // then + expect(result).not.toBeNull() + expect(result?.key).toBe("todo:2") + expect(result?.title).toBe("Current task") + }) + + test("should fall back to final-wave task when implementation tasks are complete", () => { + // given - plan with only final-wave work remaining + const planPath = join(TEST_DIR, "final-wave-current-task-plan.md") + writeFileSync(planPath, `# Plan + +## TODOs +- [x] 1. Finished task + +## Final Verification Wave +- [ ] F1. Final review +`) + + // when + const result = readCurrentTopLevelTask(planPath) + + // then + expect(result).not.toBeNull() + expect(result?.key).toBe("final-wave:f1") + expect(result?.title).toBe("Final review") + }) + }) + describe("getPlanProgress", () => { test("should count completed and uncompleted checkboxes", () => { // given - plan file with checkboxes diff --git a/src/features/boulder-state/storage.ts b/src/features/boulder-state/storage.ts index ab84368b7..36286a745 100644 --- a/src/features/boulder-state/storage.ts +++ b/src/features/boulder-state/storage.ts @@ -6,7 +6,7 @@ import { existsSync, readFileSync, writeFileSync, mkdirSync, readdirSync } from "node:fs" import { dirname, join, basename } from "node:path" -import type { BoulderState, PlanProgress } from "./types" +import type { BoulderState, PlanProgress, TaskSessionState } from "./types" import { BOULDER_DIR, BOULDER_FILE, PROMETHEUS_PLANS_DIR } from "./constants" export function getBoulderFilePath(directory: string): string { @@ -29,6 +29,9 @@ export function readBoulderState(directory: string): BoulderState | null { if (!Array.isArray(parsed.session_ids)) { parsed.session_ids = [] } + if (!parsed.task_sessions || typeof parsed.task_sessions !== "object" || Array.isArray(parsed.task_sessions)) { + parsed.task_sessions = {} + } return parsed as BoulderState } catch { return null @@ -82,6 +85,50 @@ export function clearBoulderState(directory: string): boolean { } } +export function getTaskSessionState(directory: string, taskKey: string): TaskSessionState | null { + const state = readBoulderState(directory) + if (!state?.task_sessions) { + return null + } + + return state.task_sessions[taskKey] ?? null +} + +export function upsertTaskSessionState( + directory: string, + input: { + taskKey: string + taskLabel: string + taskTitle: string + sessionId: string + agent?: string + category?: string + }, +): BoulderState | null { + const state = readBoulderState(directory) + if (!state) { + return null + } + + const taskSessions = state.task_sessions ?? {} + taskSessions[input.taskKey] = { + task_key: input.taskKey, + task_label: input.taskLabel, + task_title: input.taskTitle, + session_id: input.sessionId, + ...(input.agent !== undefined ? { agent: input.agent } : {}), + ...(input.category !== undefined ? { category: input.category } : {}), + updated_at: new Date().toISOString(), + } + + state.task_sessions = taskSessions + if (writeBoulderState(directory, state)) { + return state + } + + return null +} + /** * Find Prometheus plan files for this project. * Prometheus stores plans at: {project}/.sisyphus/plans/{name}.md diff --git a/src/features/boulder-state/top-level-task.ts b/src/features/boulder-state/top-level-task.ts new file mode 100644 index 000000000..d92970b56 --- /dev/null +++ b/src/features/boulder-state/top-level-task.ts @@ -0,0 +1,77 @@ +import { existsSync, readFileSync } from "node:fs" + +import type { TopLevelTaskRef } from "./types" + +const TODO_HEADING_PATTERN = /^##\s+TODOs\b/i +const FINAL_VERIFICATION_HEADING_PATTERN = /^##\s+Final Verification Wave\b/i +const SECOND_LEVEL_HEADING_PATTERN = /^##\s+/ +const UNCHECKED_CHECKBOX_PATTERN = /^(\s*)[-*]\s*\[\s*\]\s*(.+)$/ +const TODO_TASK_PATTERN = /^(\d+)\.\s+(.+)$/ +const FINAL_WAVE_TASK_PATTERN = /^(F\d+)\.\s+(.+)$/i + +type PlanSection = "todo" | "final-wave" | "other" + +function buildTaskRef( + section: "todo" | "final-wave", + taskLabel: string, +): TopLevelTaskRef | null { + const pattern = section === "todo" ? TODO_TASK_PATTERN : FINAL_WAVE_TASK_PATTERN + const match = taskLabel.match(pattern) + if (!match) { + return null + } + + const rawLabel = match[1] + const title = match[2].trim() + + return { + key: `${section}:${rawLabel.toLowerCase()}`, + section, + label: rawLabel, + title, + } +} + +export function readCurrentTopLevelTask(planPath: string): TopLevelTaskRef | null { + if (!existsSync(planPath)) { + return null + } + + try { + const content = readFileSync(planPath, "utf-8") + const lines = content.split(/\r?\n/) + let section: PlanSection = "other" + + for (const line of lines) { + if (SECOND_LEVEL_HEADING_PATTERN.test(line)) { + section = TODO_HEADING_PATTERN.test(line) + ? "todo" + : FINAL_VERIFICATION_HEADING_PATTERN.test(line) + ? "final-wave" + : "other" + } + + const uncheckedTaskMatch = line.match(UNCHECKED_CHECKBOX_PATTERN) + if (!uncheckedTaskMatch) { + continue + } + + if (uncheckedTaskMatch[1].length > 0) { + continue + } + + if (section !== "todo" && section !== "final-wave") { + continue + } + + const taskRef = buildTaskRef(section, uncheckedTaskMatch[2].trim()) + if (taskRef) { + return taskRef + } + } + + return null + } catch { + return null + } +} diff --git a/src/features/boulder-state/types.ts b/src/features/boulder-state/types.ts index b1a225380..ba488f381 100644 --- a/src/features/boulder-state/types.ts +++ b/src/features/boulder-state/types.ts @@ -18,6 +18,8 @@ export interface BoulderState { agent?: string /** Absolute path to the git worktree root where work happens */ worktree_path?: string + /** Preferred reusable subagent sessions keyed by current top-level plan task */ + task_sessions?: Record } export interface PlanProgress { @@ -28,3 +30,31 @@ export interface PlanProgress { /** Whether all tasks are done */ isComplete: boolean } + +export interface TaskSessionState { + /** Stable identifier for the current top-level plan task (e.g. todo:1 / final-wave:F1) */ + task_key: string + /** Original task label from the plan file */ + task_label: string + /** Full task title from the plan file */ + task_title: string + /** Preferred reusable subagent session */ + session_id: string + /** Agent associated with the task session, when known */ + agent?: string + /** Category associated with the task session, when known */ + category?: string + /** Last update timestamp */ + updated_at: string +} + +export interface TopLevelTaskRef { + /** Stable identifier for the current top-level plan task */ + key: string + /** Task section in the Prometheus plan */ + section: "todo" | "final-wave" + /** Original label token (e.g. 1 / F1) */ + label: string + /** Full task title extracted from the checkbox line */ + title: string +} diff --git a/src/hooks/atlas/atlas-hook.ts b/src/hooks/atlas/atlas-hook.ts index 855cdacb6..a814cfe2c 100644 --- a/src/hooks/atlas/atlas-hook.ts +++ b/src/hooks/atlas/atlas-hook.ts @@ -7,6 +7,7 @@ import type { AtlasHookOptions, SessionState } from "./types" export function createAtlasHook(ctx: PluginInput, options?: AtlasHookOptions) { const sessions = new Map() const pendingFilePaths = new Map() + const pendingTaskRefs = new Map() const autoCommit = options?.autoCommit ?? true function getState(sessionID: string): SessionState { @@ -20,7 +21,7 @@ export function createAtlasHook(ctx: PluginInput, options?: AtlasHookOptions) { return { handler: createAtlasEventHandler({ ctx, options, sessions, getState }), - "tool.execute.before": createToolExecuteBeforeHandler({ ctx, pendingFilePaths }), - "tool.execute.after": createToolExecuteAfterHandler({ ctx, pendingFilePaths, autoCommit, getState }), + "tool.execute.before": createToolExecuteBeforeHandler({ ctx, pendingFilePaths, pendingTaskRefs }), + "tool.execute.after": createToolExecuteAfterHandler({ ctx, pendingFilePaths, pendingTaskRefs, autoCommit, getState }), } } diff --git a/src/hooks/atlas/boulder-continuation-injector.ts b/src/hooks/atlas/boulder-continuation-injector.ts index 4f8e35802..2e01e9ab7 100644 --- a/src/hooks/atlas/boulder-continuation-injector.ts +++ b/src/hooks/atlas/boulder-continuation-injector.ts @@ -15,6 +15,8 @@ export async function injectBoulderContinuation(input: { total: number agent?: string worktreePath?: string + preferredTaskSessionId?: string + preferredTaskTitle?: string backgroundManager?: BackgroundManager sessionState: SessionState }): Promise { @@ -26,6 +28,8 @@ export async function injectBoulderContinuation(input: { total, agent, worktreePath, + preferredTaskSessionId, + preferredTaskTitle, backgroundManager, sessionState, } = input @@ -40,9 +44,13 @@ export async function injectBoulderContinuation(input: { } const worktreeContext = worktreePath ? `\n\n[Worktree: ${worktreePath}]` : "" + const preferredSessionContext = preferredTaskSessionId + ? `\n\n[Preferred reuse session for current top-level plan task${preferredTaskTitle ? `: ${preferredTaskTitle}` : ""}: ${preferredTaskSessionId}]` + : "" const prompt = BOULDER_CONTINUATION_PROMPT.replace(/{PLAN_NAME}/g, planName) + `\n\n[Status: ${total - remaining}/${total} completed, ${remaining} remaining]` + + preferredSessionContext + worktreeContext try { diff --git a/src/hooks/atlas/idle-event.ts b/src/hooks/atlas/idle-event.ts index 1f5cfeb2c..2d64c8eed 100644 --- a/src/hooks/atlas/idle-event.ts +++ b/src/hooks/atlas/idle-event.ts @@ -1,5 +1,10 @@ import type { PluginInput } from "@opencode-ai/plugin" -import { getPlanProgress, readBoulderState } from "../../features/boulder-state" +import { + getPlanProgress, + getTaskSessionState, + readBoulderState, + readCurrentTopLevelTask, +} from "../../features/boulder-state" import { log } from "../../shared/logger" import { injectBoulderContinuation } from "./boulder-continuation-injector" import { HOOK_NAME } from "./hook-name" @@ -31,6 +36,14 @@ async function injectContinuation(input: { input.sessionState.lastContinuationInjectedAt = Date.now() try { + const currentBoulder = readBoulderState(input.ctx.directory) + const currentTask = currentBoulder + ? readCurrentTopLevelTask(currentBoulder.active_plan) + : null + const preferredTaskSession = currentTask + ? getTaskSessionState(input.ctx.directory, currentTask.key) + : null + await injectBoulderContinuation({ ctx: input.ctx, sessionID: input.sessionID, @@ -39,6 +52,8 @@ async function injectContinuation(input: { total: input.progress.total, agent: input.agent, worktreePath: input.worktreePath, + preferredTaskSessionId: preferredTaskSession?.session_id, + preferredTaskTitle: preferredTaskSession?.task_title, backgroundManager: input.options?.backgroundManager, sessionState: input.sessionState, }) diff --git a/src/hooks/atlas/index.test.ts b/src/hooks/atlas/index.test.ts index 269d7928b..8ec696558 100644 --- a/src/hooks/atlas/index.test.ts +++ b/src/hooks/atlas/index.test.ts @@ -404,12 +404,174 @@ describe("atlas hook", () => { // then - should include verification instructions expect(output.output).toContain("LYING") - expect(output.output).toContain("PHASE 1") - expect(output.output).toContain("PHASE 2") + expect(output.output).toContain("PHASE 1") + expect(output.output).toContain("PHASE 2") cleanupMessageStorage(sessionID) }) + test("should persist preferred subagent session for the current top-level task", async () => { + // given - boulder state with a current top-level task, Atlas caller + const sessionID = "session-task-session-track-test" + setupMessageStorage(sessionID, "atlas") + + const planPath = join(TEST_DIR, "task-session-plan.md") + writeFileSync(planPath, `# Plan + +## TODOs +- [ ] 1. Implement auth flow + - [ ] nested acceptance checkbox +`) + + const state: BoulderState = { + active_plan: planPath, + started_at: "2026-01-02T10:00:00Z", + session_ids: ["session-1"], + plan_name: "task-session-plan", + } + writeBoulderState(TEST_DIR, state) + + const hook = createAtlasHook(createMockPluginInput()) + const output = { + title: "Sisyphus Task", + output: `Task completed successfully + + +session_id: ses_auth_flow_123 +`, + metadata: { + agent: "sisyphus-junior", + category: "deep", + }, + } + + // when + await hook["tool.execute.after"]( + { tool: "task", sessionID }, + output + ) + + // then + const updatedState = readBoulderState(TEST_DIR) + expect(updatedState?.task_sessions?.["todo:1"]?.session_id).toBe("ses_auth_flow_123") + expect(updatedState?.task_sessions?.["todo:1"]?.task_title).toBe("Implement auth flow") + expect(updatedState?.task_sessions?.["todo:1"]?.agent).toBe("sisyphus-junior") + expect(updatedState?.task_sessions?.["todo:1"]?.category).toBe("deep") + + cleanupMessageStorage(sessionID) + }) + + test("should preserve the delegated task key even after the plan advances to the next task", async () => { + // given - Atlas caller starts task 1, then the plan advances before task output is processed + const sessionID = "session-stable-task-key-test" + setupMessageStorage(sessionID, "atlas") + + const planPath = join(TEST_DIR, "stable-task-key-plan.md") + writeFileSync(planPath, `# Plan + +## TODOs +- [ ] 1. Implement auth flow +- [ ] 2. Add API validation +`) + + writeBoulderState(TEST_DIR, { + active_plan: planPath, + started_at: "2026-01-02T10:00:00Z", + session_ids: ["session-1"], + plan_name: "stable-task-key-plan", + }) + + const hook = createAtlasHook(createMockPluginInput()) + + // when - Atlas delegates task 1 + await hook["tool.execute.before"]( + { tool: "task", sessionID, callID: "call-task-1" }, + { args: { prompt: "Implement auth flow" } } + ) + + // and the plan is advanced before the task output is processed + writeFileSync(planPath, `# Plan + +## TODOs +- [x] 1. Implement auth flow +- [ ] 2. Add API validation +`) + + await hook["tool.execute.after"]( + { tool: "task", sessionID, callID: "call-task-1" }, + { + title: "Sisyphus Task", + output: `Task completed successfully + + +session_id: ses_auth_flow_123 +`, + metadata: { + agent: "sisyphus-junior", + category: "deep", + }, + } + ) + + // then - the completed task session is still recorded against task 1, not task 2 + const updatedState = readBoulderState(TEST_DIR) + expect(updatedState?.task_sessions?.["todo:1"]?.session_id).toBe("ses_auth_flow_123") + expect(updatedState?.task_sessions?.["todo:2"]).toBeUndefined() + + cleanupMessageStorage(sessionID) + }) + + test("should not overwrite the current task mapping when task() explicitly resumes an older session", async () => { + // given - current plan is on task 2, but Atlas explicitly resumes an older session for a previous task + const sessionID = "session-cross-task-resume-test" + setupMessageStorage(sessionID, "atlas") + + const planPath = join(TEST_DIR, "cross-task-resume-plan.md") + writeFileSync(planPath, `# Plan + +## TODOs +- [x] 1. Implement auth flow +- [ ] 2. Add API validation +`) + + writeBoulderState(TEST_DIR, { + active_plan: planPath, + started_at: "2026-01-02T10:00:00Z", + session_ids: ["session-1"], + plan_name: "cross-task-resume-plan", + }) + + const hook = createAtlasHook(createMockPluginInput()) + + // when - Atlas resumes an explicit prior session + await hook["tool.execute.before"]( + { tool: "task", sessionID, callID: "call-resume-old-task" }, + { args: { prompt: "Follow up on previous task", session_id: "ses_old_task_111" } } + ) + + await hook["tool.execute.after"]( + { tool: "task", sessionID, callID: "call-resume-old-task" }, + { + title: "Sisyphus Task", + output: `Task continued successfully + + +session_id: ses_old_task_111 +`, + metadata: { + agent: "sisyphus-junior", + category: "deep", + }, + } + ) + + // then - Atlas does not poison task 2's preferred session mapping + const updatedState = readBoulderState(TEST_DIR) + expect(updatedState?.task_sessions?.["todo:2"]).toBeUndefined() + + cleanupMessageStorage(sessionID) + }) + describe("completion gate output ordering", () => { const COMPLETION_GATE_SESSION = "completion-gate-order-test" @@ -1147,6 +1309,48 @@ describe("atlas hook", () => { expect(callArgs.body.parts[0].text).toContain("2 remaining") }) + test("should include preferred reuse session in continuation prompt for current top-level task", async () => { + // given - boulder state with tracked preferred session + const planPath = join(TEST_DIR, "preferred-session-plan.md") + writeFileSync(planPath, `# Plan + +## TODOs +- [ ] 1. Implement auth flow +`) + + writeBoulderState(TEST_DIR, { + active_plan: planPath, + started_at: "2026-01-02T10:00:00Z", + session_ids: [MAIN_SESSION_ID], + plan_name: "preferred-session-plan", + task_sessions: { + "todo:1": { + task_key: "todo:1", + task_label: "1", + task_title: "Implement auth flow", + session_id: "ses_auth_flow_123", + updated_at: "2026-01-02T10:00:00Z", + }, + }, + }) + + const mockInput = createMockPluginInput() + const hook = createAtlasHook(mockInput) + + // when + await hook.handler({ + event: { + type: "session.idle", + properties: { sessionID: MAIN_SESSION_ID }, + }, + }) + + // then + const callArgs = mockInput._promptMock.mock.calls[0][0] + expect(callArgs.body.parts[0].text).toContain("Preferred reuse session for current top-level plan task") + expect(callArgs.body.parts[0].text).toContain("ses_auth_flow_123") + }) + test("should inject when last agent is sisyphus and boulder targets atlas explicitly", async () => { // given - boulder explicitly set to atlas, but last agent is sisyphus (initial state after /start-work) const planPath = join(TEST_DIR, "test-plan.md") diff --git a/src/hooks/atlas/subagent-session-id.test.ts b/src/hooks/atlas/subagent-session-id.test.ts new file mode 100644 index 000000000..8d4416c9d --- /dev/null +++ b/src/hooks/atlas/subagent-session-id.test.ts @@ -0,0 +1,53 @@ +import { describe, expect, test } from "bun:test" + +import { extractSessionIdFromOutput } from "./subagent-session-id" + +describe("extractSessionIdFromOutput", () => { + test("extracts Session ID blocks from background output", () => { + // given + const output = `Background task launched.\n\nSession ID: ses_bg_12345` + + // when + const result = extractSessionIdFromOutput(output) + + // then + expect(result).toBe("ses_bg_12345") + }) + + test("extracts session_id from task metadata blocks", () => { + // given + const output = `Task completed.\n\n\nsession_id: ses_sync_12345\n` + + // when + const result = extractSessionIdFromOutput(output) + + // then + expect(result).toBe("ses_sync_12345") + }) + + test("returns undefined when no session id is present", () => { + // given + const output = "Task completed without metadata" + + // when + const result = extractSessionIdFromOutput(output) + + // then + expect(result).toBeUndefined() + }) + + test("prefers the session id inside the trailing task_metadata block", () => { + // given + const output = `The previous attempt mentioned session_id: ses_wrong_body_123 but that was only context. + + +session_id: ses_real_metadata_456 +` + + // when + const result = extractSessionIdFromOutput(output) + + // then + expect(result).toBe("ses_real_metadata_456") + }) +}) diff --git a/src/hooks/atlas/subagent-session-id.ts b/src/hooks/atlas/subagent-session-id.ts index 12cf619b1..9494d5a33 100644 --- a/src/hooks/atlas/subagent-session-id.ts +++ b/src/hooks/atlas/subagent-session-id.ts @@ -1,4 +1,10 @@ -export function extractSessionIdFromOutput(output: string): string { - const match = output.match(/Session ID:\s*(ses_[a-zA-Z0-9]+)/) - return match?.[1] ?? "" +export function extractSessionIdFromOutput(output: string): string | undefined { + const taskMetadataMatches = [...output.matchAll(/[\s\S]*?session_id:\s*(ses_[a-zA-Z0-9_]+)[\s\S]*?<\/task_metadata>/gi)] + const lastTaskMetadataMatch = taskMetadataMatches.at(-1) + if (lastTaskMetadataMatch) { + return lastTaskMetadataMatch[1] + } + + const explicitSessionMatches = [...output.matchAll(/Session ID:\s*(ses_[a-zA-Z0-9_]+)/g)] + return explicitSessionMatches.at(-1)?.[1] } diff --git a/src/hooks/atlas/tool-execute-after.ts b/src/hooks/atlas/tool-execute-after.ts index 55fb8ddd6..ed23ce011 100644 --- a/src/hooks/atlas/tool-execute-after.ts +++ b/src/hooks/atlas/tool-execute-after.ts @@ -1,5 +1,12 @@ import type { PluginInput } from "@opencode-ai/plugin" -import { appendSessionId, getPlanProgress, readBoulderState } from "../../features/boulder-state" +import { + appendSessionId, + getPlanProgress, + getTaskSessionState, + readBoulderState, + readCurrentTopLevelTask, + upsertTaskSessionState, +} from "../../features/boulder-state" import { log } from "../../shared/logger" import { isCallerOrchestrator } from "../../shared/session-utils" import { collectGitDiffStats, formatFileChanges } from "../../shared/git-worktree" @@ -18,13 +25,18 @@ import { isWriteOrEditToolName } from "./write-edit-tool-policy" import type { SessionState } from "./types" import type { ToolExecuteAfterInput, ToolExecuteAfterOutput } from "./types" +function resolvePreferredSessionId(currentSessionId?: string, trackedSessionId?: string): string { + return currentSessionId ?? trackedSessionId ?? "" +} + export function createToolExecuteAfterHandler(input: { ctx: PluginInput pendingFilePaths: Map + pendingTaskRefs: Map autoCommit: boolean getState: (sessionID: string) => SessionState }): (toolInput: ToolExecuteAfterInput, toolOutput: ToolExecuteAfterOutput) => Promise { - const { ctx, pendingFilePaths, autoCommit, getState } = input + const { ctx, pendingFilePaths, pendingTaskRefs, autoCommit, getState } = input return async (toolInput, toolOutput): Promise => { // Guard against undefined output (e.g., from /review command - see issue #1035) if (!toolOutput) { @@ -68,10 +80,21 @@ export function createToolExecuteAfterHandler(input: { const gitStats = collectGitDiffStats(ctx.directory) const fileChanges = formatFileChanges(gitStats) const subagentSessionId = extractSessionIdFromOutput(toolOutput.output) + const pendingTaskRef = toolInput.callID ? pendingTaskRefs.get(toolInput.callID) : undefined + if (toolInput.callID) { + pendingTaskRefs.delete(toolInput.callID) + } const boulderState = readBoulderState(ctx.directory) if (boulderState) { const progress = getPlanProgress(boulderState.active_plan) + const shouldSkipTaskSessionUpdate = pendingTaskRef === null + const currentTask = shouldSkipTaskSessionUpdate + ? null + : pendingTaskRef ?? readCurrentTopLevelTask(boulderState.active_plan) + const trackedTaskSession = currentTask + ? getTaskSessionState(ctx.directory, currentTask.key) + : null const sessionState = toolInput.sessionID ? getState(toolInput.sessionID) : undefined if (toolInput.sessionID && !boulderState.session_ids?.includes(toolInput.sessionID)) { @@ -82,6 +105,22 @@ export function createToolExecuteAfterHandler(input: { }) } + if (currentTask && subagentSessionId) { + upsertTaskSessionState(ctx.directory, { + taskKey: currentTask.key, + taskLabel: currentTask.label, + taskTitle: currentTask.title, + sessionId: subagentSessionId, + agent: toolOutput.metadata?.agent as string | undefined, + category: toolOutput.metadata?.category as string | undefined, + }) + } + + const preferredSessionId = resolvePreferredSessionId( + subagentSessionId, + trackedTaskSession?.session_id, + ) + // Preserve original subagent response - critical for debugging failed tasks const originalResponse = toolOutput.output const shouldPauseForApproval = sessionState @@ -102,11 +141,11 @@ export function createToolExecuteAfterHandler(input: { } const leadReminder = shouldPauseForApproval - ? buildFinalWaveApprovalReminder(boulderState.plan_name, progress, subagentSessionId) - : buildCompletionGate(boulderState.plan_name, subagentSessionId) + ? buildFinalWaveApprovalReminder(boulderState.plan_name, progress, preferredSessionId) + : buildCompletionGate(boulderState.plan_name, preferredSessionId) const followupReminder = shouldPauseForApproval ? null - : buildOrchestratorReminder(boulderState.plan_name, progress, subagentSessionId, autoCommit, false) + : buildOrchestratorReminder(boulderState.plan_name, progress, preferredSessionId, autoCommit, false) toolOutput.output = ` @@ -132,10 +171,13 @@ ${ plan: boulderState.plan_name, progress: `${progress.completed}/${progress.total}`, fileCount: gitStats.length, + preferredSessionId, waitingForFinalWaveApproval: shouldPauseForApproval, }) } else { - toolOutput.output += `\n\n${buildStandaloneVerificationReminder(subagentSessionId)}\n` + toolOutput.output += `\n\n${buildStandaloneVerificationReminder( + resolvePreferredSessionId(subagentSessionId), + )}\n` log(`[${HOOK_NAME}] Verification reminder appended for orchestrator`, { sessionID: toolInput.sessionID, diff --git a/src/hooks/atlas/tool-execute-before.ts b/src/hooks/atlas/tool-execute-before.ts index 51f670000..d020adaca 100644 --- a/src/hooks/atlas/tool-execute-before.ts +++ b/src/hooks/atlas/tool-execute-before.ts @@ -2,6 +2,7 @@ import { log } from "../../shared/logger" import { SYSTEM_DIRECTIVE_PREFIX } from "../../shared/system-directive" import { isCallerOrchestrator } from "../../shared/session-utils" import type { PluginInput } from "@opencode-ai/plugin" +import { readBoulderState, readCurrentTopLevelTask } from "../../features/boulder-state" import { HOOK_NAME } from "./hook-name" import { ORCHESTRATOR_DELEGATION_REQUIRED, SINGLE_TASK_DIRECTIVE } from "./system-reminder-templates" import { isSisyphusPath } from "./sisyphus-path" @@ -10,11 +11,12 @@ import { isWriteOrEditToolName } from "./write-edit-tool-policy" export function createToolExecuteBeforeHandler(input: { ctx: PluginInput pendingFilePaths: Map + pendingTaskRefs: Map }): ( toolInput: { tool: string; sessionID?: string; callID?: string }, toolOutput: { args: Record; message?: string } ) => Promise { - const { ctx, pendingFilePaths } = input + const { ctx, pendingFilePaths, pendingTaskRefs } = input return async (toolInput, toolOutput): Promise => { if (!(await isCallerOrchestrator(toolInput.sessionID, ctx.client))) { @@ -43,6 +45,25 @@ export function createToolExecuteBeforeHandler(input: { // Check task - inject single-task directive if (toolInput.tool === "task") { + if (toolInput.callID) { + const requestedSessionId = toolOutput.args.session_id as string | undefined + if (requestedSessionId) { + pendingTaskRefs.set(toolInput.callID, null) + } else { + const boulderState = readBoulderState(ctx.directory) + const currentTask = boulderState + ? readCurrentTopLevelTask(boulderState.active_plan) + : null + if (currentTask) { + pendingTaskRefs.set(toolInput.callID, { + key: currentTask.key, + label: currentTask.label, + title: currentTask.title, + }) + } + } + } + const prompt = toolOutput.args.prompt as string | undefined if (prompt && !prompt.includes(SYSTEM_DIRECTIVE_PREFIX)) { toolOutput.args.prompt = `${SINGLE_TASK_DIRECTIVE}\n` + prompt diff --git a/src/hooks/atlas/verification-reminders.ts b/src/hooks/atlas/verification-reminders.ts index cb988984e..80217798d 100644 --- a/src/hooks/atlas/verification-reminders.ts +++ b/src/hooks/atlas/verification-reminders.ts @@ -1,5 +1,14 @@ import { VERIFICATION_REMINDER } from "./system-reminder-templates" +function buildReuseHint(sessionId: string): string { + return ` +**PREFERRED REUSE SESSION FOR THE CURRENT TOP-LEVEL PLAN TASK** + +- Reuse \`${sessionId}\` first if verification fails or the result needs follow-up. +- Start a fresh subagent session only when reuse is unavailable or would cross task boundaries. +` +} + export function buildCompletionGate(planName: string, sessionId: string): string { return ` **COMPLETION GATE — DO NOT PROCEED UNTIL THIS IS DONE** @@ -25,7 +34,8 @@ task(session_id="${sessionId}", prompt="fix: checkbox not recorded correctly") **Your completion is NOT tracked until the checkbox is marked in the plan file.** -**VERIFICATION_REMINDER**` +**VERIFICATION_REMINDER** +${buildReuseHint(sessionId)}` } function buildVerificationReminder(sessionId: string): string { @@ -38,7 +48,9 @@ ${VERIFICATION_REMINDER} **If ANY verification fails, use this immediately:** \`\`\` task(session_id="${sessionId}", prompt="fix: [describe the specific failure]") -\`\`\`` +\`\`\` + +${buildReuseHint(sessionId)}` } export function buildOrchestratorReminder(