diff --git a/src/features/boulder-state/storage.ts b/src/features/boulder-state/storage.ts index 1a4cd1b69..ffbbb69a7 100644 --- a/src/features/boulder-state/storage.ts +++ b/src/features/boulder-state/storage.ts @@ -9,6 +9,8 @@ import { dirname, join, basename } from "node:path" import type { BoulderState, PlanProgress, TaskSessionState } from "./types" import { BOULDER_DIR, BOULDER_FILE, PROMETHEUS_PLANS_DIR } from "./constants" +const RESERVED_KEYS = new Set(["__proto__", "prototype", "constructor"]) + export function getBoulderFilePath(directory: string): string { return join(directory, BOULDER_DIR, BOULDER_FILE) } @@ -113,6 +115,10 @@ export function upsertTaskSessionState( return null } + if (RESERVED_KEYS.has(input.taskKey)) { + return null + } + const taskSessions = state.task_sessions ?? {} taskSessions[input.taskKey] = { task_key: input.taskKey, diff --git a/src/features/boulder-state/top-level-task.test.ts b/src/features/boulder-state/top-level-task.test.ts new file mode 100644 index 000000000..9de781cdc --- /dev/null +++ b/src/features/boulder-state/top-level-task.test.ts @@ -0,0 +1,268 @@ +import { describe, expect, test } from "bun:test" +import { writeFileSync } from "node:fs" +import { join } from "node:path" +import { tmpdir } from "node:os" + +import { readCurrentTopLevelTask } from "./top-level-task" + +function writePlanFile(fileName: string, content: string): string { + const planPath = join(tmpdir(), fileName) + writeFileSync(planPath, content, "utf-8") + return planPath +} + +describe("readCurrentTopLevelTask", () => { + test("returns first unchecked top-level task in TODOs", () => { + // given + const planPath = writePlanFile( + `top-level-task-happy-${Date.now()}.md`, + `# Plan + +## TODOs +- [x] 1. Done task +- [ ] 2. Current task + +## Final Verification Wave +- [ ] F1. Final review +`, + ) + + // when + const result = readCurrentTopLevelTask(planPath) + + // then + expect(result).toEqual({ + key: "todo:2", + section: "todo", + label: "2", + title: "Current task", + }) + }) + + test("returns null when all tasks are checked", () => { + // given + const planPath = writePlanFile( + `top-level-task-all-checked-${Date.now()}.md`, + `# Plan + +## TODOs +- [x] 1. Done task +- [x] 2. Another done task + +## Final Verification Wave +- [x] F1. Final done review +`, + ) + + // when + const result = readCurrentTopLevelTask(planPath) + + // then + expect(result).toBeNull() + }) + + test("returns null for empty plan file", () => { + // given + const planPath = writePlanFile(`top-level-task-empty-${Date.now()}.md`, "") + + // when + const result = readCurrentTopLevelTask(planPath) + + // then + expect(result).toBeNull() + }) + + test("returns null when plan file does not exist", () => { + // given + const planPath = join(tmpdir(), `top-level-task-missing-${Date.now()}.md`) + + // when + const result = readCurrentTopLevelTask(planPath) + + // then + expect(result).toBeNull() + }) + + test("skips nested or indented checkboxes", () => { + // given + const planPath = writePlanFile( + `top-level-task-nested-${Date.now()}.md`, + `# Plan + +## TODOs +- [x] 1. Done task + - [ ] nested should be ignored +- [ ] 2. Top-level pending +`, + ) + + // when + const result = readCurrentTopLevelTask(planPath) + + // then + expect(result?.key).toBe("todo:2") + }) + + test("falls back to Final Verification Wave when TODOs are all checked", () => { + // given + const planPath = writePlanFile( + `top-level-task-fallback-${Date.now()}.md`, + `# Plan + +## TODOs +- [x] 1. Done task +- [x] 2. Done task + +## Final Verification Wave +- [ ] F1. Final review pending +`, + ) + + // when + const result = readCurrentTopLevelTask(planPath) + + // then + expect(result).toEqual({ + key: "final-wave:f1", + section: "final-wave", + label: "F1", + title: "Final review pending", + }) + }) + + test("selects the first unchecked task among mixed checked and unchecked TODOs", () => { + // given + const planPath = writePlanFile( + `top-level-task-mixed-${Date.now()}.md`, + `# Plan + +## TODOs +- [x] 1. Done task +- [ ] 2. First unchecked +- [ ] 3. Second unchecked +`, + ) + + // when + const result = readCurrentTopLevelTask(planPath) + + // then + expect(result?.key).toBe("todo:2") + expect(result?.title).toBe("First unchecked") + }) + + test("ignores malformed labels and continues to next unchecked task", () => { + // given + const planPath = writePlanFile( + `top-level-task-malformed-${Date.now()}.md`, + `# Plan + +## TODOs +- [ ] no number prefix +- [ ] 2. Valid task after malformed label +`, + ) + + // when + const result = readCurrentTopLevelTask(planPath) + + // then + expect(result).toEqual({ + key: "todo:2", + section: "todo", + label: "2", + title: "Valid task after malformed label", + }) + }) + + test("supports unchecked tasks with asterisk bullets", () => { + // given + const planPath = writePlanFile( + `top-level-task-asterisk-${Date.now()}.md`, + `# Plan + +## TODOs +* [ ] 1. Task using asterisk bullet +`, + ) + + // when + const result = readCurrentTopLevelTask(planPath) + + // then + expect(result?.key).toBe("todo:1") + expect(result?.title).toBe("Task using asterisk bullet") + }) + + test("returns final-wave task when plan has only Final Verification Wave section", () => { + // given + const planPath = writePlanFile( + `top-level-task-final-only-${Date.now()}.md`, + `# Plan + +## Final Verification Wave +- [ ] F2. Final-only task +`, + ) + + // when + const result = readCurrentTopLevelTask(planPath) + + // then + expect(result).toEqual({ + key: "final-wave:f2", + section: "final-wave", + label: "F2", + title: "Final-only task", + }) + }) + + test("returns the first unchecked task when multiple unchecked tasks exist", () => { + // given + const planPath = writePlanFile( + `top-level-task-multiple-${Date.now()}.md`, + `# Plan + +## TODOs +- [ ] 1. First unchecked task +- [ ] 2. Second unchecked task +- [ ] 3. Third unchecked task +`, + ) + + // when + const result = readCurrentTopLevelTask(planPath) + + // then + expect(result?.label).toBe("1") + expect(result?.title).toBe("First unchecked task") + }) + + test("ignores unchecked content in non-target sections during section transitions", () => { + // given + const planPath = writePlanFile( + `top-level-task-sections-${Date.now()}.md`, + `# Plan + +## Notes +- [ ] 99. Should be ignored because section is not tracked + +## TODOs +- [x] 1. Done implementation task + +## Decisions +- [ ] 100. Should also be ignored + +## Final Verification Wave +- [ ] F3. Final verification task +`, + ) + + // when + const result = readCurrentTopLevelTask(planPath) + + // then + expect(result?.key).toBe("final-wave:f3") + expect(result?.section).toBe("final-wave") + }) +}) diff --git a/src/hooks/atlas/subagent-session-id.test.ts b/src/hooks/atlas/subagent-session-id.test.ts index 5973784c9..45f716f0f 100644 --- a/src/hooks/atlas/subagent-session-id.test.ts +++ b/src/hooks/atlas/subagent-session-id.test.ts @@ -25,6 +25,17 @@ describe("extractSessionIdFromOutput", () => { expect(result).toBe("ses_sync_12345") }) + test("extracts hyphenated session IDs from task metadata blocks", () => { + // given + const output = `Task completed.\n\n\nsession_id: ses_auth-flow-123\n` + + // when + const result = extractSessionIdFromOutput(output) + + // then + expect(result).toBe("ses_auth-flow-123") + }) + test("returns undefined when no session id is present", () => { // given const output = "Task completed without metadata" diff --git a/src/hooks/atlas/subagent-session-id.ts b/src/hooks/atlas/subagent-session-id.ts index 4ce8886f3..b316e5f68 100644 --- a/src/hooks/atlas/subagent-session-id.ts +++ b/src/hooks/atlas/subagent-session-id.ts @@ -7,13 +7,13 @@ export function extractSessionIdFromOutput(output: string): string | undefined { const taskMetadataBlocks = [...output.matchAll(/([\s\S]*?)<\/task_metadata>/gi)] const lastTaskMetadataBlock = taskMetadataBlocks.at(-1)?.[1] if (lastTaskMetadataBlock) { - const taskMetadataSessionMatch = lastTaskMetadataBlock.match(/session_id:\s*(ses_[a-zA-Z0-9_]+)/i) + const taskMetadataSessionMatch = lastTaskMetadataBlock.match(/session_id:\s*(ses_[a-zA-Z0-9_-]+)/i) if (taskMetadataSessionMatch) { return taskMetadataSessionMatch[1] } } - const explicitSessionMatches = [...output.matchAll(/Session ID:\s*(ses_[a-zA-Z0-9_]+)/g)] + const explicitSessionMatches = [...output.matchAll(/Session ID:\s*(ses_[a-zA-Z0-9_-]+)/g)] return explicitSessionMatches.at(-1)?.[1] } diff --git a/src/hooks/atlas/tool-execute-after.ts b/src/hooks/atlas/tool-execute-after.ts index 7be1b14df..823cdf1b9 100644 --- a/src/hooks/atlas/tool-execute-after.ts +++ b/src/hooks/atlas/tool-execute-after.ts @@ -160,8 +160,8 @@ export function createToolExecuteAfterHandler(input: { taskLabel: currentTask.label, taskTitle: currentTask.title, sessionId: subagentSessionId, - agent: toolOutput.metadata?.agent as string | undefined, - category: toolOutput.metadata?.category as string | undefined, + agent: typeof toolOutput.metadata?.agent === "string" ? toolOutput.metadata.agent : undefined, + category: typeof toolOutput.metadata?.category === "string" ? toolOutput.metadata.category : undefined, }) }