diff --git a/src/features/boulder-state/storage.ts b/src/features/boulder-state/storage.ts
index 1a4cd1b69..ffbbb69a7 100644
--- a/src/features/boulder-state/storage.ts
+++ b/src/features/boulder-state/storage.ts
@@ -9,6 +9,8 @@ import { dirname, join, basename } from "node:path"
import type { BoulderState, PlanProgress, TaskSessionState } from "./types"
import { BOULDER_DIR, BOULDER_FILE, PROMETHEUS_PLANS_DIR } from "./constants"
+const RESERVED_KEYS = new Set(["__proto__", "prototype", "constructor"]) // task keys rejected by upsertTaskSessionState, because each task key is used as a property name on the task_sessions object
+
export function getBoulderFilePath(directory: string): string {
return join(directory, BOULDER_DIR, BOULDER_FILE)
}
@@ -113,6 +115,10 @@ export function upsertTaskSessionState(
return null
}
+ if (RESERVED_KEYS.has(input.taskKey)) {
+ return null
+ }
+
const taskSessions = state.task_sessions ?? {}
taskSessions[input.taskKey] = {
task_key: input.taskKey,
diff --git a/src/features/boulder-state/top-level-task.test.ts b/src/features/boulder-state/top-level-task.test.ts
new file mode 100644
index 000000000..9de781cdc
--- /dev/null
+++ b/src/features/boulder-state/top-level-task.test.ts
@@ -0,0 +1,268 @@
+import { describe, expect, test } from "bun:test"
+import { writeFileSync } from "node:fs"
+import { join } from "node:path"
+import { tmpdir } from "node:os"
+
+import { readCurrentTopLevelTask } from "./top-level-task"
+
+function writePlanFile(fileName: string, content: string): string { // fixture helper: writes `content` as UTF-8 to `fileName` inside the OS temp directory
+  const targetPath = join(tmpdir(), fileName)
+  writeFileSync(targetPath, content, { encoding: "utf-8" })
+  return targetPath // path of the file that was just written
+}
+
+describe("readCurrentTopLevelTask", () => { // every case writes (or deliberately omits) a markdown plan in the OS temp dir and checks which task is reported as current
+ test("returns first unchecked top-level task in TODOs", () => {
+ // given: the first TODO is checked, the second is not, and a final-wave item is also pending
+ const planPath = writePlanFile(
+ `top-level-task-happy-${Date.now()}.md`,
+ `# Plan
+
+## TODOs
+- [x] 1. Done task
+- [ ] 2. Current task
+
+## Final Verification Wave
+- [ ] F1. Final review
+`,
+ )
+
+ // when
+ const result = readCurrentTopLevelTask(planPath)
+
+ // then: the unchecked TODO is reported, not the pending final-wave item; key has the form "<section>:<label>"
+ expect(result).toEqual({
+ key: "todo:2",
+ section: "todo",
+ label: "2",
+ title: "Current task",
+ })
+ })
+
+ test("returns null when all tasks are checked", () => {
+ // given: every checkbox in both tracked sections is checked
+ const planPath = writePlanFile(
+ `top-level-task-all-checked-${Date.now()}.md`,
+ `# Plan
+
+## TODOs
+- [x] 1. Done task
+- [x] 2. Another done task
+
+## Final Verification Wave
+- [x] F1. Final done review
+`,
+ )
+
+ // when
+ const result = readCurrentTopLevelTask(planPath)
+
+ // then: nothing is left to work on
+ expect(result).toBeNull()
+ })
+
+ test("returns null for empty plan file", () => {
+ // given: a plan file that exists but has no content
+ const planPath = writePlanFile(`top-level-task-empty-${Date.now()}.md`, "")
+
+ // when
+ const result = readCurrentTopLevelTask(planPath)
+
+ // then
+ expect(result).toBeNull()
+ })
+
+ test("returns null when plan file does not exist", () => {
+ // given: a path in the temp dir that is never written
+ const planPath = join(tmpdir(), `top-level-task-missing-${Date.now()}.md`)
+
+ // when
+ const result = readCurrentTopLevelTask(planPath)
+
+ // then: a missing file is expected to yield null rather than throw
+ expect(result).toBeNull()
+ })
+
+ test("skips nested or indented checkboxes", () => {
+ // given: an unchecked checkbox indented beneath a checked top-level item, followed by an unchecked top-level item
+ const planPath = writePlanFile(
+ `top-level-task-nested-${Date.now()}.md`,
+ `# Plan
+
+## TODOs
+- [x] 1. Done task
+ - [ ] nested should be ignored
+- [ ] 2. Top-level pending
+`,
+ )
+
+ // when
+ const result = readCurrentTopLevelTask(planPath)
+
+ // then: the indented item is passed over in favor of top-level item 2
+ expect(result?.key).toBe("todo:2")
+ })
+
+ test("falls back to Final Verification Wave when TODOs are all checked", () => {
+ // given: all TODOs are checked and one final-wave item is still pending
+ const planPath = writePlanFile(
+ `top-level-task-fallback-${Date.now()}.md`,
+ `# Plan
+
+## TODOs
+- [x] 1. Done task
+- [x] 2. Done task
+
+## Final Verification Wave
+- [ ] F1. Final review pending
+`,
+ )
+
+ // when
+ const result = readCurrentTopLevelTask(planPath)
+
+ // then: the final-wave item is reported; the key carries the label lowercased ("f1") while label keeps its original case ("F1")
+ expect(result).toEqual({
+ key: "final-wave:f1",
+ section: "final-wave",
+ label: "F1",
+ title: "Final review pending",
+ })
+ })
+
+ test("selects the first unchecked task among mixed checked and unchecked TODOs", () => {
+ // given: one checked TODO followed by two unchecked ones
+ const planPath = writePlanFile(
+ `top-level-task-mixed-${Date.now()}.md`,
+ `# Plan
+
+## TODOs
+- [x] 1. Done task
+- [ ] 2. First unchecked
+- [ ] 3. Second unchecked
+`,
+ )
+
+ // when
+ const result = readCurrentTopLevelTask(planPath)
+
+ // then: the earliest unchecked item wins
+ expect(result?.key).toBe("todo:2")
+ expect(result?.title).toBe("First unchecked")
+ })
+
+ test("ignores malformed labels and continues to next unchecked task", () => {
+ // given: an unchecked item without a leading "<label>." precedes a well-formed unchecked item
+ const planPath = writePlanFile(
+ `top-level-task-malformed-${Date.now()}.md`,
+ `# Plan
+
+## TODOs
+- [ ] no number prefix
+- [ ] 2. Valid task after malformed label
+`,
+ )
+
+ // when
+ const result = readCurrentTopLevelTask(planPath)
+
+ // then: the unlabeled item is skipped and the labeled one is reported
+ expect(result).toEqual({
+ key: "todo:2",
+ section: "todo",
+ label: "2",
+ title: "Valid task after malformed label",
+ })
+ })
+
+ test("supports unchecked tasks with asterisk bullets", () => {
+ // given: the list item uses "*" instead of "-" as its bullet
+ const planPath = writePlanFile(
+ `top-level-task-asterisk-${Date.now()}.md`,
+ `# Plan
+
+## TODOs
+* [ ] 1. Task using asterisk bullet
+`,
+ )
+
+ // when
+ const result = readCurrentTopLevelTask(planPath)
+
+ // then
+ expect(result?.key).toBe("todo:1")
+ expect(result?.title).toBe("Task using asterisk bullet")
+ })
+
+ test("returns final-wave task when plan has only Final Verification Wave section", () => {
+ // given: the plan has no TODOs section at all
+ const planPath = writePlanFile(
+ `top-level-task-final-only-${Date.now()}.md`,
+ `# Plan
+
+## Final Verification Wave
+- [ ] F2. Final-only task
+`,
+ )
+
+ // when
+ const result = readCurrentTopLevelTask(planPath)
+
+ // then
+ expect(result).toEqual({
+ key: "final-wave:f2",
+ section: "final-wave",
+ label: "F2",
+ title: "Final-only task",
+ })
+ })
+
+ test("returns the first unchecked task when multiple unchecked tasks exist", () => {
+ // given: three unchecked TODOs and no checked ones
+ const planPath = writePlanFile(
+ `top-level-task-multiple-${Date.now()}.md`,
+ `# Plan
+
+## TODOs
+- [ ] 1. First unchecked task
+- [ ] 2. Second unchecked task
+- [ ] 3. Third unchecked task
+`,
+ )
+
+ // when
+ const result = readCurrentTopLevelTask(planPath)
+
+ // then: document order decides, so item 1 is reported
+ expect(result?.label).toBe("1")
+ expect(result?.title).toBe("First unchecked task")
+ })
+
+ test("ignores unchecked content in non-target sections during section transitions", () => {
+ // given: unchecked items under untracked headings (Notes, Decisions) placed before and after a fully checked TODOs section
+ const planPath = writePlanFile(
+ `top-level-task-sections-${Date.now()}.md`,
+ `# Plan
+
+## Notes
+- [ ] 99. Should be ignored because section is not tracked
+
+## TODOs
+- [x] 1. Done implementation task
+
+## Decisions
+- [ ] 100. Should also be ignored
+
+## Final Verification Wave
+- [ ] F3. Final verification task
+`,
+ )
+
+ // when
+ const result = readCurrentTopLevelTask(planPath)
+
+ // then: items 99 and 100 are not candidates; only the final-wave item qualifies
+ expect(result?.key).toBe("final-wave:f3")
+ expect(result?.section).toBe("final-wave")
+ })
+})
diff --git a/src/hooks/atlas/subagent-session-id.test.ts b/src/hooks/atlas/subagent-session-id.test.ts
index 5973784c9..45f716f0f 100644
--- a/src/hooks/atlas/subagent-session-id.test.ts
+++ b/src/hooks/atlas/subagent-session-id.test.ts
@@ -25,6 +25,17 @@ describe("extractSessionIdFromOutput", () => {
expect(result).toBe("ses_sync_12345")
})
+  test("extracts hyphenated session IDs from task metadata blocks", () => {
+    // given: the id sits inside a <task_metadata> block, the first place the extractor looks; the closing tag is required for the block to be detected
+    const output = `Task completed.\n\n<task_metadata>\nsession_id: ses_auth-flow-123\n</task_metadata>`
+
+    // when
+    const result = extractSessionIdFromOutput(output)
+
+    // then: the hyphenated tail is kept, i.e. the id is not cut short at "ses_auth"
+    expect(result).toBe("ses_auth-flow-123")
+  })
+
+
test("returns undefined when no session id is present", () => {
// given
const output = "Task completed without metadata"
diff --git a/src/hooks/atlas/subagent-session-id.ts b/src/hooks/atlas/subagent-session-id.ts
index 4ce8886f3..b316e5f68 100644
--- a/src/hooks/atlas/subagent-session-id.ts
+++ b/src/hooks/atlas/subagent-session-id.ts
@@ -7,13 +7,13 @@ export function extractSessionIdFromOutput(output: string): string | undefined {
const taskMetadataBlocks = [...output.matchAll(/([\s\S]*?)<\/task_metadata>/gi)]
const lastTaskMetadataBlock = taskMetadataBlocks.at(-1)?.[1]
if (lastTaskMetadataBlock) {
- const taskMetadataSessionMatch = lastTaskMetadataBlock.match(/session_id:\s*(ses_[a-zA-Z0-9_]+)/i)
+ const taskMetadataSessionMatch = lastTaskMetadataBlock.match(/session_id:\s*(ses_[a-zA-Z0-9_-]+)/i)
if (taskMetadataSessionMatch) {
return taskMetadataSessionMatch[1]
}
}
- const explicitSessionMatches = [...output.matchAll(/Session ID:\s*(ses_[a-zA-Z0-9_]+)/g)]
+ const explicitSessionMatches = [...output.matchAll(/Session ID:\s*(ses_[a-zA-Z0-9_-]+)/g)]
return explicitSessionMatches.at(-1)?.[1]
}
diff --git a/src/hooks/atlas/tool-execute-after.ts b/src/hooks/atlas/tool-execute-after.ts
index 7be1b14df..823cdf1b9 100644
--- a/src/hooks/atlas/tool-execute-after.ts
+++ b/src/hooks/atlas/tool-execute-after.ts
@@ -160,8 +160,8 @@ export function createToolExecuteAfterHandler(input: {
taskLabel: currentTask.label,
taskTitle: currentTask.title,
sessionId: subagentSessionId,
- agent: toolOutput.metadata?.agent as string | undefined,
- category: toolOutput.metadata?.category as string | undefined,
+ agent: typeof toolOutput.metadata?.agent === "string" ? toolOutput.metadata.agent : undefined,
+ category: typeof toolOutput.metadata?.category === "string" ? toolOutput.metadata.category : undefined,
})
}