Merge pull request #2640 from HaD0Yun/had0yun/atlas-task-session-reuse
feat(atlas): persist preferred task session reuse
This commit is contained in:
@@ -1,3 +1,4 @@
|
|||||||
export * from "./types"
|
export * from "./types"
|
||||||
export * from "./constants"
|
export * from "./constants"
|
||||||
export * from "./storage"
|
export * from "./storage"
|
||||||
|
export * from "./top-level-task"
|
||||||
|
|||||||
@@ -11,8 +11,11 @@ import {
|
|||||||
getPlanName,
|
getPlanName,
|
||||||
createBoulderState,
|
createBoulderState,
|
||||||
findPrometheusPlans,
|
findPrometheusPlans,
|
||||||
|
getTaskSessionState,
|
||||||
|
upsertTaskSessionState,
|
||||||
} from "./storage"
|
} from "./storage"
|
||||||
import type { BoulderState } from "./types"
|
import type { BoulderState } from "./types"
|
||||||
|
import { readCurrentTopLevelTask } from "./top-level-task"
|
||||||
|
|
||||||
describe("boulder-state", () => {
|
describe("boulder-state", () => {
|
||||||
const TEST_DIR = join(tmpdir(), "boulder-state-test-" + Date.now())
|
const TEST_DIR = join(tmpdir(), "boulder-state-test-" + Date.now())
|
||||||
@@ -134,6 +137,24 @@ describe("boulder-state", () => {
|
|||||||
expect(result?.session_ids).toEqual(["session-1", "session-2"])
|
expect(result?.session_ids).toEqual(["session-1", "session-2"])
|
||||||
expect(result?.plan_name).toBe("my-plan")
|
expect(result?.plan_name).toBe("my-plan")
|
||||||
})
|
})
|
||||||
|
|
||||||
|
test("should default task_sessions to empty object when missing from JSON", () => {
|
||||||
|
// given - boulder.json without task_sessions field
|
||||||
|
const boulderFile = join(SISYPHUS_DIR, "boulder.json")
|
||||||
|
writeFileSync(boulderFile, JSON.stringify({
|
||||||
|
active_plan: "/path/to/plan.md",
|
||||||
|
started_at: "2026-01-01T00:00:00Z",
|
||||||
|
session_ids: ["session-1"],
|
||||||
|
plan_name: "plan",
|
||||||
|
}))
|
||||||
|
|
||||||
|
// when
|
||||||
|
const result = readBoulderState(TEST_DIR)
|
||||||
|
|
||||||
|
// then
|
||||||
|
expect(result).not.toBeNull()
|
||||||
|
expect(result!.task_sessions).toEqual({})
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
describe("writeBoulderState", () => {
|
describe("writeBoulderState", () => {
|
||||||
@@ -249,6 +270,115 @@ describe("boulder-state", () => {
|
|||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
describe("task session state", () => {
|
||||||
|
test("should persist and read preferred session for a top-level plan task", () => {
|
||||||
|
// given - existing boulder state
|
||||||
|
const state: BoulderState = {
|
||||||
|
active_plan: "/plan.md",
|
||||||
|
started_at: "2026-01-02T10:00:00Z",
|
||||||
|
session_ids: ["session-1"],
|
||||||
|
plan_name: "plan",
|
||||||
|
}
|
||||||
|
writeBoulderState(TEST_DIR, state)
|
||||||
|
|
||||||
|
// when
|
||||||
|
upsertTaskSessionState(TEST_DIR, {
|
||||||
|
taskKey: "todo:1",
|
||||||
|
taskLabel: "1",
|
||||||
|
taskTitle: "Implement auth flow",
|
||||||
|
sessionId: "ses_task_123",
|
||||||
|
agent: "sisyphus-junior",
|
||||||
|
category: "deep",
|
||||||
|
})
|
||||||
|
const result = getTaskSessionState(TEST_DIR, "todo:1")
|
||||||
|
|
||||||
|
// then
|
||||||
|
expect(result).not.toBeNull()
|
||||||
|
expect(result?.session_id).toBe("ses_task_123")
|
||||||
|
expect(result?.task_title).toBe("Implement auth flow")
|
||||||
|
expect(result?.agent).toBe("sisyphus-junior")
|
||||||
|
expect(result?.category).toBe("deep")
|
||||||
|
})
|
||||||
|
|
||||||
|
test("should overwrite preferred session for the same top-level plan task", () => {
|
||||||
|
// given - existing boulder state with prior preferred session
|
||||||
|
const state: BoulderState = {
|
||||||
|
active_plan: "/plan.md",
|
||||||
|
started_at: "2026-01-02T10:00:00Z",
|
||||||
|
session_ids: ["session-1"],
|
||||||
|
plan_name: "plan",
|
||||||
|
task_sessions: {
|
||||||
|
"todo:1": {
|
||||||
|
task_key: "todo:1",
|
||||||
|
task_label: "1",
|
||||||
|
task_title: "Implement auth flow",
|
||||||
|
session_id: "ses_old",
|
||||||
|
updated_at: "2026-01-02T10:00:00Z",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
writeBoulderState(TEST_DIR, state)
|
||||||
|
|
||||||
|
// when
|
||||||
|
upsertTaskSessionState(TEST_DIR, {
|
||||||
|
taskKey: "todo:1",
|
||||||
|
taskLabel: "1",
|
||||||
|
taskTitle: "Implement auth flow",
|
||||||
|
sessionId: "ses_new",
|
||||||
|
})
|
||||||
|
const result = getTaskSessionState(TEST_DIR, "todo:1")
|
||||||
|
|
||||||
|
// then
|
||||||
|
expect(result?.session_id).toBe("ses_new")
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe("readCurrentTopLevelTask", () => {
|
||||||
|
test("should return the first unchecked top-level task in TODOs", () => {
|
||||||
|
// given - plan with nested and top-level unchecked tasks
|
||||||
|
const planPath = join(TEST_DIR, "current-task-plan.md")
|
||||||
|
writeFileSync(planPath, `# Plan
|
||||||
|
|
||||||
|
## TODOs
|
||||||
|
- [x] 1. Finished task
|
||||||
|
- [ ] nested acceptance checkbox
|
||||||
|
- [ ] 2. Current task
|
||||||
|
|
||||||
|
## Final Verification Wave
|
||||||
|
- [ ] F1. Final review
|
||||||
|
`)
|
||||||
|
|
||||||
|
// when
|
||||||
|
const result = readCurrentTopLevelTask(planPath)
|
||||||
|
|
||||||
|
// then
|
||||||
|
expect(result).not.toBeNull()
|
||||||
|
expect(result?.key).toBe("todo:2")
|
||||||
|
expect(result?.title).toBe("Current task")
|
||||||
|
})
|
||||||
|
|
||||||
|
test("should fall back to final-wave task when implementation tasks are complete", () => {
|
||||||
|
// given - plan with only final-wave work remaining
|
||||||
|
const planPath = join(TEST_DIR, "final-wave-current-task-plan.md")
|
||||||
|
writeFileSync(planPath, `# Plan
|
||||||
|
|
||||||
|
## TODOs
|
||||||
|
- [x] 1. Finished task
|
||||||
|
|
||||||
|
## Final Verification Wave
|
||||||
|
- [ ] F1. Final review
|
||||||
|
`)
|
||||||
|
|
||||||
|
// when
|
||||||
|
const result = readCurrentTopLevelTask(planPath)
|
||||||
|
|
||||||
|
// then
|
||||||
|
expect(result).not.toBeNull()
|
||||||
|
expect(result?.key).toBe("final-wave:f1")
|
||||||
|
expect(result?.title).toBe("Final review")
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
describe("getPlanProgress", () => {
|
describe("getPlanProgress", () => {
|
||||||
test("should count completed and uncompleted checkboxes", () => {
|
test("should count completed and uncompleted checkboxes", () => {
|
||||||
// given - plan file with checkboxes
|
// given - plan file with checkboxes
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
import { existsSync, readFileSync, writeFileSync, mkdirSync, readdirSync } from "node:fs"
|
import { existsSync, readFileSync, writeFileSync, mkdirSync, readdirSync } from "node:fs"
|
||||||
import { dirname, join, basename } from "node:path"
|
import { dirname, join, basename } from "node:path"
|
||||||
import type { BoulderState, PlanProgress } from "./types"
|
import type { BoulderState, PlanProgress, TaskSessionState } from "./types"
|
||||||
import { BOULDER_DIR, BOULDER_FILE, PROMETHEUS_PLANS_DIR } from "./constants"
|
import { BOULDER_DIR, BOULDER_FILE, PROMETHEUS_PLANS_DIR } from "./constants"
|
||||||
|
|
||||||
export function getBoulderFilePath(directory: string): string {
|
export function getBoulderFilePath(directory: string): string {
|
||||||
@@ -29,6 +29,9 @@ export function readBoulderState(directory: string): BoulderState | null {
|
|||||||
if (!Array.isArray(parsed.session_ids)) {
|
if (!Array.isArray(parsed.session_ids)) {
|
||||||
parsed.session_ids = []
|
parsed.session_ids = []
|
||||||
}
|
}
|
||||||
|
if (!parsed.task_sessions || typeof parsed.task_sessions !== "object" || Array.isArray(parsed.task_sessions)) {
|
||||||
|
parsed.task_sessions = {}
|
||||||
|
}
|
||||||
return parsed as BoulderState
|
return parsed as BoulderState
|
||||||
} catch {
|
} catch {
|
||||||
return null
|
return null
|
||||||
@@ -85,6 +88,50 @@ export function clearBoulderState(directory: string): boolean {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export function getTaskSessionState(directory: string, taskKey: string): TaskSessionState | null {
|
||||||
|
const state = readBoulderState(directory)
|
||||||
|
if (!state?.task_sessions) {
|
||||||
|
return null
|
||||||
|
}
|
||||||
|
|
||||||
|
return state.task_sessions[taskKey] ?? null
|
||||||
|
}
|
||||||
|
|
||||||
|
export function upsertTaskSessionState(
|
||||||
|
directory: string,
|
||||||
|
input: {
|
||||||
|
taskKey: string
|
||||||
|
taskLabel: string
|
||||||
|
taskTitle: string
|
||||||
|
sessionId: string
|
||||||
|
agent?: string
|
||||||
|
category?: string
|
||||||
|
},
|
||||||
|
): BoulderState | null {
|
||||||
|
const state = readBoulderState(directory)
|
||||||
|
if (!state) {
|
||||||
|
return null
|
||||||
|
}
|
||||||
|
|
||||||
|
const taskSessions = state.task_sessions ?? {}
|
||||||
|
taskSessions[input.taskKey] = {
|
||||||
|
task_key: input.taskKey,
|
||||||
|
task_label: input.taskLabel,
|
||||||
|
task_title: input.taskTitle,
|
||||||
|
session_id: input.sessionId,
|
||||||
|
...(input.agent !== undefined ? { agent: input.agent } : {}),
|
||||||
|
...(input.category !== undefined ? { category: input.category } : {}),
|
||||||
|
updated_at: new Date().toISOString(),
|
||||||
|
}
|
||||||
|
|
||||||
|
state.task_sessions = taskSessions
|
||||||
|
if (writeBoulderState(directory, state)) {
|
||||||
|
return state
|
||||||
|
}
|
||||||
|
|
||||||
|
return null
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Find Prometheus plan files for this project.
|
* Find Prometheus plan files for this project.
|
||||||
* Prometheus stores plans at: {project}/.sisyphus/plans/{name}.md
|
* Prometheus stores plans at: {project}/.sisyphus/plans/{name}.md
|
||||||
|
|||||||
77
src/features/boulder-state/top-level-task.ts
Normal file
77
src/features/boulder-state/top-level-task.ts
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
import { existsSync, readFileSync } from "node:fs"
|
||||||
|
|
||||||
|
import type { TopLevelTaskRef } from "./types"
|
||||||
|
|
||||||
|
const TODO_HEADING_PATTERN = /^##\s+TODOs\b/i
|
||||||
|
const FINAL_VERIFICATION_HEADING_PATTERN = /^##\s+Final Verification Wave\b/i
|
||||||
|
const SECOND_LEVEL_HEADING_PATTERN = /^##\s+/
|
||||||
|
const UNCHECKED_CHECKBOX_PATTERN = /^(\s*)[-*]\s*\[\s*\]\s*(.+)$/
|
||||||
|
const TODO_TASK_PATTERN = /^(\d+)\.\s+(.+)$/
|
||||||
|
const FINAL_WAVE_TASK_PATTERN = /^(F\d+)\.\s+(.+)$/i
|
||||||
|
|
||||||
|
type PlanSection = "todo" | "final-wave" | "other"
|
||||||
|
|
||||||
|
function buildTaskRef(
|
||||||
|
section: "todo" | "final-wave",
|
||||||
|
taskLabel: string,
|
||||||
|
): TopLevelTaskRef | null {
|
||||||
|
const pattern = section === "todo" ? TODO_TASK_PATTERN : FINAL_WAVE_TASK_PATTERN
|
||||||
|
const match = taskLabel.match(pattern)
|
||||||
|
if (!match) {
|
||||||
|
return null
|
||||||
|
}
|
||||||
|
|
||||||
|
const rawLabel = match[1]
|
||||||
|
const title = match[2].trim()
|
||||||
|
|
||||||
|
return {
|
||||||
|
key: `${section}:${rawLabel.toLowerCase()}`,
|
||||||
|
section,
|
||||||
|
label: rawLabel,
|
||||||
|
title,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export function readCurrentTopLevelTask(planPath: string): TopLevelTaskRef | null {
|
||||||
|
if (!existsSync(planPath)) {
|
||||||
|
return null
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
const content = readFileSync(planPath, "utf-8")
|
||||||
|
const lines = content.split(/\r?\n/)
|
||||||
|
let section: PlanSection = "other"
|
||||||
|
|
||||||
|
for (const line of lines) {
|
||||||
|
if (SECOND_LEVEL_HEADING_PATTERN.test(line)) {
|
||||||
|
section = TODO_HEADING_PATTERN.test(line)
|
||||||
|
? "todo"
|
||||||
|
: FINAL_VERIFICATION_HEADING_PATTERN.test(line)
|
||||||
|
? "final-wave"
|
||||||
|
: "other"
|
||||||
|
}
|
||||||
|
|
||||||
|
const uncheckedTaskMatch = line.match(UNCHECKED_CHECKBOX_PATTERN)
|
||||||
|
if (!uncheckedTaskMatch) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if (uncheckedTaskMatch[1].length > 0) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if (section !== "todo" && section !== "final-wave") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
const taskRef = buildTaskRef(section, uncheckedTaskMatch[2].trim())
|
||||||
|
if (taskRef) {
|
||||||
|
return taskRef
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return null
|
||||||
|
} catch {
|
||||||
|
return null
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -18,6 +18,8 @@ export interface BoulderState {
|
|||||||
agent?: string
|
agent?: string
|
||||||
/** Absolute path to the git worktree root where work happens */
|
/** Absolute path to the git worktree root where work happens */
|
||||||
worktree_path?: string
|
worktree_path?: string
|
||||||
|
/** Preferred reusable subagent sessions keyed by current top-level plan task */
|
||||||
|
task_sessions?: Record<string, TaskSessionState>
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface PlanProgress {
|
export interface PlanProgress {
|
||||||
@@ -28,3 +30,31 @@ export interface PlanProgress {
|
|||||||
/** Whether all tasks are done */
|
/** Whether all tasks are done */
|
||||||
isComplete: boolean
|
isComplete: boolean
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export interface TaskSessionState {
|
||||||
|
/** Stable identifier for the current top-level plan task (e.g. todo:1 / final-wave:F1) */
|
||||||
|
task_key: string
|
||||||
|
/** Original task label from the plan file */
|
||||||
|
task_label: string
|
||||||
|
/** Full task title from the plan file */
|
||||||
|
task_title: string
|
||||||
|
/** Preferred reusable subagent session */
|
||||||
|
session_id: string
|
||||||
|
/** Agent associated with the task session, when known */
|
||||||
|
agent?: string
|
||||||
|
/** Category associated with the task session, when known */
|
||||||
|
category?: string
|
||||||
|
/** Last update timestamp */
|
||||||
|
updated_at: string
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface TopLevelTaskRef {
|
||||||
|
/** Stable identifier for the current top-level plan task */
|
||||||
|
key: string
|
||||||
|
/** Task section in the Prometheus plan */
|
||||||
|
section: "todo" | "final-wave"
|
||||||
|
/** Original label token (e.g. 1 / F1) */
|
||||||
|
label: string
|
||||||
|
/** Full task title extracted from the checkbox line */
|
||||||
|
title: string
|
||||||
|
}
|
||||||
|
|||||||
@@ -2,11 +2,12 @@ import type { PluginInput } from "@opencode-ai/plugin"
|
|||||||
import { createAtlasEventHandler } from "./event-handler"
|
import { createAtlasEventHandler } from "./event-handler"
|
||||||
import { createToolExecuteAfterHandler } from "./tool-execute-after"
|
import { createToolExecuteAfterHandler } from "./tool-execute-after"
|
||||||
import { createToolExecuteBeforeHandler } from "./tool-execute-before"
|
import { createToolExecuteBeforeHandler } from "./tool-execute-before"
|
||||||
import type { AtlasHookOptions, SessionState } from "./types"
|
import type { AtlasHookOptions, PendingTaskRef, SessionState } from "./types"
|
||||||
|
|
||||||
export function createAtlasHook(ctx: PluginInput, options?: AtlasHookOptions) {
|
export function createAtlasHook(ctx: PluginInput, options?: AtlasHookOptions) {
|
||||||
const sessions = new Map<string, SessionState>()
|
const sessions = new Map<string, SessionState>()
|
||||||
const pendingFilePaths = new Map<string, string>()
|
const pendingFilePaths = new Map<string, string>()
|
||||||
|
const pendingTaskRefs = new Map<string, PendingTaskRef>()
|
||||||
const autoCommit = options?.autoCommit ?? true
|
const autoCommit = options?.autoCommit ?? true
|
||||||
|
|
||||||
function getState(sessionID: string): SessionState {
|
function getState(sessionID: string): SessionState {
|
||||||
@@ -20,7 +21,7 @@ export function createAtlasHook(ctx: PluginInput, options?: AtlasHookOptions) {
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
handler: createAtlasEventHandler({ ctx, options, sessions, getState }),
|
handler: createAtlasEventHandler({ ctx, options, sessions, getState }),
|
||||||
"tool.execute.before": createToolExecuteBeforeHandler({ ctx, pendingFilePaths }),
|
"tool.execute.before": createToolExecuteBeforeHandler({ ctx, pendingFilePaths, pendingTaskRefs }),
|
||||||
"tool.execute.after": createToolExecuteAfterHandler({ ctx, pendingFilePaths, autoCommit, getState }),
|
"tool.execute.after": createToolExecuteAfterHandler({ ctx, pendingFilePaths, pendingTaskRefs, autoCommit, getState }),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -15,6 +15,8 @@ export async function injectBoulderContinuation(input: {
|
|||||||
total: number
|
total: number
|
||||||
agent?: string
|
agent?: string
|
||||||
worktreePath?: string
|
worktreePath?: string
|
||||||
|
preferredTaskSessionId?: string
|
||||||
|
preferredTaskTitle?: string
|
||||||
backgroundManager?: BackgroundManager
|
backgroundManager?: BackgroundManager
|
||||||
sessionState: SessionState
|
sessionState: SessionState
|
||||||
}): Promise<void> {
|
}): Promise<void> {
|
||||||
@@ -26,6 +28,8 @@ export async function injectBoulderContinuation(input: {
|
|||||||
total,
|
total,
|
||||||
agent,
|
agent,
|
||||||
worktreePath,
|
worktreePath,
|
||||||
|
preferredTaskSessionId,
|
||||||
|
preferredTaskTitle,
|
||||||
backgroundManager,
|
backgroundManager,
|
||||||
sessionState,
|
sessionState,
|
||||||
} = input
|
} = input
|
||||||
@@ -40,9 +44,13 @@ export async function injectBoulderContinuation(input: {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const worktreeContext = worktreePath ? `\n\n[Worktree: ${worktreePath}]` : ""
|
const worktreeContext = worktreePath ? `\n\n[Worktree: ${worktreePath}]` : ""
|
||||||
|
const preferredSessionContext = preferredTaskSessionId
|
||||||
|
? `\n\n[Preferred reuse session for current top-level plan task${preferredTaskTitle ? `: ${preferredTaskTitle}` : ""}: ${preferredTaskSessionId}]`
|
||||||
|
: ""
|
||||||
const prompt =
|
const prompt =
|
||||||
BOULDER_CONTINUATION_PROMPT.replace(/{PLAN_NAME}/g, planName) +
|
BOULDER_CONTINUATION_PROMPT.replace(/{PLAN_NAME}/g, planName) +
|
||||||
`\n\n[Status: ${total - remaining}/${total} completed, ${remaining} remaining]` +
|
`\n\n[Status: ${total - remaining}/${total} completed, ${remaining} remaining]` +
|
||||||
|
preferredSessionContext +
|
||||||
worktreeContext
|
worktreeContext
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ import { existsSync, mkdirSync, rmSync, writeFileSync } from "node:fs"
|
|||||||
import { tmpdir } from "node:os"
|
import { tmpdir } from "node:os"
|
||||||
import { join } from "node:path"
|
import { join } from "node:path"
|
||||||
import { createOpencodeClient } from "@opencode-ai/sdk"
|
import { createOpencodeClient } from "@opencode-ai/sdk"
|
||||||
import type { AssistantMessage } from "@opencode-ai/sdk"
|
import type { AssistantMessage, Session } from "@opencode-ai/sdk"
|
||||||
import type { BoulderState } from "../../features/boulder-state"
|
import type { BoulderState } from "../../features/boulder-state"
|
||||||
import { clearBoulderState, writeBoulderState } from "../../features/boulder-state"
|
import { clearBoulderState, writeBoulderState } from "../../features/boulder-state"
|
||||||
|
|
||||||
@@ -52,6 +52,23 @@ describe("Atlas final-wave approval gate regressions", () => {
|
|||||||
response: new Response(),
|
response: new Response(),
|
||||||
}))
|
}))
|
||||||
|
|
||||||
|
Reflect.set(client.session, "get", async ({ path }: { path: { id: string } }) => {
|
||||||
|
const parentID = path.id === "ses_nested_scope_review"
|
||||||
|
? "atlas-nested-final-wave-session"
|
||||||
|
: path.id.startsWith("ses_parallel_review_")
|
||||||
|
? "atlas-parallel-final-wave-session"
|
||||||
|
: "main-session-123"
|
||||||
|
|
||||||
|
return {
|
||||||
|
data: {
|
||||||
|
id: path.id,
|
||||||
|
parentID,
|
||||||
|
} as Session,
|
||||||
|
request: new Request(`http://localhost/session/${path.id}`),
|
||||||
|
response: new Response(),
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
directory: testDirectory,
|
directory: testDirectory,
|
||||||
project: {} as AtlasHookContext["project"],
|
project: {} as AtlasHookContext["project"],
|
||||||
|
|||||||
@@ -60,10 +60,18 @@ describe("Atlas final verification approval gate", () => {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
Reflect.set(client.session, "get", async () => {
|
Reflect.set(client.session, "get", async ({ path }: { path: { id: string } }) => {
|
||||||
|
const parentID = path.id === "ses_final_wave_review"
|
||||||
|
? "atlas-final-wave-session"
|
||||||
|
: path.id === "ses_feature_task"
|
||||||
|
? "atlas-non-final-session"
|
||||||
|
: "main-session-123"
|
||||||
return {
|
return {
|
||||||
data: { parentID: "main-session-123" } as Session,
|
data: {
|
||||||
request: new Request("http://localhost/session/main-session-123"),
|
id: path.id,
|
||||||
|
parentID,
|
||||||
|
} as Session,
|
||||||
|
request: new Request(`http://localhost/session/${path.id}`),
|
||||||
response: new Response(),
|
response: new Response(),
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -1,5 +1,10 @@
|
|||||||
import type { PluginInput } from "@opencode-ai/plugin"
|
import type { PluginInput } from "@opencode-ai/plugin"
|
||||||
import { getPlanProgress, readBoulderState } from "../../features/boulder-state"
|
import {
|
||||||
|
getPlanProgress,
|
||||||
|
getTaskSessionState,
|
||||||
|
readBoulderState,
|
||||||
|
readCurrentTopLevelTask,
|
||||||
|
} from "../../features/boulder-state"
|
||||||
import { log } from "../../shared/logger"
|
import { log } from "../../shared/logger"
|
||||||
import { injectBoulderContinuation } from "./boulder-continuation-injector"
|
import { injectBoulderContinuation } from "./boulder-continuation-injector"
|
||||||
import { HOOK_NAME } from "./hook-name"
|
import { HOOK_NAME } from "./hook-name"
|
||||||
@@ -31,6 +36,14 @@ async function injectContinuation(input: {
|
|||||||
input.sessionState.lastContinuationInjectedAt = Date.now()
|
input.sessionState.lastContinuationInjectedAt = Date.now()
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
const currentBoulder = readBoulderState(input.ctx.directory)
|
||||||
|
const currentTask = currentBoulder
|
||||||
|
? readCurrentTopLevelTask(currentBoulder.active_plan)
|
||||||
|
: null
|
||||||
|
const preferredTaskSession = currentTask
|
||||||
|
? getTaskSessionState(input.ctx.directory, currentTask.key)
|
||||||
|
: null
|
||||||
|
|
||||||
await injectBoulderContinuation({
|
await injectBoulderContinuation({
|
||||||
ctx: input.ctx,
|
ctx: input.ctx,
|
||||||
sessionID: input.sessionID,
|
sessionID: input.sessionID,
|
||||||
@@ -39,6 +52,8 @@ async function injectContinuation(input: {
|
|||||||
total: input.progress.total,
|
total: input.progress.total,
|
||||||
agent: input.agent,
|
agent: input.agent,
|
||||||
worktreePath: input.worktreePath,
|
worktreePath: input.worktreePath,
|
||||||
|
preferredTaskSessionId: preferredTaskSession?.session_id,
|
||||||
|
preferredTaskTitle: preferredTaskSession?.task_title,
|
||||||
backgroundManager: input.options?.backgroundManager,
|
backgroundManager: input.options?.backgroundManager,
|
||||||
sessionState: input.sessionState,
|
sessionState: input.sessionState,
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ import {
|
|||||||
} from "../../features/boulder-state"
|
} from "../../features/boulder-state"
|
||||||
import type { BoulderState } from "../../features/boulder-state"
|
import type { BoulderState } from "../../features/boulder-state"
|
||||||
import { _resetForTesting, subagentSessions, updateSessionAgent } from "../../features/claude-code-session-state"
|
import { _resetForTesting, subagentSessions, updateSessionAgent } from "../../features/claude-code-session-state"
|
||||||
|
import type { PendingTaskRef } from "./types"
|
||||||
|
|
||||||
const TEST_STORAGE_ROOT = join(tmpdir(), `atlas-message-storage-${randomUUID()}`)
|
const TEST_STORAGE_ROOT = join(tmpdir(), `atlas-message-storage-${randomUUID()}`)
|
||||||
const TEST_MESSAGE_STORAGE = join(TEST_STORAGE_ROOT, "message")
|
const TEST_MESSAGE_STORAGE = join(TEST_STORAGE_ROOT, "message")
|
||||||
@@ -33,25 +34,40 @@ mock.module("../../shared/opencode-storage-detection", () => ({
|
|||||||
}))
|
}))
|
||||||
|
|
||||||
const { createAtlasHook } = await import("./index")
|
const { createAtlasHook } = await import("./index")
|
||||||
|
const { createToolExecuteAfterHandler } = await import("./tool-execute-after")
|
||||||
|
const { createToolExecuteBeforeHandler } = await import("./tool-execute-before")
|
||||||
const { MESSAGE_STORAGE } = await import("../../features/hook-message-injector")
|
const { MESSAGE_STORAGE } = await import("../../features/hook-message-injector")
|
||||||
|
|
||||||
describe("atlas hook", () => {
|
describe("atlas hook", () => {
|
||||||
let TEST_DIR: string
|
let TEST_DIR: string
|
||||||
let SISYPHUS_DIR: string
|
let SISYPHUS_DIR: string
|
||||||
|
|
||||||
function createMockPluginInput(overrides?: { promptMock?: ReturnType<typeof mock> }) {
|
function createMockPluginInput(overrides?: {
|
||||||
|
promptMock?: ReturnType<typeof mock>
|
||||||
|
sessionGetMock?: ReturnType<typeof mock>
|
||||||
|
}) {
|
||||||
const promptMock = overrides?.promptMock ?? mock(() => Promise.resolve())
|
const promptMock = overrides?.promptMock ?? mock(() => Promise.resolve())
|
||||||
|
const sessionGetMock = overrides?.sessionGetMock ?? mock(async ({ path }: { path: { id: string } }) => ({
|
||||||
|
data: {
|
||||||
|
id: path.id,
|
||||||
|
parentID: path.id.startsWith("ses_") ? "session-1" : "main-session-123",
|
||||||
|
},
|
||||||
|
}))
|
||||||
return {
|
return {
|
||||||
directory: TEST_DIR,
|
directory: TEST_DIR,
|
||||||
client: {
|
client: {
|
||||||
session: {
|
session: {
|
||||||
get: async () => ({ data: { parentID: "main-session-123" } }),
|
get: sessionGetMock,
|
||||||
prompt: promptMock,
|
prompt: promptMock,
|
||||||
promptAsync: promptMock,
|
promptAsync: promptMock,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
_promptMock: promptMock,
|
_promptMock: promptMock,
|
||||||
} as unknown as Parameters<typeof createAtlasHook>[0] & { _promptMock: ReturnType<typeof mock> }
|
_sessionGetMock: sessionGetMock,
|
||||||
|
} as unknown as Parameters<typeof createAtlasHook>[0] & {
|
||||||
|
_promptMock: ReturnType<typeof mock>
|
||||||
|
_sessionGetMock: ReturnType<typeof mock>
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function setupMessageStorage(sessionID: string, agent: string): void {
|
function setupMessageStorage(sessionID: string, agent: string): void {
|
||||||
@@ -404,12 +420,417 @@ describe("atlas hook", () => {
|
|||||||
|
|
||||||
// then - should include verification instructions
|
// then - should include verification instructions
|
||||||
expect(output.output).toContain("LYING")
|
expect(output.output).toContain("LYING")
|
||||||
expect(output.output).toContain("PHASE 1")
|
expect(output.output).toContain("PHASE 1")
|
||||||
expect(output.output).toContain("PHASE 2")
|
expect(output.output).toContain("PHASE 2")
|
||||||
|
|
||||||
cleanupMessageStorage(sessionID)
|
cleanupMessageStorage(sessionID)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
test("should clean pending task refs when a task returns background launch output", async () => {
|
||||||
|
// given - direct handlers with shared pending maps
|
||||||
|
const sessionID = "session-bg-launch-cleanup-test"
|
||||||
|
setupMessageStorage(sessionID, "atlas")
|
||||||
|
|
||||||
|
const planPath = join(TEST_DIR, "background-cleanup-plan.md")
|
||||||
|
writeFileSync(planPath, `# Plan
|
||||||
|
|
||||||
|
## TODOs
|
||||||
|
- [ ] 1. Implement auth flow
|
||||||
|
`)
|
||||||
|
writeBoulderState(TEST_DIR, {
|
||||||
|
active_plan: planPath,
|
||||||
|
started_at: "2026-01-02T10:00:00Z",
|
||||||
|
session_ids: ["session-1"],
|
||||||
|
plan_name: "background-cleanup-plan",
|
||||||
|
})
|
||||||
|
|
||||||
|
const pendingFilePaths = new Map<string, string>()
|
||||||
|
const pendingTaskRefs = new Map<string, PendingTaskRef>()
|
||||||
|
const beforeHandler = createToolExecuteBeforeHandler({
|
||||||
|
ctx: createMockPluginInput(),
|
||||||
|
pendingFilePaths,
|
||||||
|
pendingTaskRefs,
|
||||||
|
})
|
||||||
|
const afterHandler = createToolExecuteAfterHandler({
|
||||||
|
ctx: createMockPluginInput(),
|
||||||
|
pendingFilePaths,
|
||||||
|
pendingTaskRefs,
|
||||||
|
autoCommit: true,
|
||||||
|
getState: () => ({ promptFailureCount: 0 }),
|
||||||
|
})
|
||||||
|
|
||||||
|
// when - the task is captured before execution
|
||||||
|
await beforeHandler(
|
||||||
|
{ tool: "task", sessionID, callID: "call-bg-launch" },
|
||||||
|
{ args: { prompt: "Implement auth flow" } }
|
||||||
|
)
|
||||||
|
expect(pendingTaskRefs.size).toBe(1)
|
||||||
|
|
||||||
|
// and the task returns a background launch result
|
||||||
|
await afterHandler(
|
||||||
|
{ tool: "task", sessionID, callID: "call-bg-launch" },
|
||||||
|
{
|
||||||
|
title: "Sisyphus Task",
|
||||||
|
output: "Background task launched.\n\nSession ID: ses_bg_12345",
|
||||||
|
metadata: {},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
// then - the pending task ref is still cleaned up
|
||||||
|
expect(pendingTaskRefs.size).toBe(0)
|
||||||
|
|
||||||
|
cleanupMessageStorage(sessionID)
|
||||||
|
})
|
||||||
|
|
||||||
|
test("should persist preferred subagent session for the current top-level task", async () => {
|
||||||
|
// given - boulder state with a current top-level task, Atlas caller
|
||||||
|
const sessionID = "session-task-session-track-test"
|
||||||
|
setupMessageStorage(sessionID, "atlas")
|
||||||
|
|
||||||
|
const planPath = join(TEST_DIR, "task-session-plan.md")
|
||||||
|
writeFileSync(planPath, `# Plan
|
||||||
|
|
||||||
|
## TODOs
|
||||||
|
- [ ] 1. Implement auth flow
|
||||||
|
- [ ] nested acceptance checkbox
|
||||||
|
`)
|
||||||
|
|
||||||
|
const state: BoulderState = {
|
||||||
|
active_plan: planPath,
|
||||||
|
started_at: "2026-01-02T10:00:00Z",
|
||||||
|
session_ids: ["session-1"],
|
||||||
|
plan_name: "task-session-plan",
|
||||||
|
}
|
||||||
|
writeBoulderState(TEST_DIR, state)
|
||||||
|
|
||||||
|
const hook = createAtlasHook(createMockPluginInput())
|
||||||
|
const output = {
|
||||||
|
title: "Sisyphus Task",
|
||||||
|
output: `Task completed successfully
|
||||||
|
|
||||||
|
<task_metadata>
|
||||||
|
session_id: ses_auth_flow_123
|
||||||
|
</task_metadata>`,
|
||||||
|
metadata: {
|
||||||
|
agent: "sisyphus-junior",
|
||||||
|
category: "deep",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
// when
|
||||||
|
await hook["tool.execute.after"](
|
||||||
|
{ tool: "task", sessionID },
|
||||||
|
output
|
||||||
|
)
|
||||||
|
|
||||||
|
// then
|
||||||
|
const updatedState = readBoulderState(TEST_DIR)
|
||||||
|
expect(updatedState?.task_sessions?.["todo:1"]?.session_id).toBe("ses_auth_flow_123")
|
||||||
|
expect(updatedState?.task_sessions?.["todo:1"]?.task_title).toBe("Implement auth flow")
|
||||||
|
expect(updatedState?.task_sessions?.["todo:1"]?.agent).toBe("sisyphus-junior")
|
||||||
|
expect(updatedState?.task_sessions?.["todo:1"]?.category).toBe("deep")
|
||||||
|
|
||||||
|
cleanupMessageStorage(sessionID)
|
||||||
|
})
|
||||||
|
|
||||||
|
test("should preserve the delegated task key even after the plan advances to the next task", async () => {
|
||||||
|
// given - Atlas caller starts task 1, then the plan advances before task output is processed
|
||||||
|
const sessionID = "session-stable-task-key-test"
|
||||||
|
setupMessageStorage(sessionID, "atlas")
|
||||||
|
|
||||||
|
const planPath = join(TEST_DIR, "stable-task-key-plan.md")
|
||||||
|
writeFileSync(planPath, `# Plan
|
||||||
|
|
||||||
|
## TODOs
|
||||||
|
- [ ] 1. Implement auth flow
|
||||||
|
- [ ] 2. Add API validation
|
||||||
|
`)
|
||||||
|
|
||||||
|
writeBoulderState(TEST_DIR, {
|
||||||
|
active_plan: planPath,
|
||||||
|
started_at: "2026-01-02T10:00:00Z",
|
||||||
|
session_ids: ["session-1"],
|
||||||
|
plan_name: "stable-task-key-plan",
|
||||||
|
})
|
||||||
|
|
||||||
|
const hook = createAtlasHook(createMockPluginInput())
|
||||||
|
|
||||||
|
// when - Atlas delegates task 1
|
||||||
|
await hook["tool.execute.before"](
|
||||||
|
{ tool: "task", sessionID, callID: "call-task-1" },
|
||||||
|
{ args: { prompt: "Implement auth flow" } }
|
||||||
|
)
|
||||||
|
|
||||||
|
// and the plan is advanced before the task output is processed
|
||||||
|
writeFileSync(planPath, `# Plan
|
||||||
|
|
||||||
|
## TODOs
|
||||||
|
- [x] 1. Implement auth flow
|
||||||
|
- [ ] 2. Add API validation
|
||||||
|
`)
|
||||||
|
|
||||||
|
await hook["tool.execute.after"](
|
||||||
|
{ tool: "task", sessionID, callID: "call-task-1" },
|
||||||
|
{
|
||||||
|
title: "Sisyphus Task",
|
||||||
|
output: `Task completed successfully
|
||||||
|
|
||||||
|
<task_metadata>
|
||||||
|
session_id: ses_auth_flow_123
|
||||||
|
</task_metadata>`,
|
||||||
|
metadata: {
|
||||||
|
agent: "sisyphus-junior",
|
||||||
|
category: "deep",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
// then - the completed task session is still recorded against task 1, not task 2
|
||||||
|
const updatedState = readBoulderState(TEST_DIR)
|
||||||
|
expect(updatedState?.task_sessions?.["todo:1"]?.session_id).toBe("ses_auth_flow_123")
|
||||||
|
expect(updatedState?.task_sessions?.["todo:2"]).toBeUndefined()
|
||||||
|
|
||||||
|
cleanupMessageStorage(sessionID)
|
||||||
|
})
|
||||||
|
|
||||||
|
test("should not overwrite the current task mapping when task() explicitly resumes an older session", async () => {
|
||||||
|
// given - current plan is on task 2, but Atlas explicitly resumes an older session for a previous task
|
||||||
|
const sessionID = "session-cross-task-resume-test"
|
||||||
|
setupMessageStorage(sessionID, "atlas")
|
||||||
|
|
||||||
|
const planPath = join(TEST_DIR, "cross-task-resume-plan.md")
|
||||||
|
writeFileSync(planPath, `# Plan
|
||||||
|
|
||||||
|
## TODOs
|
||||||
|
- [x] 1. Implement auth flow
|
||||||
|
- [ ] 2. Add API validation
|
||||||
|
`)
|
||||||
|
|
||||||
|
writeBoulderState(TEST_DIR, {
|
||||||
|
active_plan: planPath,
|
||||||
|
started_at: "2026-01-02T10:00:00Z",
|
||||||
|
session_ids: ["session-1"],
|
||||||
|
plan_name: "cross-task-resume-plan",
|
||||||
|
})
|
||||||
|
|
||||||
|
const hook = createAtlasHook(createMockPluginInput())
|
||||||
|
|
||||||
|
// when - Atlas resumes an explicit prior session
|
||||||
|
await hook["tool.execute.before"](
|
||||||
|
{ tool: "task", sessionID, callID: "call-resume-old-task" },
|
||||||
|
{ args: { prompt: "Follow up on previous task", session_id: "ses_old_task_111" } }
|
||||||
|
)
|
||||||
|
|
||||||
|
const output = {
|
||||||
|
title: "Sisyphus Task",
|
||||||
|
output: `Task continued successfully
|
||||||
|
|
||||||
|
<task_metadata>
|
||||||
|
session_id: ses_old_task_111
|
||||||
|
</task_metadata>`,
|
||||||
|
metadata: {
|
||||||
|
agent: "sisyphus-junior",
|
||||||
|
category: "deep",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
await hook["tool.execute.after"](
|
||||||
|
{ tool: "task", sessionID, callID: "call-resume-old-task" },
|
||||||
|
output
|
||||||
|
)
|
||||||
|
|
||||||
|
// then - Atlas does not poison task 2's preferred session mapping
|
||||||
|
const updatedState = readBoulderState(TEST_DIR)
|
||||||
|
expect(updatedState?.task_sessions?.["todo:2"]).toBeUndefined()
|
||||||
|
expect(output.output).not.toContain('task(session_id="ses_old_task_111"')
|
||||||
|
|
||||||
|
cleanupMessageStorage(sessionID)
|
||||||
|
})
|
||||||
|
|
||||||
|
test("should not reuse an explicitly resumed session id in completion reminders", async () => {
|
||||||
|
// given - current plan is on task 2 with an existing tracked session
|
||||||
|
const sessionID = "session-explicit-resume-reminder-test"
|
||||||
|
setupMessageStorage(sessionID, "atlas")
|
||||||
|
|
||||||
|
const planPath = join(TEST_DIR, "explicit-resume-reminder-plan.md")
|
||||||
|
writeFileSync(planPath, `# Plan
|
||||||
|
|
||||||
|
## TODOs
|
||||||
|
- [x] 1. Implement auth flow
|
||||||
|
- [ ] 2. Add API validation
|
||||||
|
`)
|
||||||
|
|
||||||
|
writeBoulderState(TEST_DIR, {
|
||||||
|
active_plan: planPath,
|
||||||
|
started_at: "2026-01-02T10:00:00Z",
|
||||||
|
session_ids: ["session-1"],
|
||||||
|
plan_name: "explicit-resume-reminder-plan",
|
||||||
|
task_sessions: {
|
||||||
|
"todo:2": {
|
||||||
|
task_key: "todo:2",
|
||||||
|
task_label: "2",
|
||||||
|
task_title: "Add API validation",
|
||||||
|
session_id: "ses_tracked_current_task",
|
||||||
|
updated_at: "2026-01-02T10:00:00Z",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
const hook = createAtlasHook(createMockPluginInput())
|
||||||
|
const output = {
|
||||||
|
title: "Sisyphus Task",
|
||||||
|
output: `Task continued successfully
|
||||||
|
|
||||||
|
<task_metadata>
|
||||||
|
session_id: ses_old_task_111
|
||||||
|
</task_metadata>`,
|
||||||
|
metadata: {},
|
||||||
|
}
|
||||||
|
|
||||||
|
// when
|
||||||
|
await hook["tool.execute.before"](
|
||||||
|
{ tool: "task", sessionID, callID: "call-explicit-resume-reminder" },
|
||||||
|
{ args: { prompt: "Follow up on previous task", session_id: "ses_old_task_111" } }
|
||||||
|
)
|
||||||
|
await hook["tool.execute.after"](
|
||||||
|
{ tool: "task", sessionID, callID: "call-explicit-resume-reminder" },
|
||||||
|
output
|
||||||
|
)
|
||||||
|
|
||||||
|
// then
|
||||||
|
expect(output.output).not.toContain('task(session_id="ses_old_task_111"')
|
||||||
|
expect(output.output).toContain("ses_tracked_current_task")
|
||||||
|
|
||||||
|
cleanupMessageStorage(sessionID)
|
||||||
|
})
|
||||||
|
|
||||||
|
test("should skip persistence when multiple in-flight task calls claim the same top-level task", async () => {
|
||||||
|
// given
|
||||||
|
const sessionID = "session-parallel-task-collision-test"
|
||||||
|
setupMessageStorage(sessionID, "atlas")
|
||||||
|
|
||||||
|
const planPath = join(TEST_DIR, "parallel-task-collision-plan.md")
|
||||||
|
writeFileSync(planPath, `# Plan
|
||||||
|
|
||||||
|
## TODOs
|
||||||
|
- [ ] 1. Implement auth flow
|
||||||
|
- [ ] 2. Add API validation
|
||||||
|
`)
|
||||||
|
|
||||||
|
writeBoulderState(TEST_DIR, {
|
||||||
|
active_plan: planPath,
|
||||||
|
started_at: "2026-01-02T10:00:00Z",
|
||||||
|
session_ids: ["session-1"],
|
||||||
|
plan_name: "parallel-task-collision-plan",
|
||||||
|
})
|
||||||
|
|
||||||
|
const pendingFilePaths = new Map<string, string>()
|
||||||
|
const pendingTaskRefs = new Map<string, PendingTaskRef>()
|
||||||
|
const beforeHandler = createToolExecuteBeforeHandler({
|
||||||
|
ctx: createMockPluginInput(),
|
||||||
|
pendingFilePaths,
|
||||||
|
pendingTaskRefs,
|
||||||
|
})
|
||||||
|
const afterHandler = createToolExecuteAfterHandler({
|
||||||
|
ctx: createMockPluginInput(),
|
||||||
|
pendingFilePaths,
|
||||||
|
pendingTaskRefs,
|
||||||
|
autoCommit: true,
|
||||||
|
getState: () => ({ promptFailureCount: 0 }),
|
||||||
|
})
|
||||||
|
|
||||||
|
// when - two task() calls start before either one completes
|
||||||
|
await beforeHandler(
|
||||||
|
{ tool: "task", sessionID, callID: "call-task-first" },
|
||||||
|
{ args: { prompt: "Implement auth flow part 1" } }
|
||||||
|
)
|
||||||
|
await beforeHandler(
|
||||||
|
{ tool: "task", sessionID, callID: "call-task-second" },
|
||||||
|
{ args: { prompt: "Implement auth flow part 2" } }
|
||||||
|
)
|
||||||
|
|
||||||
|
const secondPendingTaskRef = pendingTaskRefs.get("call-task-second")
|
||||||
|
|
||||||
|
await afterHandler(
|
||||||
|
{ tool: "task", sessionID, callID: "call-task-second" },
|
||||||
|
{
|
||||||
|
title: "Sisyphus Task",
|
||||||
|
output: `Task completed successfully
|
||||||
|
|
||||||
|
<task_metadata>
|
||||||
|
session_id: ses_parallel_collision_222
|
||||||
|
</task_metadata>`,
|
||||||
|
metadata: {},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
// then
|
||||||
|
expect(secondPendingTaskRef).toEqual({
|
||||||
|
kind: "skip",
|
||||||
|
reason: "ambiguous_task_key",
|
||||||
|
task: {
|
||||||
|
key: "todo:1",
|
||||||
|
label: "1",
|
||||||
|
title: "Implement auth flow",
|
||||||
|
},
|
||||||
|
})
|
||||||
|
const updatedState = readBoulderState(TEST_DIR)
|
||||||
|
expect(updatedState?.task_sessions?.["todo:1"]).toBeUndefined()
|
||||||
|
|
||||||
|
cleanupMessageStorage(sessionID)
|
||||||
|
})
|
||||||
|
|
||||||
|
test("should ignore extracted session ids that are outside the active boulder lineage", async () => {
|
||||||
|
// given
|
||||||
|
const sessionID = "session-untrusted-session-id-test"
|
||||||
|
setupMessageStorage(sessionID, "atlas")
|
||||||
|
|
||||||
|
const planPath = join(TEST_DIR, "untrusted-session-id-plan.md")
|
||||||
|
writeFileSync(planPath, `# Plan
|
||||||
|
|
||||||
|
## TODOs
|
||||||
|
- [ ] 1. Implement auth flow
|
||||||
|
`)
|
||||||
|
|
||||||
|
writeBoulderState(TEST_DIR, {
|
||||||
|
active_plan: planPath,
|
||||||
|
started_at: "2026-01-02T10:00:00Z",
|
||||||
|
session_ids: ["session-1"],
|
||||||
|
plan_name: "untrusted-session-id-plan",
|
||||||
|
})
|
||||||
|
|
||||||
|
const hook = createAtlasHook(createMockPluginInput({
|
||||||
|
sessionGetMock: mock(async ({ path }: { path: { id: string } }) => ({
|
||||||
|
data: {
|
||||||
|
id: path.id,
|
||||||
|
parentID: path.id === "ses_untrusted_999" ? "session-outside-lineage" : "main-session-123",
|
||||||
|
},
|
||||||
|
})),
|
||||||
|
}))
|
||||||
|
const output = {
|
||||||
|
title: "Sisyphus Task",
|
||||||
|
output: `Task completed successfully
|
||||||
|
|
||||||
|
<task_metadata>
|
||||||
|
session_id: ses_untrusted_999
|
||||||
|
</task_metadata>`,
|
||||||
|
metadata: {},
|
||||||
|
}
|
||||||
|
|
||||||
|
// when
|
||||||
|
await hook["tool.execute.after"](
|
||||||
|
{ tool: "task", sessionID },
|
||||||
|
output
|
||||||
|
)
|
||||||
|
|
||||||
|
// then
|
||||||
|
const updatedState = readBoulderState(TEST_DIR)
|
||||||
|
expect(updatedState?.task_sessions?.["todo:1"]).toBeUndefined()
|
||||||
|
expect(output.output).not.toContain('task(session_id="ses_untrusted_999"')
|
||||||
|
expect(output.output).toContain('task(session_id="<session_id>"')
|
||||||
|
|
||||||
|
cleanupMessageStorage(sessionID)
|
||||||
|
})
|
||||||
|
|
||||||
describe("completion gate output ordering", () => {
|
describe("completion gate output ordering", () => {
|
||||||
const COMPLETION_GATE_SESSION = "completion-gate-order-test"
|
const COMPLETION_GATE_SESSION = "completion-gate-order-test"
|
||||||
|
|
||||||
@@ -1147,6 +1568,48 @@ describe("atlas hook", () => {
|
|||||||
expect(callArgs.body.parts[0].text).toContain("2 remaining")
|
expect(callArgs.body.parts[0].text).toContain("2 remaining")
|
||||||
})
|
})
|
||||||
|
|
||||||
|
test("should include preferred reuse session in continuation prompt for current top-level task", async () => {
|
||||||
|
// given - boulder state with tracked preferred session
|
||||||
|
const planPath = join(TEST_DIR, "preferred-session-plan.md")
|
||||||
|
writeFileSync(planPath, `# Plan
|
||||||
|
|
||||||
|
## TODOs
|
||||||
|
- [ ] 1. Implement auth flow
|
||||||
|
`)
|
||||||
|
|
||||||
|
writeBoulderState(TEST_DIR, {
|
||||||
|
active_plan: planPath,
|
||||||
|
started_at: "2026-01-02T10:00:00Z",
|
||||||
|
session_ids: [MAIN_SESSION_ID],
|
||||||
|
plan_name: "preferred-session-plan",
|
||||||
|
task_sessions: {
|
||||||
|
"todo:1": {
|
||||||
|
task_key: "todo:1",
|
||||||
|
task_label: "1",
|
||||||
|
task_title: "Implement auth flow",
|
||||||
|
session_id: "ses_auth_flow_123",
|
||||||
|
updated_at: "2026-01-02T10:00:00Z",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
const mockInput = createMockPluginInput()
|
||||||
|
const hook = createAtlasHook(mockInput)
|
||||||
|
|
||||||
|
// when
|
||||||
|
await hook.handler({
|
||||||
|
event: {
|
||||||
|
type: "session.idle",
|
||||||
|
properties: { sessionID: MAIN_SESSION_ID },
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
// then
|
||||||
|
const callArgs = mockInput._promptMock.mock.calls[0][0]
|
||||||
|
expect(callArgs.body.parts[0].text).toContain("Preferred reuse session for current top-level plan task")
|
||||||
|
expect(callArgs.body.parts[0].text).toContain("ses_auth_flow_123")
|
||||||
|
})
|
||||||
|
|
||||||
test("should inject when last agent is sisyphus and boulder targets atlas explicitly", async () => {
|
test("should inject when last agent is sisyphus and boulder targets atlas explicitly", async () => {
|
||||||
// given - boulder explicitly set to atlas, but last agent is sisyphus (initial state after /start-work)
|
// given - boulder explicitly set to atlas, but last agent is sisyphus (initial state after /start-work)
|
||||||
const planPath = join(TEST_DIR, "test-plan.md")
|
const planPath = join(TEST_DIR, "test-plan.md")
|
||||||
|
|||||||
68
src/hooks/atlas/subagent-session-id.test.ts
Normal file
68
src/hooks/atlas/subagent-session-id.test.ts
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
import { describe, expect, test } from "bun:test"
|
||||||
|
|
||||||
|
import { extractSessionIdFromOutput } from "./subagent-session-id"
|
||||||
|
|
||||||
|
describe("extractSessionIdFromOutput", () => {
|
||||||
|
test("extracts Session ID blocks from background output", () => {
|
||||||
|
// given
|
||||||
|
const output = `Background task launched.\n\nSession ID: ses_bg_12345`
|
||||||
|
|
||||||
|
// when
|
||||||
|
const result = extractSessionIdFromOutput(output)
|
||||||
|
|
||||||
|
// then
|
||||||
|
expect(result).toBe("ses_bg_12345")
|
||||||
|
})
|
||||||
|
|
||||||
|
test("extracts session_id from task metadata blocks", () => {
|
||||||
|
// given
|
||||||
|
const output = `Task completed.\n\n<task_metadata>\nsession_id: ses_sync_12345\n</task_metadata>`
|
||||||
|
|
||||||
|
// when
|
||||||
|
const result = extractSessionIdFromOutput(output)
|
||||||
|
|
||||||
|
// then
|
||||||
|
expect(result).toBe("ses_sync_12345")
|
||||||
|
})
|
||||||
|
|
||||||
|
test("returns undefined when no session id is present", () => {
|
||||||
|
// given
|
||||||
|
const output = "Task completed without metadata"
|
||||||
|
|
||||||
|
// when
|
||||||
|
const result = extractSessionIdFromOutput(output)
|
||||||
|
|
||||||
|
// then
|
||||||
|
expect(result).toBeUndefined()
|
||||||
|
})
|
||||||
|
|
||||||
|
test("prefers the session id inside the trailing task_metadata block", () => {
|
||||||
|
// given
|
||||||
|
const output = `The previous attempt mentioned session_id: ses_wrong_body_123 but that was only context.
|
||||||
|
|
||||||
|
<task_metadata>
|
||||||
|
session_id: ses_real_metadata_456
|
||||||
|
</task_metadata>`
|
||||||
|
|
||||||
|
// when
|
||||||
|
const result = extractSessionIdFromOutput(output)
|
||||||
|
|
||||||
|
// then
|
||||||
|
expect(result).toBe("ses_real_metadata_456")
|
||||||
|
})
|
||||||
|
|
||||||
|
test("does not let task_metadata parsing bleed into incidental body text after the closing tag", () => {
|
||||||
|
// given
|
||||||
|
const output = `<task_metadata>
|
||||||
|
session_id: ses_real_metadata_456
|
||||||
|
</task_metadata>
|
||||||
|
|
||||||
|
debug log: session_id: ses_wrong_body_789`
|
||||||
|
|
||||||
|
// when
|
||||||
|
const result = extractSessionIdFromOutput(output)
|
||||||
|
|
||||||
|
// then
|
||||||
|
expect(result).toBe("ses_real_metadata_456")
|
||||||
|
})
|
||||||
|
})
|
||||||
@@ -1,4 +1,44 @@
|
|||||||
export function extractSessionIdFromOutput(output: string): string {
|
import type { PluginInput } from "@opencode-ai/plugin"
|
||||||
const match = output.match(/Session ID:\s*(ses_[a-zA-Z0-9]+)/)
|
import { log } from "../../shared/logger"
|
||||||
return match?.[1] ?? "<session_id>"
|
import { isSessionInBoulderLineage } from "./boulder-session-lineage"
|
||||||
|
import { HOOK_NAME } from "./hook-name"
|
||||||
|
|
||||||
|
export function extractSessionIdFromOutput(output: string): string | undefined {
|
||||||
|
const taskMetadataBlocks = [...output.matchAll(/<task_metadata>([\s\S]*?)<\/task_metadata>/gi)]
|
||||||
|
const lastTaskMetadataBlock = taskMetadataBlocks.at(-1)?.[1]
|
||||||
|
if (lastTaskMetadataBlock) {
|
||||||
|
const taskMetadataSessionMatch = lastTaskMetadataBlock.match(/session_id:\s*(ses_[a-zA-Z0-9_]+)/i)
|
||||||
|
if (taskMetadataSessionMatch) {
|
||||||
|
return taskMetadataSessionMatch[1]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const explicitSessionMatches = [...output.matchAll(/Session ID:\s*(ses_[a-zA-Z0-9_]+)/g)]
|
||||||
|
return explicitSessionMatches.at(-1)?.[1]
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function validateSubagentSessionId(input: {
|
||||||
|
client: PluginInput["client"]
|
||||||
|
sessionID?: string
|
||||||
|
lineageSessionIDs: string[]
|
||||||
|
}): Promise<string | undefined> {
|
||||||
|
if (!input.sessionID || input.lineageSessionIDs.length === 0) {
|
||||||
|
return undefined
|
||||||
|
}
|
||||||
|
|
||||||
|
const belongsToLineage = await isSessionInBoulderLineage({
|
||||||
|
client: input.client,
|
||||||
|
sessionID: input.sessionID,
|
||||||
|
boulderSessionIDs: input.lineageSessionIDs,
|
||||||
|
})
|
||||||
|
|
||||||
|
if (!belongsToLineage) {
|
||||||
|
log(`[${HOOK_NAME}] Ignoring extracted session id outside active lineage`, {
|
||||||
|
sessionID: input.sessionID,
|
||||||
|
lineageSessionIDs: input.lineageSessionIDs,
|
||||||
|
})
|
||||||
|
return undefined
|
||||||
|
}
|
||||||
|
|
||||||
|
return input.sessionID
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,12 @@
|
|||||||
import type { PluginInput } from "@opencode-ai/plugin"
|
import type { PluginInput } from "@opencode-ai/plugin"
|
||||||
import { appendSessionId, getPlanProgress, readBoulderState } from "../../features/boulder-state"
|
import {
|
||||||
|
appendSessionId,
|
||||||
|
getPlanProgress,
|
||||||
|
getTaskSessionState,
|
||||||
|
readBoulderState,
|
||||||
|
readCurrentTopLevelTask,
|
||||||
|
upsertTaskSessionState,
|
||||||
|
} from "../../features/boulder-state"
|
||||||
import { log } from "../../shared/logger"
|
import { log } from "../../shared/logger"
|
||||||
import { isCallerOrchestrator } from "../../shared/session-utils"
|
import { isCallerOrchestrator } from "../../shared/session-utils"
|
||||||
import { collectGitDiffStats, formatFileChanges } from "../../shared/git-worktree"
|
import { collectGitDiffStats, formatFileChanges } from "../../shared/git-worktree"
|
||||||
@@ -7,7 +14,7 @@ import { shouldPauseForFinalWaveApproval } from "./final-wave-approval-gate"
|
|||||||
import { HOOK_NAME } from "./hook-name"
|
import { HOOK_NAME } from "./hook-name"
|
||||||
import { DIRECT_WORK_REMINDER } from "./system-reminder-templates"
|
import { DIRECT_WORK_REMINDER } from "./system-reminder-templates"
|
||||||
import { isSisyphusPath } from "./sisyphus-path"
|
import { isSisyphusPath } from "./sisyphus-path"
|
||||||
import { extractSessionIdFromOutput } from "./subagent-session-id"
|
import { extractSessionIdFromOutput, validateSubagentSessionId } from "./subagent-session-id"
|
||||||
import {
|
import {
|
||||||
buildCompletionGate,
|
buildCompletionGate,
|
||||||
buildFinalWaveApprovalReminder,
|
buildFinalWaveApprovalReminder,
|
||||||
@@ -15,16 +22,60 @@ import {
|
|||||||
buildStandaloneVerificationReminder,
|
buildStandaloneVerificationReminder,
|
||||||
} from "./verification-reminders"
|
} from "./verification-reminders"
|
||||||
import { isWriteOrEditToolName } from "./write-edit-tool-policy"
|
import { isWriteOrEditToolName } from "./write-edit-tool-policy"
|
||||||
import type { SessionState } from "./types"
|
import type { PendingTaskRef, SessionState } from "./types"
|
||||||
import type { ToolExecuteAfterInput, ToolExecuteAfterOutput } from "./types"
|
import type { ToolExecuteAfterInput, ToolExecuteAfterOutput, TrackedTopLevelTaskRef } from "./types"
|
||||||
|
|
||||||
|
function resolvePreferredSessionId(currentSessionId?: string, trackedSessionId?: string): string {
|
||||||
|
return currentSessionId ?? trackedSessionId ?? "<session_id>"
|
||||||
|
}
|
||||||
|
|
||||||
|
function resolveTaskContext(
|
||||||
|
pendingTaskRef: PendingTaskRef | undefined,
|
||||||
|
planPath: string,
|
||||||
|
): {
|
||||||
|
currentTask: TrackedTopLevelTaskRef | null
|
||||||
|
shouldSkipTaskSessionUpdate: boolean
|
||||||
|
shouldIgnoreCurrentSessionId: boolean
|
||||||
|
} {
|
||||||
|
if (!pendingTaskRef) {
|
||||||
|
return {
|
||||||
|
currentTask: readCurrentTopLevelTask(planPath),
|
||||||
|
shouldSkipTaskSessionUpdate: false,
|
||||||
|
shouldIgnoreCurrentSessionId: false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pendingTaskRef.kind === "track") {
|
||||||
|
return {
|
||||||
|
currentTask: pendingTaskRef.task,
|
||||||
|
shouldSkipTaskSessionUpdate: false,
|
||||||
|
shouldIgnoreCurrentSessionId: false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pendingTaskRef.reason === "explicit_resume") {
|
||||||
|
return {
|
||||||
|
currentTask: readCurrentTopLevelTask(planPath),
|
||||||
|
shouldSkipTaskSessionUpdate: true,
|
||||||
|
shouldIgnoreCurrentSessionId: true,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
currentTask: pendingTaskRef.task,
|
||||||
|
shouldSkipTaskSessionUpdate: true,
|
||||||
|
shouldIgnoreCurrentSessionId: true,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
export function createToolExecuteAfterHandler(input: {
|
export function createToolExecuteAfterHandler(input: {
|
||||||
ctx: PluginInput
|
ctx: PluginInput
|
||||||
pendingFilePaths: Map<string, string>
|
pendingFilePaths: Map<string, string>
|
||||||
|
pendingTaskRefs: Map<string, PendingTaskRef>
|
||||||
autoCommit: boolean
|
autoCommit: boolean
|
||||||
getState: (sessionID: string) => SessionState
|
getState: (sessionID: string) => SessionState
|
||||||
}): (toolInput: ToolExecuteAfterInput, toolOutput: ToolExecuteAfterOutput) => Promise<void> {
|
}): (toolInput: ToolExecuteAfterInput, toolOutput: ToolExecuteAfterOutput) => Promise<void> {
|
||||||
const { ctx, pendingFilePaths, autoCommit, getState } = input
|
const { ctx, pendingFilePaths, pendingTaskRefs, autoCommit, getState } = input
|
||||||
return async (toolInput, toolOutput): Promise<void> => {
|
return async (toolInput, toolOutput): Promise<void> => {
|
||||||
// Guard against undefined output (e.g., from /review command - see issue #1035)
|
// Guard against undefined output (e.g., from /review command - see issue #1035)
|
||||||
if (!toolOutput) {
|
if (!toolOutput) {
|
||||||
@@ -59,6 +110,10 @@ export function createToolExecuteAfterHandler(input: {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const outputStr = toolOutput.output && typeof toolOutput.output === "string" ? toolOutput.output : ""
|
const outputStr = toolOutput.output && typeof toolOutput.output === "string" ? toolOutput.output : ""
|
||||||
|
const pendingTaskRef = toolInput.callID ? pendingTaskRefs.get(toolInput.callID) : undefined
|
||||||
|
if (toolInput.callID) {
|
||||||
|
pendingTaskRefs.delete(toolInput.callID)
|
||||||
|
}
|
||||||
const isBackgroundLaunch = outputStr.includes("Background task launched") || outputStr.includes("Background task continued")
|
const isBackgroundLaunch = outputStr.includes("Background task launched") || outputStr.includes("Background task continued")
|
||||||
if (isBackgroundLaunch) {
|
if (isBackgroundLaunch) {
|
||||||
return
|
return
|
||||||
@@ -67,11 +122,19 @@ export function createToolExecuteAfterHandler(input: {
|
|||||||
if (toolOutput.output && typeof toolOutput.output === "string") {
|
if (toolOutput.output && typeof toolOutput.output === "string") {
|
||||||
const gitStats = collectGitDiffStats(ctx.directory)
|
const gitStats = collectGitDiffStats(ctx.directory)
|
||||||
const fileChanges = formatFileChanges(gitStats)
|
const fileChanges = formatFileChanges(gitStats)
|
||||||
const subagentSessionId = extractSessionIdFromOutput(toolOutput.output)
|
const extractedSessionId = extractSessionIdFromOutput(toolOutput.output)
|
||||||
|
|
||||||
const boulderState = readBoulderState(ctx.directory)
|
const boulderState = readBoulderState(ctx.directory)
|
||||||
if (boulderState) {
|
if (boulderState) {
|
||||||
const progress = getPlanProgress(boulderState.active_plan)
|
const progress = getPlanProgress(boulderState.active_plan)
|
||||||
|
const {
|
||||||
|
currentTask,
|
||||||
|
shouldSkipTaskSessionUpdate,
|
||||||
|
shouldIgnoreCurrentSessionId,
|
||||||
|
} = resolveTaskContext(pendingTaskRef, boulderState.active_plan)
|
||||||
|
const trackedTaskSession = currentTask
|
||||||
|
? getTaskSessionState(ctx.directory, currentTask.key)
|
||||||
|
: null
|
||||||
const sessionState = toolInput.sessionID ? getState(toolInput.sessionID) : undefined
|
const sessionState = toolInput.sessionID ? getState(toolInput.sessionID) : undefined
|
||||||
|
|
||||||
if (toolInput.sessionID && !boulderState.session_ids?.includes(toolInput.sessionID)) {
|
if (toolInput.sessionID && !boulderState.session_ids?.includes(toolInput.sessionID)) {
|
||||||
@@ -82,6 +145,31 @@ export function createToolExecuteAfterHandler(input: {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const lineageSessionIDs = toolInput.sessionID && !boulderState.session_ids.includes(toolInput.sessionID)
|
||||||
|
? [...boulderState.session_ids, toolInput.sessionID]
|
||||||
|
: boulderState.session_ids
|
||||||
|
const subagentSessionId = await validateSubagentSessionId({
|
||||||
|
client: ctx.client,
|
||||||
|
sessionID: extractedSessionId,
|
||||||
|
lineageSessionIDs,
|
||||||
|
})
|
||||||
|
|
||||||
|
if (currentTask && subagentSessionId && !shouldSkipTaskSessionUpdate) {
|
||||||
|
upsertTaskSessionState(ctx.directory, {
|
||||||
|
taskKey: currentTask.key,
|
||||||
|
taskLabel: currentTask.label,
|
||||||
|
taskTitle: currentTask.title,
|
||||||
|
sessionId: subagentSessionId,
|
||||||
|
agent: toolOutput.metadata?.agent as string | undefined,
|
||||||
|
category: toolOutput.metadata?.category as string | undefined,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
const preferredSessionId = resolvePreferredSessionId(
|
||||||
|
shouldIgnoreCurrentSessionId ? undefined : subagentSessionId,
|
||||||
|
trackedTaskSession?.session_id,
|
||||||
|
)
|
||||||
|
|
||||||
// Preserve original subagent response - critical for debugging failed tasks
|
// Preserve original subagent response - critical for debugging failed tasks
|
||||||
const originalResponse = toolOutput.output
|
const originalResponse = toolOutput.output
|
||||||
const shouldPauseForApproval = sessionState
|
const shouldPauseForApproval = sessionState
|
||||||
@@ -102,11 +190,11 @@ export function createToolExecuteAfterHandler(input: {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const leadReminder = shouldPauseForApproval
|
const leadReminder = shouldPauseForApproval
|
||||||
? buildFinalWaveApprovalReminder(boulderState.plan_name, progress, subagentSessionId)
|
? buildFinalWaveApprovalReminder(boulderState.plan_name, progress, preferredSessionId)
|
||||||
: buildCompletionGate(boulderState.plan_name, subagentSessionId)
|
: buildCompletionGate(boulderState.plan_name, preferredSessionId)
|
||||||
const followupReminder = shouldPauseForApproval
|
const followupReminder = shouldPauseForApproval
|
||||||
? null
|
? null
|
||||||
: buildOrchestratorReminder(boulderState.plan_name, progress, subagentSessionId, autoCommit, false)
|
: buildOrchestratorReminder(boulderState.plan_name, progress, preferredSessionId, autoCommit, false)
|
||||||
|
|
||||||
toolOutput.output = `
|
toolOutput.output = `
|
||||||
<system-reminder>
|
<system-reminder>
|
||||||
@@ -132,10 +220,22 @@ ${
|
|||||||
plan: boulderState.plan_name,
|
plan: boulderState.plan_name,
|
||||||
progress: `${progress.completed}/${progress.total}`,
|
progress: `${progress.completed}/${progress.total}`,
|
||||||
fileCount: gitStats.length,
|
fileCount: gitStats.length,
|
||||||
|
preferredSessionId,
|
||||||
waitingForFinalWaveApproval: shouldPauseForApproval,
|
waitingForFinalWaveApproval: shouldPauseForApproval,
|
||||||
})
|
})
|
||||||
} else {
|
} else {
|
||||||
toolOutput.output += `\n<system-reminder>\n${buildStandaloneVerificationReminder(subagentSessionId)}\n</system-reminder>`
|
const lineageSessionIDs = toolInput.sessionID ? [toolInput.sessionID] : []
|
||||||
|
const subagentSessionId = await validateSubagentSessionId({
|
||||||
|
client: ctx.client,
|
||||||
|
sessionID: extractedSessionId,
|
||||||
|
lineageSessionIDs,
|
||||||
|
})
|
||||||
|
const preferredSessionId = pendingTaskRef?.kind === "skip"
|
||||||
|
? undefined
|
||||||
|
: subagentSessionId
|
||||||
|
toolOutput.output += `\n<system-reminder>\n${buildStandaloneVerificationReminder(
|
||||||
|
resolvePreferredSessionId(preferredSessionId),
|
||||||
|
)}\n</system-reminder>`
|
||||||
|
|
||||||
log(`[${HOOK_NAME}] Verification reminder appended for orchestrator`, {
|
log(`[${HOOK_NAME}] Verification reminder appended for orchestrator`, {
|
||||||
sessionID: toolInput.sessionID,
|
sessionID: toolInput.sessionID,
|
||||||
|
|||||||
@@ -2,19 +2,26 @@ import { log } from "../../shared/logger"
|
|||||||
import { SYSTEM_DIRECTIVE_PREFIX } from "../../shared/system-directive"
|
import { SYSTEM_DIRECTIVE_PREFIX } from "../../shared/system-directive"
|
||||||
import { isCallerOrchestrator } from "../../shared/session-utils"
|
import { isCallerOrchestrator } from "../../shared/session-utils"
|
||||||
import type { PluginInput } from "@opencode-ai/plugin"
|
import type { PluginInput } from "@opencode-ai/plugin"
|
||||||
|
import { readBoulderState, readCurrentTopLevelTask } from "../../features/boulder-state"
|
||||||
import { HOOK_NAME } from "./hook-name"
|
import { HOOK_NAME } from "./hook-name"
|
||||||
import { ORCHESTRATOR_DELEGATION_REQUIRED, SINGLE_TASK_DIRECTIVE } from "./system-reminder-templates"
|
import { ORCHESTRATOR_DELEGATION_REQUIRED, SINGLE_TASK_DIRECTIVE } from "./system-reminder-templates"
|
||||||
import { isSisyphusPath } from "./sisyphus-path"
|
import { isSisyphusPath } from "./sisyphus-path"
|
||||||
|
import type { PendingTaskRef, TrackedTopLevelTaskRef } from "./types"
|
||||||
import { isWriteOrEditToolName } from "./write-edit-tool-policy"
|
import { isWriteOrEditToolName } from "./write-edit-tool-policy"
|
||||||
|
|
||||||
export function createToolExecuteBeforeHandler(input: {
|
export function createToolExecuteBeforeHandler(input: {
|
||||||
ctx: PluginInput
|
ctx: PluginInput
|
||||||
pendingFilePaths: Map<string, string>
|
pendingFilePaths: Map<string, string>
|
||||||
|
pendingTaskRefs: Map<string, PendingTaskRef>
|
||||||
}): (
|
}): (
|
||||||
toolInput: { tool: string; sessionID?: string; callID?: string },
|
toolInput: { tool: string; sessionID?: string; callID?: string },
|
||||||
toolOutput: { args: Record<string, unknown>; message?: string }
|
toolOutput: { args: Record<string, unknown>; message?: string }
|
||||||
) => Promise<void> {
|
) => Promise<void> {
|
||||||
const { ctx, pendingFilePaths } = input
|
const { ctx, pendingFilePaths, pendingTaskRefs } = input
|
||||||
|
|
||||||
|
function trackTask(callID: string, task: TrackedTopLevelTaskRef): void {
|
||||||
|
pendingTaskRefs.set(callID, { kind: "track", task })
|
||||||
|
}
|
||||||
|
|
||||||
return async (toolInput, toolOutput): Promise<void> => {
|
return async (toolInput, toolOutput): Promise<void> => {
|
||||||
if (!(await isCallerOrchestrator(toolInput.sessionID, ctx.client))) {
|
if (!(await isCallerOrchestrator(toolInput.sessionID, ctx.client))) {
|
||||||
@@ -43,6 +50,46 @@ export function createToolExecuteBeforeHandler(input: {
|
|||||||
|
|
||||||
// Check task - inject single-task directive
|
// Check task - inject single-task directive
|
||||||
if (toolInput.tool === "task") {
|
if (toolInput.tool === "task") {
|
||||||
|
if (toolInput.callID) {
|
||||||
|
const requestedSessionId = toolOutput.args.session_id as string | undefined
|
||||||
|
if (requestedSessionId) {
|
||||||
|
pendingTaskRefs.set(toolInput.callID, {
|
||||||
|
kind: "skip",
|
||||||
|
reason: "explicit_resume",
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
const boulderState = readBoulderState(ctx.directory)
|
||||||
|
const currentTask = boulderState
|
||||||
|
? readCurrentTopLevelTask(boulderState.active_plan)
|
||||||
|
: null
|
||||||
|
if (currentTask) {
|
||||||
|
const task = {
|
||||||
|
key: currentTask.key,
|
||||||
|
label: currentTask.label,
|
||||||
|
title: currentTask.title,
|
||||||
|
}
|
||||||
|
const hasExistingClaim = [...pendingTaskRefs.values()].some((pendingTaskRef) => (
|
||||||
|
pendingTaskRef.kind === "track" && pendingTaskRef.task.key === task.key
|
||||||
|
))
|
||||||
|
|
||||||
|
if (hasExistingClaim) {
|
||||||
|
pendingTaskRefs.set(toolInput.callID, {
|
||||||
|
kind: "skip",
|
||||||
|
reason: "ambiguous_task_key",
|
||||||
|
task,
|
||||||
|
})
|
||||||
|
log(`[${HOOK_NAME}] Skipping task session persistence for ambiguous task key`, {
|
||||||
|
sessionID: toolInput.sessionID,
|
||||||
|
callID: toolInput.callID,
|
||||||
|
taskKey: task.key,
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
trackTask(toolInput.callID, task)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const prompt = toolOutput.args.prompt as string | undefined
|
const prompt = toolOutput.args.prompt as string | undefined
|
||||||
if (prompt && !prompt.includes(SYSTEM_DIRECTIVE_PREFIX)) {
|
if (prompt && !prompt.includes(SYSTEM_DIRECTIVE_PREFIX)) {
|
||||||
toolOutput.args.prompt = `<system-reminder>${SINGLE_TASK_DIRECTIVE}</system-reminder>\n` + prompt
|
toolOutput.args.prompt = `<system-reminder>${SINGLE_TASK_DIRECTIVE}</system-reminder>\n` + prompt
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import type { AgentOverrides } from "../../config"
|
import type { AgentOverrides } from "../../config"
|
||||||
import type { BackgroundManager } from "../../features/background-agent"
|
import type { BackgroundManager } from "../../features/background-agent"
|
||||||
|
import type { TopLevelTaskRef } from "../../features/boulder-state"
|
||||||
|
|
||||||
export type ModelInfo = { providerID: string; modelID: string }
|
export type ModelInfo = { providerID: string; modelID: string }
|
||||||
|
|
||||||
@@ -25,6 +26,13 @@ export interface ToolExecuteAfterOutput {
|
|||||||
metadata: Record<string, unknown>
|
metadata: Record<string, unknown>
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export type TrackedTopLevelTaskRef = Pick<TopLevelTaskRef, "key" | "label" | "title">
|
||||||
|
|
||||||
|
export type PendingTaskRef =
|
||||||
|
| { kind: "track"; task: TrackedTopLevelTaskRef }
|
||||||
|
| { kind: "skip"; reason: "explicit_resume" }
|
||||||
|
| { kind: "skip"; reason: "ambiguous_task_key"; task: TrackedTopLevelTaskRef }
|
||||||
|
|
||||||
export interface SessionState {
|
export interface SessionState {
|
||||||
lastEventWasAbortError?: boolean
|
lastEventWasAbortError?: boolean
|
||||||
lastContinuationInjectedAt?: number
|
lastContinuationInjectedAt?: number
|
||||||
|
|||||||
@@ -1,5 +1,14 @@
|
|||||||
import { VERIFICATION_REMINDER } from "./system-reminder-templates"
|
import { VERIFICATION_REMINDER } from "./system-reminder-templates"
|
||||||
|
|
||||||
|
function buildReuseHint(sessionId: string): string {
|
||||||
|
return `
|
||||||
|
**PREFERRED REUSE SESSION FOR THE CURRENT TOP-LEVEL PLAN TASK**
|
||||||
|
|
||||||
|
- Reuse \`${sessionId}\` first if verification fails or the result needs follow-up.
|
||||||
|
- Start a fresh subagent session only when reuse is unavailable or would cross task boundaries.
|
||||||
|
`
|
||||||
|
}
|
||||||
|
|
||||||
export function buildCompletionGate(planName: string, sessionId: string): string {
|
export function buildCompletionGate(planName: string, sessionId: string): string {
|
||||||
return `
|
return `
|
||||||
**COMPLETION GATE — DO NOT PROCEED UNTIL THIS IS DONE**
|
**COMPLETION GATE — DO NOT PROCEED UNTIL THIS IS DONE**
|
||||||
@@ -25,7 +34,8 @@ task(session_id="${sessionId}", prompt="fix: checkbox not recorded correctly")
|
|||||||
|
|
||||||
**Your completion is NOT tracked until the checkbox is marked in the plan file.**
|
**Your completion is NOT tracked until the checkbox is marked in the plan file.**
|
||||||
|
|
||||||
**VERIFICATION_REMINDER**`
|
**VERIFICATION_REMINDER**
|
||||||
|
${buildReuseHint(sessionId)}`
|
||||||
}
|
}
|
||||||
|
|
||||||
function buildVerificationReminder(sessionId: string): string {
|
function buildVerificationReminder(sessionId: string): string {
|
||||||
@@ -38,7 +48,9 @@ ${VERIFICATION_REMINDER}
|
|||||||
**If ANY verification fails, use this immediately:**
|
**If ANY verification fails, use this immediately:**
|
||||||
\`\`\`
|
\`\`\`
|
||||||
task(session_id="${sessionId}", prompt="fix: [describe the specific failure]")
|
task(session_id="${sessionId}", prompt="fix: [describe the specific failure]")
|
||||||
\`\`\``
|
\`\`\`
|
||||||
|
|
||||||
|
${buildReuseHint(sessionId)}`
|
||||||
}
|
}
|
||||||
|
|
||||||
export function buildOrchestratorReminder(
|
export function buildOrchestratorReminder(
|
||||||
|
|||||||
Reference in New Issue
Block a user