From f796fdbe0a44b73073bec7291fc328e145475b18 Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Fri, 6 Feb 2026 11:21:37 +0900 Subject: [PATCH 01/30] feat(hooks): add TASK_CONTINUATION system directive and hook name --- src/config/schema.ts | 1 + src/shared/system-directive.ts | 1 + 2 files changed, 2 insertions(+) diff --git a/src/config/schema.ts b/src/config/schema.ts index d406ee715..ce4bfb7f1 100644 --- a/src/config/schema.ts +++ b/src/config/schema.ts @@ -57,6 +57,7 @@ export const AgentNameSchema = BuiltinAgentNameSchema export const HookNameSchema = z.enum([ "todo-continuation-enforcer", + "task-continuation-enforcer", "context-window-monitor", "session-recovery", "session-notification", diff --git a/src/shared/system-directive.ts b/src/shared/system-directive.ts index f2ae8c602..0b8ba4f9b 100644 --- a/src/shared/system-directive.ts +++ b/src/shared/system-directive.ts @@ -48,6 +48,7 @@ export function removeSystemReminders(text: string): string { export const SystemDirectiveTypes = { TODO_CONTINUATION: "TODO CONTINUATION", + TASK_CONTINUATION: "TASK CONTINUATION", RALPH_LOOP: "RALPH LOOP", BOULDER_CONTINUATION: "BOULDER CONTINUATION", DELEGATION_REQUIRED: "DELEGATION REQUIRED", From f4a9d0c3aa3649327978ca211cf3a395197069aa Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Fri, 6 Feb 2026 11:21:45 +0900 Subject: [PATCH 02/30] feat(hooks): implement task-continuation-enforcer with TDD Mirrors todo-continuation-enforcer but reads from file-based task storage instead of OpenCode's todo API. Includes 19 tests covering all skip conditions, abort detection, countdown, and recovery scenarios. 
--- src/hooks/task-continuation-enforcer.test.ts | 763 +++++++++++++++++++ src/hooks/task-continuation-enforcer.ts | 530 +++++++++++++ 2 files changed, 1293 insertions(+) create mode 100644 src/hooks/task-continuation-enforcer.test.ts create mode 100644 src/hooks/task-continuation-enforcer.ts diff --git a/src/hooks/task-continuation-enforcer.test.ts b/src/hooks/task-continuation-enforcer.test.ts new file mode 100644 index 000000000..1a0cbc75d --- /dev/null +++ b/src/hooks/task-continuation-enforcer.test.ts @@ -0,0 +1,763 @@ +import { afterEach, beforeEach, describe, expect, test } from "bun:test" + +import { mkdtempSync, rmSync, writeFileSync } from "node:fs" +import { tmpdir } from "node:os" +import { join } from "node:path" + +import { BackgroundManager } from "../features/background-agent" +import { setMainSession, subagentSessions, _resetForTesting } from "../features/claude-code-session-state" +import type { OhMyOpenCodeConfig } from "../config/schema" +import { TaskObjectSchema } from "../tools/task/types" +import type { TaskObject } from "../tools/task/types" +import { createTaskContinuationEnforcer } from "./task-continuation-enforcer" + +type TimerCallback = (...args: any[]) => void + +/** Test handle for the fake clock: advanceBy fires due timers, restore reinstates the real globals. */ interface FakeTimers { + advanceBy: (ms: number, advanceClock?: boolean) => Promise<void> + restore: () => void +} + +/** Swaps globalThis.setTimeout, setInterval, clearTimeout, clearInterval and Date.now for in-memory fakes. Timers fire only from advanceBy; Date.now returns a frozen value that moves only when advanceBy is called with advanceClock set. Call restore to put the originals back. */ function createFakeTimers(): FakeTimers { + const originalNow = Date.now() + let clockNow = originalNow + let timerNow = 0 + let nextId = 1 + const timers = new Map<number, { id: number; time: number; interval: number | null; callback: TimerCallback; args: any[] }>() + const cleared = new Set<number>() + + const original = { + setTimeout: globalThis.setTimeout, + clearTimeout: globalThis.clearTimeout, + setInterval: globalThis.setInterval, + clearInterval: globalThis.clearInterval, + dateNow: Date.now, + } + + /* non-numeric, non-finite and negative delays count as 0 */ const normalizeDelay = (delay?: number) => { + if (typeof delay !== "number" || !Number.isFinite(delay)) return 0 + return delay < 0 ?
0 : delay + } + + /* Registers a timer due at timerNow plus the normalized delay; interval is null for one-shot timers. */ const schedule = (callback: TimerCallback, delay: number | undefined, interval: number | null, args: any[]) => { + const id = nextId++ + timers.set(id, { + id, + time: timerNow + normalizeDelay(delay), + interval, + callback, + args, + }) + return id + } + + /* Drops the timer and records the id in cleared so a firing interval is not re-armed. */ const clear = (id: number | undefined) => { + if (typeof id !== "number") return + cleared.add(id) + timers.delete(id) + } + + globalThis.setTimeout = ((callback: TimerCallback, delay?: number, ...args: any[]) => { + return schedule(callback, delay, null, args) as unknown as ReturnType<typeof setTimeout> + }) as typeof setTimeout + + globalThis.setInterval = ((callback: TimerCallback, delay?: number, ...args: any[]) => { + const interval = normalizeDelay(delay) + return schedule(callback, delay, interval, args) as unknown as ReturnType<typeof setInterval> + }) as typeof setInterval + + globalThis.clearTimeout = ((id?: number) => { + clear(id) + }) as typeof clearTimeout + + globalThis.clearInterval = ((id?: number) => { + clear(id) + }) as typeof clearInterval + + Date.now = () => clockNow + + /* Advances fake timer time by ms (negative values clamp to 0), firing due timers in time order. The Date.now value moves only when advanceClock is true. */ const advanceBy = async (ms: number, advanceClock: boolean = false) => { + const clamped = Math.max(0, ms) + const target = timerNow + clamped + if (advanceClock) { + clockNow += clamped + } + while (true) { + /* earliest timer due at or before target */ let next: { id: number; time: number; interval: number | null; callback: TimerCallback; args: any[] } | undefined + for (const timer of timers.values()) { + if (timer.time <= target && (!next || timer.time < next.time)) { + next = timer + } + } + if (!next) break + + timerNow = next.time + timers.delete(next.id) + next.callback(...next.args) + + /* re-arm intervals unless their id was cleared while the callback ran */ if (next.interval !== null && !cleared.has(next.id)) { + timers.set(next.id, { + id: next.id, + time: timerNow + next.interval, + interval: next.interval, + callback: next.callback, + args: next.args, + }) + } else { + cleared.delete(next.id) + } + + /* yield one microtask tick between callbacks */ await Promise.resolve() + } + timerNow = target + await Promise.resolve() + } + + /* Reinstates the timer globals and Date.now captured when the fakes were installed. */ const restore = () => { + globalThis.setTimeout =
original.setTimeout + globalThis.clearTimeout = original.clearTimeout + globalThis.setInterval = original.setInterval + globalThis.clearInterval = original.clearInterval + Date.now = original.dateNow + } + + return { advanceBy, restore } +} + +/** Promise-based sleep built on whichever setTimeout is installed at call time; the tests that use it call fakeTimers.restore() first. */ const wait = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms)) + +describe("task-continuation-enforcer", () => { + let promptCalls: Array<{ sessionID: string; agent?: string; model?: { providerID?: string; modelID?: string }; text: string }> + let toastCalls: Array<{ title: string; message: string }> + let fakeTimers: FakeTimers + let taskDir: string + + interface MockMessage { + info: { + id: string + role: "user" | "assistant" + error?: { name: string; data?: { message: string } } + } + } + + let mockMessages: MockMessage[] = [] + + /* Minimal plugin input: session.messages returns mockMessages, session.prompt and tui.showToast record their arguments in promptCalls and toastCalls. */ function createMockPluginInput() { + return { + client: { + session: { + messages: async () => ({ data: mockMessages }), + prompt: async (opts: any) => { + promptCalls.push({ + sessionID: opts.path.id, + agent: opts.body.agent, + model: opts.body.model, + text: opts.body.parts[0].text, + }) + return {} + }, + }, + tui: { + showToast: async (opts: any) => { + toastCalls.push({ + title: opts.body.title, + message: opts.body.message, + }) + return {} + }, + }, + }, + directory: "/tmp/test", + } as any + } + + /* Creates a fresh temporary directory under the OS temp dir for task files. */ function createTempTaskDir(): string { + return mkdtempSync(join(tmpdir(), "omo-task-continuation-")) + } + + /* Validates the task against TaskObjectSchema (failing the test when invalid) and writes the parsed task as a JSON file named after its id. */ function writeTaskFile(dir: string, task: TaskObject): void { + const parsed = TaskObjectSchema.safeParse(task) + expect(parsed.success).toBe(true) + if (!parsed.success) return + writeFileSync(join(dir, `${parsed.data.id}.json`), JSON.stringify(parsed.data), "utf-8") + } + + /* Writes a task file whose content is deliberately invalid JSON. */ function writeCorruptedTaskFile(dir: string, taskId: string): void { + writeFileSync(join(dir, `${taskId}.json`), "{ this is not valid json", "utf-8") + } + + /* Config pointing sisyphus.tasks.storage_path at dir with claude_code_compat enabled. */ function createConfig(dir: string): Partial<OhMyOpenCodeConfig> { + return { + sisyphus: { + tasks: { + claude_code_compat: true, + storage_path: dir, + }, + },
+ } + } + + function createMockBackgroundManager(runningTasks: boolean = false): BackgroundManager { + return { + getTasksByParentSession: () => (runningTasks ? [{ status: "running" }] : []), + } as any + } + + beforeEach(() => { + fakeTimers = createFakeTimers() + _resetForTesting() + promptCalls = [] + toastCalls = [] + mockMessages = [] + taskDir = createTempTaskDir() + }) + + afterEach(() => { + fakeTimers.restore() + _resetForTesting() + rmSync(taskDir, { recursive: true, force: true }) + }) + + test("should inject continuation when idle with incomplete tasks on disk", async () => { + fakeTimers.restore() + // given - main session with incomplete tasks + const sessionID = "main-123" + setMainSession(sessionID) + + writeTaskFile(taskDir, { + id: "T-1", + subject: "Task 1", + description: "", + status: "pending", + blocks: [], + blockedBy: [], + threadID: "test", + }) + writeTaskFile(taskDir, { + id: "T-2", + subject: "Task 2", + description: "", + status: "completed", + blocks: [], + blockedBy: [], + threadID: "test", + }) + + const hook = createTaskContinuationEnforcer(createMockPluginInput(), createConfig(taskDir), { + backgroundManager: new BackgroundManager(createMockPluginInput()), + }) + + // when - session goes idle + await hook.handler({ event: { type: "session.idle", properties: { sessionID } } }) + + // then - countdown toast shown + await wait(50) + expect(toastCalls.length).toBeGreaterThanOrEqual(1) + expect(toastCalls[0].title).toBe("Task Continuation") + + // then - after countdown, continuation injected + await wait(2500) + expect(promptCalls.length).toBe(1) + expect(promptCalls[0].text).toContain("TASK CONTINUATION") + }, { timeout: 15000 }) + + test("should NOT inject when all tasks are completed", async () => { + // given - session with all tasks completed + const sessionID = "main-456" + setMainSession(sessionID) + + writeTaskFile(taskDir, { + id: "T-1", + subject: "Task 1", + description: "", + status: "completed", + blocks: [], + blockedBy: 
[], + threadID: "test", + }) + + const hook = createTaskContinuationEnforcer(createMockPluginInput(), createConfig(taskDir), {}) + + // when - session goes idle + await hook.handler({ event: { type: "session.idle", properties: { sessionID } } }) + await fakeTimers.advanceBy(3000) + + // then - no continuation injected + expect(promptCalls).toHaveLength(0) + }) + + test("should NOT inject when all tasks are deleted", async () => { + // given - session with all tasks deleted + const sessionID = "main-deleted" + setMainSession(sessionID) + + writeTaskFile(taskDir, { + id: "T-1", + subject: "Task 1", + description: "", + status: "deleted", + blocks: [], + blockedBy: [], + threadID: "test", + }) + + const hook = createTaskContinuationEnforcer(createMockPluginInput(), createConfig(taskDir), {}) + + // when + await hook.handler({ event: { type: "session.idle", properties: { sessionID } } }) + await fakeTimers.advanceBy(3000) + + // then + expect(promptCalls).toHaveLength(0) + }) + + test("should NOT inject when no task files exist", async () => { + // given - empty task directory + const sessionID = "main-none" + setMainSession(sessionID) + + const hook = createTaskContinuationEnforcer(createMockPluginInput(), createConfig(taskDir), {}) + + // when + await hook.handler({ event: { type: "session.idle", properties: { sessionID } } }) + await fakeTimers.advanceBy(3000) + + // then + expect(promptCalls).toHaveLength(0) + }) + + test("should NOT inject when background tasks are running", async () => { + // given - session with incomplete tasks and running background tasks + const sessionID = "main-bg-running" + setMainSession(sessionID) + + writeTaskFile(taskDir, { + id: "T-1", + subject: "Task 1", + description: "", + status: "pending", + blocks: [], + blockedBy: [], + threadID: "test", + }) + + const hook = createTaskContinuationEnforcer(createMockPluginInput(), createConfig(taskDir), { + backgroundManager: createMockBackgroundManager(true), + }) + + // when + await 
hook.handler({ event: { type: "session.idle", properties: { sessionID } } }) + await fakeTimers.advanceBy(3000) + + // then + expect(promptCalls).toHaveLength(0) + }) + + test("should NOT inject for non-main session", async () => { + // given - main session set, different session goes idle + setMainSession("main-session") + const otherSession = "other-session" + + writeTaskFile(taskDir, { + id: "T-1", + subject: "Task 1", + description: "", + status: "pending", + blocks: [], + blockedBy: [], + threadID: "test", + }) + + const hook = createTaskContinuationEnforcer(createMockPluginInput(), createConfig(taskDir), {}) + + // when + await hook.handler({ event: { type: "session.idle", properties: { sessionID: otherSession } } }) + await fakeTimers.advanceBy(3000) + + // then + expect(promptCalls).toHaveLength(0) + }) + + test("should inject for background task session (subagent)", async () => { + fakeTimers.restore() + // given - main session set, background task session registered + setMainSession("main-session") + const bgTaskSession = "bg-task-session" + subagentSessions.add(bgTaskSession) + + writeTaskFile(taskDir, { + id: "T-1", + subject: "Task 1", + description: "", + status: "pending", + blocks: [], + blockedBy: [], + threadID: "test", + }) + + const hook = createTaskContinuationEnforcer(createMockPluginInput(), createConfig(taskDir), {}) + + // when + await hook.handler({ event: { type: "session.idle", properties: { sessionID: bgTaskSession } } }) + + // then + await wait(2500) + expect(promptCalls.length).toBe(1) + expect(promptCalls[0].sessionID).toBe(bgTaskSession) + }, { timeout: 15000 }) + + test("should cancel countdown on user message after grace period", async () => { + // given + const sessionID = "main-cancel" + setMainSession(sessionID) + + writeTaskFile(taskDir, { + id: "T-1", + subject: "Task 1", + description: "", + status: "pending", + blocks: [], + blockedBy: [], + threadID: "test", + }) + + const hook = 
createTaskContinuationEnforcer(createMockPluginInput(), createConfig(taskDir), {}) + + // when - session goes idle + await hook.handler({ event: { type: "session.idle", properties: { sessionID } } }) + + // when - wait past grace period (500ms), then user sends message + await fakeTimers.advanceBy(600, true) + await hook.handler({ + event: { + type: "message.updated", + properties: { info: { sessionID, role: "user" } }, + }, + }) + + // then + await fakeTimers.advanceBy(2500) + expect(promptCalls).toHaveLength(0) + }) + + test("should ignore user message within grace period", async () => { + fakeTimers.restore() + // given + const sessionID = "main-grace" + setMainSession(sessionID) + + writeTaskFile(taskDir, { + id: "T-1", + subject: "Task 1", + description: "", + status: "pending", + blocks: [], + blockedBy: [], + threadID: "test", + }) + + const hook = createTaskContinuationEnforcer(createMockPluginInput(), createConfig(taskDir), {}) + + // when + await hook.handler({ event: { type: "session.idle", properties: { sessionID } } }) + await hook.handler({ + event: { + type: "message.updated", + properties: { info: { sessionID, role: "user" } }, + }, + }) + + // then - countdown should continue + await wait(2500) + expect(promptCalls).toHaveLength(1) + }, { timeout: 15000 }) + + test("should cancel countdown on assistant activity", async () => { + // given + const sessionID = "main-assistant" + setMainSession(sessionID) + + writeTaskFile(taskDir, { + id: "T-1", + subject: "Task 1", + description: "", + status: "pending", + blocks: [], + blockedBy: [], + threadID: "test", + }) + + const hook = createTaskContinuationEnforcer(createMockPluginInput(), createConfig(taskDir), {}) + + // when + await hook.handler({ event: { type: "session.idle", properties: { sessionID } } }) + await fakeTimers.advanceBy(500) + await hook.handler({ + event: { + type: "message.part.updated", + properties: { info: { sessionID, role: "assistant" } }, + }, + }) + + // then + await 
fakeTimers.advanceBy(3000) + expect(promptCalls).toHaveLength(0) + }) + + test("should cancel countdown on tool execution", async () => { + // given + const sessionID = "main-tool" + setMainSession(sessionID) + + writeTaskFile(taskDir, { + id: "T-1", + subject: "Task 1", + description: "", + status: "pending", + blocks: [], + blockedBy: [], + threadID: "test", + }) + + const hook = createTaskContinuationEnforcer(createMockPluginInput(), createConfig(taskDir), {}) + + // when + await hook.handler({ event: { type: "session.idle", properties: { sessionID } } }) + await fakeTimers.advanceBy(500) + await hook.handler({ event: { type: "tool.execute.before", properties: { sessionID } } }) + + // then + await fakeTimers.advanceBy(3000) + expect(promptCalls).toHaveLength(0) + }) + + test("should skip injection during recovery mode", async () => { + // given + const sessionID = "main-recovery" + setMainSession(sessionID) + + writeTaskFile(taskDir, { + id: "T-1", + subject: "Task 1", + description: "", + status: "pending", + blocks: [], + blockedBy: [], + threadID: "test", + }) + + const hook = createTaskContinuationEnforcer(createMockPluginInput(), createConfig(taskDir), {}) + + // when + hook.markRecovering(sessionID) + await hook.handler({ event: { type: "session.idle", properties: { sessionID } } }) + await fakeTimers.advanceBy(3000) + + // then + expect(promptCalls).toHaveLength(0) + }) + + test("should inject after recovery complete", async () => { + fakeTimers.restore() + // given + const sessionID = "main-recovery-done" + setMainSession(sessionID) + + writeTaskFile(taskDir, { + id: "T-1", + subject: "Task 1", + description: "", + status: "pending", + blocks: [], + blockedBy: [], + threadID: "test", + }) + + const hook = createTaskContinuationEnforcer(createMockPluginInput(), createConfig(taskDir), {}) + + // when + hook.markRecovering(sessionID) + hook.markRecoveryComplete(sessionID) + await hook.handler({ event: { type: "session.idle", properties: { sessionID } } }) 
+ + // then + await wait(3000) + expect(promptCalls.length).toBe(1) + }, { timeout: 15000 }) + + test("should cleanup on session deleted", async () => { + // given + const sessionID = "main-delete" + setMainSession(sessionID) + + writeTaskFile(taskDir, { + id: "T-1", + subject: "Task 1", + description: "", + status: "pending", + blocks: [], + blockedBy: [], + threadID: "test", + }) + + const hook = createTaskContinuationEnforcer(createMockPluginInput(), createConfig(taskDir), {}) + + // when + await hook.handler({ event: { type: "session.idle", properties: { sessionID } } }) + await fakeTimers.advanceBy(500) + await hook.handler({ event: { type: "session.deleted", properties: { info: { id: sessionID } } } }) + await fakeTimers.advanceBy(3000) + + // then + expect(promptCalls).toHaveLength(0) + }) + + test("should skip when last assistant message was aborted (API fallback)", async () => { + // given + const sessionID = "main-api-abort" + setMainSession(sessionID) + + writeTaskFile(taskDir, { + id: "T-1", + subject: "Task 1", + description: "", + status: "pending", + blocks: [], + blockedBy: [], + threadID: "test", + }) + + mockMessages = [ + { info: { id: "msg-1", role: "user" } }, + { info: { id: "msg-2", role: "assistant", error: { name: "MessageAbortedError", data: { message: "aborted" } } } }, + ] + + const hook = createTaskContinuationEnforcer(createMockPluginInput(), createConfig(taskDir), {}) + + // when + await hook.handler({ event: { type: "session.idle", properties: { sessionID } } }) + await fakeTimers.advanceBy(3000) + + // then + expect(promptCalls).toHaveLength(0) + }) + + test("should skip when abort detected via session.error event", async () => { + // given + const sessionID = "main-event-abort" + setMainSession(sessionID) + + writeTaskFile(taskDir, { + id: "T-1", + subject: "Task 1", + description: "", + status: "pending", + blocks: [], + blockedBy: [], + threadID: "test", + }) + + mockMessages = [ + { info: { id: "msg-1", role: "user" } }, + { 
info: { id: "msg-2", role: "assistant" } }, + ] + + const hook = createTaskContinuationEnforcer(createMockPluginInput(), createConfig(taskDir), {}) + + // when - abort error event fires + await hook.handler({ + event: { + type: "session.error", + properties: { sessionID, error: { name: "MessageAbortedError" } }, + }, + }) + + // when - session goes idle immediately after + await hook.handler({ event: { type: "session.idle", properties: { sessionID } } }) + await fakeTimers.advanceBy(3000) + + // then + expect(promptCalls).toHaveLength(0) + }) + + test("should handle corrupted task files gracefully (readJsonSafe returns null)", async () => { + fakeTimers.restore() + // given + const sessionID = "main-corrupt" + setMainSession(sessionID) + + writeCorruptedTaskFile(taskDir, "T-corrupt") + writeTaskFile(taskDir, { + id: "T-ok", + subject: "Task OK", + description: "", + status: "pending", + blocks: [], + blockedBy: [], + threadID: "test", + }) + + const hook = createTaskContinuationEnforcer(createMockPluginInput(), createConfig(taskDir), {}) + + // when + await hook.handler({ event: { type: "session.idle", properties: { sessionID } } }) + await wait(2500) + + // then + expect(promptCalls).toHaveLength(1) + }, { timeout: 15000 }) + + test("should NOT inject when isContinuationStopped returns true", async () => { + // given + const sessionID = "main-stopped" + setMainSession(sessionID) + + writeTaskFile(taskDir, { + id: "T-1", + subject: "Task 1", + description: "", + status: "pending", + blocks: [], + blockedBy: [], + threadID: "test", + }) + + const hook = createTaskContinuationEnforcer(createMockPluginInput(), createConfig(taskDir), { + isContinuationStopped: (id) => id === sessionID, + }) + + // when + await hook.handler({ event: { type: "session.idle", properties: { sessionID } } }) + await fakeTimers.advanceBy(3000) + + // then + expect(promptCalls).toHaveLength(0) + }) + + test("should cancel all countdowns via cancelAllCountdowns", async () => { + // given + 
const sessionID = "main-cancel-all" + setMainSession(sessionID) + + writeTaskFile(taskDir, { + id: "T-1", + subject: "Task 1", + description: "", + status: "pending", + blocks: [], + blockedBy: [], + threadID: "test", + }) + + const hook = createTaskContinuationEnforcer(createMockPluginInput(), createConfig(taskDir), {}) + + // when + await hook.handler({ event: { type: "session.idle", properties: { sessionID } } }) + await fakeTimers.advanceBy(500) + hook.cancelAllCountdowns() + await fakeTimers.advanceBy(3000) + + // then + expect(promptCalls).toHaveLength(0) + }) +}) diff --git a/src/hooks/task-continuation-enforcer.ts b/src/hooks/task-continuation-enforcer.ts new file mode 100644 index 000000000..f3b7f9c54 --- /dev/null +++ b/src/hooks/task-continuation-enforcer.ts @@ -0,0 +1,530 @@ +import type { PluginInput } from "@opencode-ai/plugin" +import { existsSync, readdirSync } from "node:fs" +import { join } from "node:path" + +import type { BackgroundManager } from "../features/background-agent" +import { getMainSessionID, subagentSessions } from "../features/claude-code-session-state" +import { + findNearestMessageWithFields, + MESSAGE_STORAGE, + type ToolPermission, +} from "../features/hook-message-injector" +import { listTaskFiles, readJsonSafe, getTaskDir } from "../features/claude-tasks/storage" +import type { OhMyOpenCodeConfig } from "../config/schema" +import { TaskObjectSchema } from "../tools/task/types" +import type { TaskObject } from "../tools/task/types" +import { log } from "../shared/logger" +import { createSystemDirective, SystemDirectiveTypes } from "../shared/system-directive" + +const HOOK_NAME = "task-continuation-enforcer" + +const DEFAULT_SKIP_AGENTS = ["prometheus", "compaction"] + +export interface TaskContinuationEnforcerOptions { + backgroundManager?: BackgroundManager + skipAgents?: string[] + isContinuationStopped?: (sessionID: string) => boolean +} + +export interface TaskContinuationEnforcer { + handler: (input: { event: { type: 
string; properties?: unknown } }) => Promise<void> + markRecovering: (sessionID: string) => void + markRecoveryComplete: (sessionID: string) => void + cancelAllCountdowns: () => void +} + +/** Per-session bookkeeping held in the enforcer sessions map: pending countdown handles, recovery flag, and the timestamps of countdown start and of the last abort seen via session.error. */ interface SessionState { + countdownTimer?: ReturnType<typeof setTimeout> + countdownInterval?: ReturnType<typeof setInterval> + isRecovering?: boolean + countdownStartedAt?: number + abortDetectedAt?: number +} + +const CONTINUATION_PROMPT = `${createSystemDirective(SystemDirectiveTypes.TASK_CONTINUATION)} + +Incomplete tasks remain in your task list. Continue working on the next pending task. + +- Proceed without asking for permission +- Mark each task complete when finished +- Do not stop until all tasks are done` + +/* Countdown tuning: seconds between session.idle and the injected prompt, the duration passed to each toast, and a grace period in ms. NOTE(review): the grace period is not read in the code visible here; the tests describe it as the window after countdown start in which a user message does not cancel. */ const COUNTDOWN_SECONDS = 2 +const TOAST_DURATION_MS = 900 +const COUNTDOWN_GRACE_PERIOD_MS = 500 + +/** Finds the message directory for a session: MESSAGE_STORAGE/sessionID if it exists, otherwise the first MESSAGE_STORAGE/entry/sessionID that exists. Returns null when MESSAGE_STORAGE is missing or nothing matches. */ function getMessageDir(sessionID: string): string | null { + if (!existsSync(MESSAGE_STORAGE)) return null + + const directPath = join(MESSAGE_STORAGE, sessionID) + if (existsSync(directPath)) return directPath + + for (const dir of readdirSync(MESSAGE_STORAGE)) { + const sessionPath = join(MESSAGE_STORAGE, dir, sessionID) + if (existsSync(sessionPath)) return sessionPath + } + + return null +} + +/** Counts tasks whose status is neither completed nor deleted. */ function getIncompleteCount(tasks: TaskObject[]): number { + return tasks.filter(t => t.status !== "completed" && t.status !== "deleted").length +} + +/** Subset of a session message info object read for abort detection. */ interface MessageInfo { + id?: string + role?: string + error?: { name?: string; data?: unknown } +} + +/** Returns true when the last assistant-role message carries an error named MessageAbortedError or AbortError. Returns false for empty input, when there is no assistant message, or when that message has no error name. */ function isLastAssistantMessageAborted(messages: Array<{ info?: MessageInfo }>): boolean { + if (!messages || messages.length === 0) return false + + const assistantMessages = messages.filter(m => m.info?.role === "assistant") + if (assistantMessages.length === 0) return false + + const lastAssistant = assistantMessages[assistantMessages.length - 1] + const errorName = lastAssistant.info?.error?.name + + if (!errorName) return false + + return errorName === "MessageAbortedError" || errorName === "AbortError" +} + +/** Loads every task listed by listTaskFiles(config) from the directory given by getTaskDir(config), one JSON file per id, parsed through readJsonSafe with TaskObjectSchema. Entries for which readJsonSafe returns a falsy value are skipped. */ function loadTasksFromDisk(config: Partial<OhMyOpenCodeConfig>):
TaskObject[] { + const taskIds = listTaskFiles(config) + const taskDirectory = getTaskDir(config) + const tasks: TaskObject[] = [] + + for (const id of taskIds) { + const task = readJsonSafe(join(taskDirectory, `${id}.json`), TaskObjectSchema) + if (task) tasks.push(task) + } + + return tasks +} + +/** Creates the task-continuation hook for the given plugin context and config. options.skipAgents defaults to DEFAULT_SKIP_AGENTS; backgroundManager and isContinuationStopped are optional. */ export function createTaskContinuationEnforcer( + ctx: PluginInput, + config: Partial<OhMyOpenCodeConfig>, + options: TaskContinuationEnforcerOptions = {} +): TaskContinuationEnforcer { + const { backgroundManager, skipAgents = DEFAULT_SKIP_AGENTS, isContinuationStopped } = options + const sessions = new Map<string, SessionState>() + + /* Returns the state for a session, creating an empty entry on first use. */ function getState(sessionID: string): SessionState { + let state = sessions.get(sessionID) + if (!state) { + state = {} + sessions.set(sessionID, state) + } + return state + } + + /* Clears the pending timeout and interval and resets countdownStartedAt; does nothing for a session without state. */ function cancelCountdown(sessionID: string): void { + const state = sessions.get(sessionID) + if (!state) return + + if (state.countdownTimer) { + clearTimeout(state.countdownTimer) + state.countdownTimer = undefined + } + if (state.countdownInterval) { + clearInterval(state.countdownInterval) + state.countdownInterval = undefined + } + state.countdownStartedAt = undefined + } + + /* Cancels any countdown and forgets the session entirely. */ function cleanup(sessionID: string): void { + cancelCountdown(sessionID) + sessions.delete(sessionID) + } + + /* Flags the session as recovering and cancels any running countdown. */ const markRecovering = (sessionID: string): void => { + const state = getState(sessionID) + state.isRecovering = true + cancelCountdown(sessionID) + log(`[${HOOK_NAME}] Session marked as recovering`, { sessionID }) + } + + /* Clears the recovering flag; sessions without state are left untouched. */ const markRecoveryComplete = (sessionID: string): void => { + const state = sessions.get(sessionID) + if (state) { + state.isRecovering = false + log(`[${HOOK_NAME}] Session recovery complete`, { sessionID }) + } + } + + /* Shows the countdown toast; failures from showToast are swallowed so the countdown is never interrupted by UI errors. */ async function showCountdownToast(seconds: number, incompleteCount: number): Promise<void> { + await ctx.client.tui + .showToast({ + body: { + title: "Task Continuation", + message: `Resuming in ${seconds}s... 
(${incompleteCount} tasks remaining)`, + variant: "warning" as const, + duration: TOAST_DURATION_MS, + }, + }) + .catch(() => {}) + } + + interface ResolvedMessageInfo { + agent?: string + model?: { providerID: string; modelID: string } + tools?: Record + } + + async function injectContinuation( + sessionID: string, + incompleteCount: number, + total: number, + resolvedInfo?: ResolvedMessageInfo + ): Promise { + const state = sessions.get(sessionID) + + if (state?.isRecovering) { + log(`[${HOOK_NAME}] Skipped injection: in recovery`, { sessionID }) + return + } + + const hasRunningBgTasks = backgroundManager + ? backgroundManager.getTasksByParentSession(sessionID).some(t => t.status === "running") + : false + + if (hasRunningBgTasks) { + log(`[${HOOK_NAME}] Skipped injection: background tasks running`, { sessionID }) + return + } + + const tasks = loadTasksFromDisk(config) + const freshIncompleteCount = getIncompleteCount(tasks) + if (freshIncompleteCount === 0) { + log(`[${HOOK_NAME}] Skipped injection: no incomplete tasks`, { sessionID }) + return + } + + let agentName = resolvedInfo?.agent + let model = resolvedInfo?.model + let tools = resolvedInfo?.tools + + if (!agentName || !model) { + const messageDir = getMessageDir(sessionID) + const prevMessage = messageDir ? findNearestMessageWithFields(messageDir) : null + agentName = agentName ?? prevMessage?.agent + model = + model ?? + (prevMessage?.model?.providerID && prevMessage?.model?.modelID + ? { + providerID: prevMessage.model.providerID, + modelID: prevMessage.model.modelID, + ...(prevMessage.model.variant ? { variant: prevMessage.model.variant } : {}), + } + : undefined) + tools = tools ?? 
prevMessage?.tools + } + + if (agentName && skipAgents.includes(agentName)) { + log(`[${HOOK_NAME}] Skipped: agent in skipAgents list`, { sessionID, agent: agentName }) + return + } + + const editPermission = tools?.edit + const writePermission = tools?.write + const hasWritePermission = + !tools || + (editPermission !== false && editPermission !== "deny" && writePermission !== false && writePermission !== "deny") + if (!hasWritePermission) { + log(`[${HOOK_NAME}] Skipped: agent lacks write permission`, { sessionID, agent: agentName }) + return + } + + const incompleteTasks = tasks.filter(t => t.status !== "completed" && t.status !== "deleted") + const taskList = incompleteTasks.map(t => `- [${t.status}] ${t.subject}`).join("\n") + const prompt = `${CONTINUATION_PROMPT} + +[Status: ${tasks.length - freshIncompleteCount}/${tasks.length} completed, ${freshIncompleteCount} remaining] + +Remaining tasks: +${taskList}` + + try { + log(`[${HOOK_NAME}] Injecting continuation`, { + sessionID, + agent: agentName, + model, + incompleteCount: freshIncompleteCount, + }) + + await ctx.client.session.prompt({ + path: { id: sessionID }, + body: { + agent: agentName, + ...(model !== undefined ? 
{ model } : {}), + parts: [{ type: "text", text: prompt }], + }, + query: { directory: ctx.directory }, + }) + + log(`[${HOOK_NAME}] Injection successful`, { sessionID }) + } catch (err) { + log(`[${HOOK_NAME}] Injection failed`, { sessionID, error: String(err) }) + } + } + + function startCountdown( + sessionID: string, + incompleteCount: number, + total: number, + resolvedInfo?: ResolvedMessageInfo + ): void { + const state = getState(sessionID) + cancelCountdown(sessionID) + + let secondsRemaining = COUNTDOWN_SECONDS + showCountdownToast(secondsRemaining, incompleteCount) + state.countdownStartedAt = Date.now() + + state.countdownInterval = setInterval(() => { + secondsRemaining-- + if (secondsRemaining > 0) { + showCountdownToast(secondsRemaining, incompleteCount) + } + }, 1000) + + state.countdownTimer = setTimeout(() => { + cancelCountdown(sessionID) + injectContinuation(sessionID, incompleteCount, total, resolvedInfo) + }, COUNTDOWN_SECONDS * 1000) + + log(`[${HOOK_NAME}] Countdown started`, { sessionID, seconds: COUNTDOWN_SECONDS, incompleteCount }) + } + + const handler = async ({ event }: { event: { type: string; properties?: unknown } }): Promise => { + const props = event.properties as Record | undefined + + if (event.type === "session.error") { + const sessionID = props?.sessionID as string | undefined + if (!sessionID) return + + const error = props?.error as { name?: string } | undefined + if (error?.name === "MessageAbortedError" || error?.name === "AbortError") { + const state = getState(sessionID) + state.abortDetectedAt = Date.now() + log(`[${HOOK_NAME}] Abort detected via session.error`, { sessionID, errorName: error.name }) + } + + cancelCountdown(sessionID) + log(`[${HOOK_NAME}] session.error`, { sessionID }) + return + } + + if (event.type === "session.idle") { + const sessionID = props?.sessionID as string | undefined + if (!sessionID) return + + log(`[${HOOK_NAME}] session.idle`, { sessionID }) + + const mainSessionID = getMainSessionID() + 
const isMainSession = sessionID === mainSessionID + const isBackgroundTaskSession = subagentSessions.has(sessionID) + + if (mainSessionID && !isMainSession && !isBackgroundTaskSession) { + log(`[${HOOK_NAME}] Skipped: not main or background task session`, { sessionID }) + return + } + + const state = getState(sessionID) + + if (state.isRecovering) { + log(`[${HOOK_NAME}] Skipped: in recovery`, { sessionID }) + return + } + + // Check 1: Event-based abort detection (primary, most reliable) + if (state.abortDetectedAt) { + const timeSinceAbort = Date.now() - state.abortDetectedAt + const ABORT_WINDOW_MS = 3000 + if (timeSinceAbort < ABORT_WINDOW_MS) { + log(`[${HOOK_NAME}] Skipped: abort detected via event ${timeSinceAbort}ms ago`, { sessionID }) + state.abortDetectedAt = undefined + return + } + state.abortDetectedAt = undefined + } + + const hasRunningBgTasks = backgroundManager + ? backgroundManager.getTasksByParentSession(sessionID).some(t => t.status === "running") + : false + + if (hasRunningBgTasks) { + log(`[${HOOK_NAME}] Skipped: background tasks running`, { sessionID }) + return + } + + // Check 2: API-based abort detection (fallback, for cases where event was missed) + try { + const messagesResp = await ctx.client.session.messages({ + path: { id: sessionID }, + query: { directory: ctx.directory }, + }) + const messages = (messagesResp as { data?: Array<{ info?: MessageInfo }> }).data ?? 
[] + + if (isLastAssistantMessageAborted(messages)) { + log(`[${HOOK_NAME}] Skipped: last assistant message was aborted (API fallback)`, { sessionID }) + return + } + } catch (err) { + log(`[${HOOK_NAME}] Messages fetch failed, continuing`, { sessionID, error: String(err) }) + } + + const tasks = loadTasksFromDisk(config) + + if (!tasks || tasks.length === 0) { + log(`[${HOOK_NAME}] No tasks`, { sessionID }) + return + } + + const incompleteCount = getIncompleteCount(tasks) + if (incompleteCount === 0) { + log(`[${HOOK_NAME}] All tasks complete`, { sessionID, total: tasks.length }) + return + } + + let resolvedInfo: ResolvedMessageInfo | undefined + let hasCompactionMessage = false + try { + const messagesResp = await ctx.client.session.messages({ + path: { id: sessionID }, + }) + const messages = (messagesResp.data ?? []) as Array<{ + info?: { + agent?: string + model?: { providerID: string; modelID: string } + modelID?: string + providerID?: string + tools?: Record + } + }> + for (let i = messages.length - 1; i >= 0; i--) { + const info = messages[i].info + if (info?.agent === "compaction") { + hasCompactionMessage = true + continue + } + if (info?.agent || info?.model || (info?.modelID && info?.providerID)) { + resolvedInfo = { + agent: info.agent, + model: + info.model ?? + (info.providerID && info.modelID + ? 
{ providerID: info.providerID, modelID: info.modelID } + : undefined), + tools: info.tools, + } + break + } + } + } catch (err) { + log(`[${HOOK_NAME}] Failed to fetch messages for agent check`, { sessionID, error: String(err) }) + } + + log(`[${HOOK_NAME}] Agent check`, { + sessionID, + agentName: resolvedInfo?.agent, + skipAgents, + hasCompactionMessage, + }) + if (resolvedInfo?.agent && skipAgents.includes(resolvedInfo.agent)) { + log(`[${HOOK_NAME}] Skipped: agent in skipAgents list`, { sessionID, agent: resolvedInfo.agent }) + return + } + if (hasCompactionMessage && !resolvedInfo?.agent) { + log(`[${HOOK_NAME}] Skipped: compaction occurred but no agent info resolved`, { sessionID }) + return + } + + if (isContinuationStopped?.(sessionID)) { + log(`[${HOOK_NAME}] Skipped: continuation stopped for session`, { sessionID }) + return + } + + startCountdown(sessionID, incompleteCount, tasks.length, resolvedInfo) + return + } + + if (event.type === "message.updated") { + const info = props?.info as Record | undefined + const sessionID = info?.sessionID as string | undefined + const role = info?.role as string | undefined + + if (!sessionID) return + + if (role === "user") { + const state = sessions.get(sessionID) + if (state?.countdownStartedAt) { + const elapsed = Date.now() - state.countdownStartedAt + if (elapsed < COUNTDOWN_GRACE_PERIOD_MS) { + log(`[${HOOK_NAME}] Ignoring user message in grace period`, { sessionID, elapsed }) + return + } + } + if (state) state.abortDetectedAt = undefined + cancelCountdown(sessionID) + } + + if (role === "assistant") { + const state = sessions.get(sessionID) + if (state) state.abortDetectedAt = undefined + cancelCountdown(sessionID) + } + return + } + + if (event.type === "message.part.updated") { + const info = props?.info as Record | undefined + const sessionID = info?.sessionID as string | undefined + const role = info?.role as string | undefined + + if (sessionID && role === "assistant") { + const state = 
sessions.get(sessionID) + if (state) state.abortDetectedAt = undefined + cancelCountdown(sessionID) + } + return + } + + if (event.type === "tool.execute.before" || event.type === "tool.execute.after") { + const sessionID = props?.sessionID as string | undefined + if (sessionID) { + const state = sessions.get(sessionID) + if (state) state.abortDetectedAt = undefined + cancelCountdown(sessionID) + } + return + } + + if (event.type === "session.deleted") { + const sessionInfo = props?.info as { id?: string } | undefined + if (sessionInfo?.id) { + cleanup(sessionInfo.id) + log(`[${HOOK_NAME}] Session deleted: cleaned up`, { sessionID: sessionInfo.id }) + } + return + } + } + + const cancelAllCountdowns = (): void => { + for (const sessionID of sessions.keys()) { + cancelCountdown(sessionID) + } + log(`[${HOOK_NAME}] All countdowns cancelled`) + } + + return { + handler, + markRecovering, + markRecoveryComplete, + cancelAllCountdowns, + } +} From 551dbc95f28d0ef9ffa2c2fe4494709b8ebb7229 Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Fri, 6 Feb 2026 11:21:53 +0900 Subject: [PATCH 03/30] feat(hooks): register task-continuation-enforcer in plugin lifecycle Integrates at 4 points: creation (gated by task_system), session recovery callbacks, event handler, and stop-continuation command. 
--- src/hooks/index.ts | 1 + src/index.ts | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/src/hooks/index.ts b/src/hooks/index.ts index bffb447e6..b97c68637 100644 --- a/src/hooks/index.ts +++ b/src/hooks/index.ts @@ -1,4 +1,5 @@ export { createTodoContinuationEnforcer, type TodoContinuationEnforcer } from "./todo-continuation-enforcer"; +export { createTaskContinuationEnforcer, type TaskContinuationEnforcer } from "./task-continuation-enforcer"; export { createContextWindowMonitorHook } from "./context-window-monitor"; export { createSessionNotification } from "./session-notification"; export { createSessionRecoveryHook, type SessionRecoveryHook, type SessionRecoveryOptions } from "./session-recovery"; diff --git a/src/index.ts b/src/index.ts index baf4f9590..cde0398c2 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,6 +1,7 @@ import type { Plugin, ToolDefinition } from "@opencode-ai/plugin"; import { createTodoContinuationEnforcer, + createTaskContinuationEnforcer, createContextWindowMonitorHook, createSessionRecoveryHook, createSessionNotification, @@ -478,6 +479,21 @@ const OhMyOpenCodePlugin: Plugin = async (ctx) => { }); const taskSystemEnabled = pluginConfig.experimental?.task_system ?? false; + + const taskContinuationEnforcer = isHookEnabled("task-continuation-enforcer") && taskSystemEnabled + ? createTaskContinuationEnforcer(ctx, pluginConfig, { + backgroundManager, + isContinuationStopped: stopContinuationGuard?.isStopped, + }) + : null; + + if (sessionRecovery && taskContinuationEnforcer) { + sessionRecovery.setOnAbortCallback(taskContinuationEnforcer.markRecovering); + sessionRecovery.setOnRecoveryCompleteCallback( + taskContinuationEnforcer.markRecoveryComplete, + ); + } + const taskToolsRecord: Record = taskSystemEnabled ? 
{ task_create: createTaskCreateTool(pluginConfig, ctx), @@ -627,6 +643,7 @@ const OhMyOpenCodePlugin: Plugin = async (ctx) => { await backgroundNotificationHook?.event(input); await sessionNotification?.(input); await todoContinuationEnforcer?.handler(input); + await taskContinuationEnforcer?.handler(input); await unstableAgentBabysitter?.event(input); await contextWindowMonitor?.event(input); await directoryAgentsInjector?.event(input); @@ -810,6 +827,7 @@ const OhMyOpenCodePlugin: Plugin = async (ctx) => { if (command === "stop-continuation" && sessionID) { stopContinuationGuard?.stop(sessionID); todoContinuationEnforcer?.cancelAllCountdowns(); + taskContinuationEnforcer?.cancelAllCountdowns(); ralphLoop?.cancelLoop(sessionID); clearBoulderState(ctx.directory); log("[stop-continuation] All continuation mechanisms stopped", { From 01594a67af291aa09b9e6b8e34d36da56b1c13bd Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Fri, 6 Feb 2026 11:41:31 +0900 Subject: [PATCH 04/30] fix(hooks): compose session recovery callbacks for continuation enforcers Cubic found that registering task-continuation-enforcer recovery callbacks overrode the todo-continuation-enforcer callbacks. Compose the callbacks so both enforcers receive abort/recovery notifications. --- src/index.ts | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/index.ts b/src/index.ts index cde0398c2..bcdb6b6c1 100644 --- a/src/index.ts +++ b/src/index.ts @@ -374,12 +374,7 @@ const OhMyOpenCodePlugin: Plugin = async (ctx) => { ) : null; - if (sessionRecovery && todoContinuationEnforcer) { - sessionRecovery.setOnAbortCallback(todoContinuationEnforcer.markRecovering); - sessionRecovery.setOnRecoveryCompleteCallback( - todoContinuationEnforcer.markRecoveryComplete, - ); - } + // sessionRecovery callbacks are setters; compose callbacks so both enforcers are notified. const backgroundNotificationHook = isHookEnabled("background-notification") ? 
createBackgroundNotificationHook(backgroundManager) @@ -487,11 +482,15 @@ const OhMyOpenCodePlugin: Plugin = async (ctx) => { }) : null; - if (sessionRecovery && taskContinuationEnforcer) { - sessionRecovery.setOnAbortCallback(taskContinuationEnforcer.markRecovering); - sessionRecovery.setOnRecoveryCompleteCallback( - taskContinuationEnforcer.markRecoveryComplete, - ); + if (sessionRecovery && (todoContinuationEnforcer || taskContinuationEnforcer)) { + sessionRecovery.setOnAbortCallback((sessionID) => { + todoContinuationEnforcer?.markRecovering(sessionID); + taskContinuationEnforcer?.markRecovering(sessionID); + }); + sessionRecovery.setOnRecoveryCompleteCallback((sessionID) => { + todoContinuationEnforcer?.markRecoveryComplete(sessionID); + taskContinuationEnforcer?.markRecoveryComplete(sessionID); + }); } const taskToolsRecord: Record = taskSystemEnabled From 139f392d76644d9a863e6653156cae90f1e40632 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sun, 8 Feb 2026 03:38:39 +0000 Subject: [PATCH 05/30] release: v3.3.2 --- package.json | 16 ++++++++-------- packages/darwin-arm64/package.json | 2 +- packages/darwin-x64/package.json | 2 +- packages/linux-arm64-musl/package.json | 2 +- packages/linux-arm64/package.json | 2 +- packages/linux-x64-musl/package.json | 2 +- packages/linux-x64/package.json | 2 +- packages/windows-x64/package.json | 2 +- 8 files changed, 15 insertions(+), 15 deletions(-) diff --git a/package.json b/package.json index 07ee25b32..484523471 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "oh-my-opencode", - "version": "3.3.1", + "version": "3.3.2", "description": "The Best AI Agent Harness - Batteries-Included OpenCode Plugin with Multi-Model Orchestration, Parallel Background Agents, and Crafted LSP/AST Tools", "main": "dist/index.js", "types": "dist/index.d.ts", @@ -74,13 +74,13 @@ "typescript": "^5.7.3" }, "optionalDependencies": { - "oh-my-opencode-darwin-arm64": "3.3.1", - "oh-my-opencode-darwin-x64": 
"3.3.1", - "oh-my-opencode-linux-arm64": "3.3.1", - "oh-my-opencode-linux-arm64-musl": "3.3.1", - "oh-my-opencode-linux-x64": "3.3.1", - "oh-my-opencode-linux-x64-musl": "3.3.1", - "oh-my-opencode-windows-x64": "3.3.1" + "oh-my-opencode-darwin-arm64": "3.3.2", + "oh-my-opencode-darwin-x64": "3.3.2", + "oh-my-opencode-linux-arm64": "3.3.2", + "oh-my-opencode-linux-arm64-musl": "3.3.2", + "oh-my-opencode-linux-x64": "3.3.2", + "oh-my-opencode-linux-x64-musl": "3.3.2", + "oh-my-opencode-windows-x64": "3.3.2" }, "trustedDependencies": [ "@ast-grep/cli", diff --git a/packages/darwin-arm64/package.json b/packages/darwin-arm64/package.json index 2fed414c9..5b57136a3 100644 --- a/packages/darwin-arm64/package.json +++ b/packages/darwin-arm64/package.json @@ -1,6 +1,6 @@ { "name": "oh-my-opencode-darwin-arm64", - "version": "3.3.1", + "version": "3.3.2", "description": "Platform-specific binary for oh-my-opencode (darwin-arm64)", "license": "MIT", "repository": { diff --git a/packages/darwin-x64/package.json b/packages/darwin-x64/package.json index 090606c51..8de8e3e30 100644 --- a/packages/darwin-x64/package.json +++ b/packages/darwin-x64/package.json @@ -1,6 +1,6 @@ { "name": "oh-my-opencode-darwin-x64", - "version": "3.3.1", + "version": "3.3.2", "description": "Platform-specific binary for oh-my-opencode (darwin-x64)", "license": "MIT", "repository": { diff --git a/packages/linux-arm64-musl/package.json b/packages/linux-arm64-musl/package.json index d81b16089..97db05f74 100644 --- a/packages/linux-arm64-musl/package.json +++ b/packages/linux-arm64-musl/package.json @@ -1,6 +1,6 @@ { "name": "oh-my-opencode-linux-arm64-musl", - "version": "3.3.1", + "version": "3.3.2", "description": "Platform-specific binary for oh-my-opencode (linux-arm64-musl)", "license": "MIT", "repository": { diff --git a/packages/linux-arm64/package.json b/packages/linux-arm64/package.json index c3a6d1c20..51af31d1f 100644 --- a/packages/linux-arm64/package.json +++ 
b/packages/linux-arm64/package.json @@ -1,6 +1,6 @@ { "name": "oh-my-opencode-linux-arm64", - "version": "3.3.1", + "version": "3.3.2", "description": "Platform-specific binary for oh-my-opencode (linux-arm64)", "license": "MIT", "repository": { diff --git a/packages/linux-x64-musl/package.json b/packages/linux-x64-musl/package.json index 7618b168a..042d71801 100644 --- a/packages/linux-x64-musl/package.json +++ b/packages/linux-x64-musl/package.json @@ -1,6 +1,6 @@ { "name": "oh-my-opencode-linux-x64-musl", - "version": "3.3.1", + "version": "3.3.2", "description": "Platform-specific binary for oh-my-opencode (linux-x64-musl)", "license": "MIT", "repository": { diff --git a/packages/linux-x64/package.json b/packages/linux-x64/package.json index 0f5b0d531..4310493e8 100644 --- a/packages/linux-x64/package.json +++ b/packages/linux-x64/package.json @@ -1,6 +1,6 @@ { "name": "oh-my-opencode-linux-x64", - "version": "3.3.1", + "version": "3.3.2", "description": "Platform-specific binary for oh-my-opencode (linux-x64)", "license": "MIT", "repository": { diff --git a/packages/windows-x64/package.json b/packages/windows-x64/package.json index dba037421..84932c292 100644 --- a/packages/windows-x64/package.json +++ b/packages/windows-x64/package.json @@ -1,6 +1,6 @@ { "name": "oh-my-opencode-windows-x64", - "version": "3.3.1", + "version": "3.3.2", "description": "Platform-specific binary for oh-my-opencode (windows-x64)", "license": "MIT", "repository": { From 09999587f5d88e1f8da0bfc723a01c27e34ec024 Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Sun, 8 Feb 2026 12:38:42 +0900 Subject: [PATCH 06/30] fix(mcp): append EXA_API_KEY to Exa MCP URL when env var is set (#1627) --- src/mcp/websearch.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/mcp/websearch.ts b/src/mcp/websearch.ts index 91eddccc2..8dd8516c5 100644 --- a/src/mcp/websearch.ts +++ b/src/mcp/websearch.ts @@ -31,7 +31,9 @@ export function createWebsearchConfig(config?: 
WebsearchConfig): RemoteMcpConfig // Default to Exa return { type: "remote" as const, - url: "https://mcp.exa.ai/mcp?tools=web_search_exa", + url: process.env.EXA_API_KEY + ? "https://mcp.exa.ai/mcp?tools=web_search_exa&exaApiKey=" + process.env.EXA_API_KEY + : "https://mcp.exa.ai/mcp?tools=web_search_exa", enabled: true, headers: process.env.EXA_API_KEY ? { "x-api-key": process.env.EXA_API_KEY } From f1fcc26aaaede86eaf6e829989ad9e4a07ee617d Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Sun, 8 Feb 2026 13:05:06 +0900 Subject: [PATCH 07/30] fix(background-agent): serialize parent notifications (#1582) --- src/features/background-agent/manager.test.ts | 93 +++++++++++++++++++ src/features/background-agent/manager.ts | 47 ++++++++-- 2 files changed, 131 insertions(+), 9 deletions(-) diff --git a/src/features/background-agent/manager.test.ts b/src/features/background-agent/manager.test.ts index d81698cb5..c4db9056b 100644 --- a/src/features/background-agent/manager.test.ts +++ b/src/features/background-agent/manager.test.ts @@ -1123,6 +1123,99 @@ describe("BackgroundManager.tryCompleteTask", () => { expect(task.status).toBe("completed") expect(getPendingByParent(manager).get(task.parentSessionID)).toBeUndefined() }) + + test("should avoid overlapping promptAsync calls when tasks complete concurrently", async () => { + // given + type PromptAsyncBody = Record & { noReply?: boolean } + + let resolveMessages: ((value: { data: unknown[] }) => void) | undefined + const messagesBarrier = new Promise<{ data: unknown[] }>((resolve) => { + resolveMessages = resolve + }) + + const promptBodies: PromptAsyncBody[] = [] + let promptInFlight = false + let rejectedCount = 0 + let promptCallCount = 0 + + let releaseFirstPrompt: (() => void) | undefined + let resolveFirstStarted: (() => void) | undefined + const firstStarted = new Promise((resolve) => { + resolveFirstStarted = resolve + }) + + const client = { + session: { + prompt: async () => ({}), + abort: async () => ({}), + 
messages: async () => messagesBarrier, + promptAsync: async (args: { path: { id: string }; body: PromptAsyncBody }) => { + promptBodies.push(args.body) + + if (!promptInFlight) { + promptCallCount += 1 + if (promptCallCount === 1) { + promptInFlight = true + resolveFirstStarted?.() + return await new Promise((resolve) => { + releaseFirstPrompt = () => { + promptInFlight = false + resolve({}) + } + }) + } + + return {} + } + + rejectedCount += 1 + throw new Error("BUSY") + }, + }, + } + + manager.shutdown() + manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput) + + const parentSessionID = "parent-session" + const taskA = createMockTask({ + id: "task-a", + sessionID: "session-a", + parentSessionID, + }) + const taskB = createMockTask({ + id: "task-b", + sessionID: "session-b", + parentSessionID, + }) + + getTaskMap(manager).set(taskA.id, taskA) + getTaskMap(manager).set(taskB.id, taskB) + getPendingByParent(manager).set(parentSessionID, new Set([taskA.id, taskB.id])) + + // when + const completionA = tryCompleteTaskForTest(manager, taskA) + const completionB = tryCompleteTaskForTest(manager, taskB) + resolveMessages?.({ data: [] }) + + await firstStarted + + // Give the second completion a chance to attempt promptAsync while the first is in-flight. + // In the buggy implementation, this triggers an overlap and increments rejectedCount. 
+ for (let i = 0; i < 20; i++) { + await Promise.resolve() + if (rejectedCount > 0) break + if (promptBodies.length >= 2) break + } + + releaseFirstPrompt?.() + await Promise.all([completionA, completionB]) + + // then + expect(rejectedCount).toBe(0) + expect(promptBodies.length).toBe(2) + expect(promptBodies.some((b) => b.noReply === false)).toBe(true) + }) }) describe("BackgroundManager.trackTask", () => { diff --git a/src/features/background-agent/manager.ts b/src/features/background-agent/manager.ts index e631f30a0..0604c876b 100644 --- a/src/features/background-agent/manager.ts +++ b/src/features/background-agent/manager.ts @@ -89,6 +89,7 @@ export class BackgroundManager { private processingKeys: Set = new Set() private completionTimers: Map> = new Map() private idleDeferralTimers: Map> = new Map() + private notificationQueueByParent: Map> = new Map() constructor( ctx: PluginInput, @@ -358,7 +359,7 @@ export class BackgroundManager { this.markForNotification(existingTask) this.cleanupPendingByParent(existingTask) - this.notifyParentSession(existingTask).catch(err => { + this.enqueueNotificationForParent(existingTask.parentSessionID, () => this.notifyParentSession(existingTask)).catch(err => { log("[background-agent] Failed to notify on error:", err) }) } @@ -615,7 +616,7 @@ export class BackgroundManager { this.markForNotification(existingTask) this.cleanupPendingByParent(existingTask) - this.notifyParentSession(existingTask).catch(err => { + this.enqueueNotificationForParent(existingTask.parentSessionID, () => this.notifyParentSession(existingTask)).catch(err => { log("[background-agent] Failed to notify on resume error:", err) }) }) @@ -949,7 +950,7 @@ export class BackgroundManager { this.markForNotification(task) try { - await this.notifyParentSession(task) + await this.enqueueNotificationForParent(task.parentSessionID, () => this.notifyParentSession(task)) log(`[background-agent] Task cancelled via ${source}:`, task.id) } catch (err) { 
log("[background-agent] Error in notifyParentSession for cancelled task:", { taskId: task.id, error: err }) @@ -1084,7 +1085,7 @@ export class BackgroundManager { } try { - await this.notifyParentSession(task) + await this.enqueueNotificationForParent(task.parentSessionID, () => this.notifyParentSession(task)) log(`[background-agent] Task completed via ${source}:`, task.id) } catch (err) { log("[background-agent] Error in notifyParentSession:", { taskId: task.id, error: err }) @@ -1114,16 +1115,19 @@ export class BackgroundManager { // Update pending tracking and check if all tasks complete const pendingSet = this.pendingByParent.get(task.parentSessionID) + let allComplete = false + let remainingCount = 0 if (pendingSet) { pendingSet.delete(task.id) - if (pendingSet.size === 0) { + remainingCount = pendingSet.size + allComplete = remainingCount === 0 + if (allComplete) { this.pendingByParent.delete(task.parentSessionID) } + } else { + allComplete = true } - const allComplete = !pendingSet || pendingSet.size === 0 - const remainingCount = pendingSet?.size ?? 0 - const statusText = task.status === "completed" ? "COMPLETED" : "CANCELLED" const errorInfo = task.error ? 
`\n**Error:** ${task.error}` : "" @@ -1378,7 +1382,7 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea log(`[background-agent] Task ${task.id} interrupted: stale timeout`) try { - await this.notifyParentSession(task) + await this.enqueueNotificationForParent(task.parentSessionID, () => this.notifyParentSession(task)) } catch (err) { log("[background-agent] Error in notifyParentSession for stale task:", { taskId: task.id, error: err }) } @@ -1572,12 +1576,37 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea this.tasks.clear() this.notifications.clear() this.pendingByParent.clear() + this.notificationQueueByParent.clear() this.queuesByKey.clear() this.processingKeys.clear() this.unregisterProcessCleanup() log("[background-agent] Shutdown complete") } + + private enqueueNotificationForParent( + parentSessionID: string | undefined, + operation: () => Promise + ): Promise { + if (!parentSessionID) { + return operation() + } + + const previous = this.notificationQueueByParent.get(parentSessionID) ?? 
Promise.resolve() + const current = previous + .catch(() => {}) + .then(operation) + + this.notificationQueueByParent.set(parentSessionID, current) + + void current.finally(() => { + if (this.notificationQueueByParent.get(parentSessionID) === current) { + this.notificationQueueByParent.delete(parentSessionID) + } + }).catch(() => {}) + + return current + } } function registerProcessSignal( From f6fc30ada51bf13aa5c98377e20a28b46b2089e7 Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Sun, 8 Feb 2026 13:09:58 +0900 Subject: [PATCH 08/30] fix: add default value for load_skills parameter in task tool (#1493) Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode) Co-authored-by: Sisyphus --- src/tools/delegate-task/tools.test.ts | 89 +++++++++++++++------------ src/tools/delegate-task/tools.ts | 4 +- 2 files changed, 53 insertions(+), 40 deletions(-) diff --git a/src/tools/delegate-task/tools.test.ts b/src/tools/delegate-task/tools.test.ts index 63a42297f..6b08948e6 100644 --- a/src/tools/delegate-task/tools.test.ts +++ b/src/tools/delegate-task/tools.test.ts @@ -849,46 +849,59 @@ describe("sisyphus-task", () => { }) describe("skills parameter", () => { - test("skills parameter is required - throws error when not provided", async () => { + test("load_skills defaults to empty array when not provided (undefined)", async () => { // given const { createDelegateTask } = require("./tools") - - const mockManager = { launch: async () => ({}) } - const mockClient = { - app: { agents: async () => ({ data: [] }) }, - config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, - session: { - create: async () => ({ data: { id: "test-session" } }), - prompt: async () => ({ data: {} }), - promptAsync: async () => ({ data: {} }), - messages: async () => ({ data: [] }), - }, - } - - const tool = createDelegateTask({ - manager: mockManager, - client: mockClient, - }) - - const toolContext = { - sessionID: "parent-session", - messageID: "parent-message", 
- agent: "sisyphus", - abort: new AbortController().signal, - } - - // when - skills not provided (undefined) - // then - should throw error about missing skills - await expect(tool.execute( - { - description: "Test task", - prompt: "Do something", - category: "ultrabrain", - run_in_background: false, - }, - toolContext - )).rejects.toThrow("IT IS HIGHLY RECOMMENDED") - }) + let promptBody: any + + const mockManager = { launch: async () => ({}) } + + const promptMock = async (input: any) => { + promptBody = input.body + return { data: {} } + } + + const mockClient = { + app: { agents: async () => ({ data: [] }) }, + config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, + session: { + get: async () => ({ data: { directory: "/project" } }), + create: async () => ({ data: { id: "ses_default_skills" } }), + prompt: promptMock, + promptAsync: promptMock, + messages: async () => ({ + data: [{ info: { role: "assistant" }, parts: [{ type: "text", text: "Done" }] }] + }), + status: async () => ({ data: {} }), + }, + } + + const tool = createDelegateTask({ + manager: mockManager, + client: mockClient, + }) + + const toolContext = { + sessionID: "parent-session", + messageID: "parent-message", + agent: "sisyphus", + abort: new AbortController().signal, + } + + // when - load_skills not provided (undefined) - should default to [] + await tool.execute( + { + description: "Test task", + prompt: "Do something", + category: "ultrabrain", + run_in_background: false, + }, + toolContext + ) + + // then - should proceed without error, prompt should be called + expect(promptBody).toBeDefined() + }, { timeout: 20000 }) test("null skills throws error", async () => { // given diff --git a/src/tools/delegate-task/tools.ts b/src/tools/delegate-task/tools.ts index 1db72408c..582eb11fd 100644 --- a/src/tools/delegate-task/tools.ts +++ b/src/tools/delegate-task/tools.ts @@ -74,7 +74,7 @@ Prompts MUST be in English.` return tool({ description, args: { - load_skills: 
tool.schema.array(tool.schema.string()).describe("Skill names to inject. REQUIRED - pass [] if no skills needed, but IT IS HIGHLY RECOMMENDED to pass proper skills like [\"playwright\"], [\"git-master\"] for best results."), + load_skills: tool.schema.array(tool.schema.string()).default([]).describe("Skill names to inject. Pass [] if no skills needed, but IT IS HIGHLY RECOMMENDED to pass proper skills like [\"playwright\"], [\"git-master\"] for best results."), description: tool.schema.string().describe("Short task description (3-5 words)"), prompt: tool.schema.string().describe("Full detailed prompt for the agent"), run_in_background: tool.schema.boolean().describe("true=async (returns task_id), false=sync (waits). Default: false"), @@ -97,7 +97,7 @@ Prompts MUST be in English.` throw new Error(`Invalid arguments: 'run_in_background' parameter is REQUIRED. Use run_in_background=false for task delegation, run_in_background=true only for parallel exploration.`) } if (args.load_skills === undefined) { - throw new Error(`Invalid arguments: 'load_skills' parameter is REQUIRED. Pass [] if no skills needed, but IT IS HIGHLY RECOMMENDED to pass proper skills like ["playwright"], ["git-master"] for best results.`) + args.load_skills = [] } if (args.load_skills === null) { throw new Error(`Invalid arguments: load_skills=null is not allowed. Pass [] if no skills needed, but IT IS HIGHLY RECOMMENDED to pass proper skills.`) From 104b9fbb39cc855a1d00ea4add55fa8e9ba30904 Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Sun, 8 Feb 2026 13:13:47 +0900 Subject: [PATCH 09/30] test: add regression tests for sisyphus-junior model override in category delegation (#1295) Add targeted regression tests for the exact reproduction scenario from issue #1295: - quick category with sisyphusJuniorModel override (the reported scenario) - user-defined custom category with sisyphusJuniorModel fallback The underlying fix was already applied in PRs #1470 and #1556. 
These tests ensure the fix does not regress. Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode) Co-authored-by: Sisyphus --- src/tools/delegate-task/tools.test.ts | 121 ++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) diff --git a/src/tools/delegate-task/tools.test.ts b/src/tools/delegate-task/tools.test.ts index 63a42297f..bd8d8e862 100644 --- a/src/tools/delegate-task/tools.test.ts +++ b/src/tools/delegate-task/tools.test.ts @@ -2058,6 +2058,127 @@ describe("sisyphus-task", () => { expect(launchInput.model.providerID).toBe("openai") expect(launchInput.model.modelID).toBe("gpt-5.3-codex") }) + + test("sisyphus-junior model override works with quick category (#1295)", async () => { + // given - user configures agents.sisyphus-junior.model but uses quick category + const { createDelegateTask } = require("./tools") + let launchInput: any + + const mockManager = { + launch: async (input: any) => { + launchInput = input + return { + id: "task-1295-quick", + sessionID: "ses_1295_quick", + description: "Issue 1295 regression", + agent: "sisyphus-junior", + status: "running", + } + }, + } + + const mockClient = { + app: { agents: async () => ({ data: [] }) }, + config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, + model: { list: async () => [] }, + session: { + create: async () => ({ data: { id: "test-session" } }), + prompt: async () => ({ data: {} }), + messages: async () => ({ data: [] }), + }, + } + + const tool = createDelegateTask({ + manager: mockManager, + client: mockClient, + sisyphusJuniorModel: "anthropic/claude-sonnet-4-5", + }) + + const toolContext = { + sessionID: "parent-session", + messageID: "parent-message", + agent: "sisyphus", + abort: new AbortController().signal, + } + + // when - using quick category (default: anthropic/claude-haiku-4-5) + await tool.execute( + { + description: "Issue 1295 quick category test", + prompt: "Quick task", + category: "quick", + run_in_background: true, + 
load_skills: [], + }, + toolContext + ) + + // then - sisyphus-junior override model should be used, not category default + expect(launchInput.model.providerID).toBe("anthropic") + expect(launchInput.model.modelID).toBe("claude-sonnet-4-5") + }) + + test("sisyphus-junior model override works with user-defined category (#1295)", async () => { + // given - user has a custom category with no model requirement + const { createDelegateTask } = require("./tools") + let launchInput: any + + const mockManager = { + launch: async (input: any) => { + launchInput = input + return { + id: "task-1295-custom", + sessionID: "ses_1295_custom", + description: "Issue 1295 custom category", + agent: "sisyphus-junior", + status: "running", + } + }, + } + + const mockClient = { + app: { agents: async () => ({ data: [] }) }, + config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, + model: { list: async () => [] }, + session: { + create: async () => ({ data: { id: "test-session" } }), + prompt: async () => ({ data: {} }), + messages: async () => ({ data: [] }), + }, + } + + const tool = createDelegateTask({ + manager: mockManager, + client: mockClient, + sisyphusJuniorModel: "openai/gpt-5.2", + userCategories: { + "my-custom": { temperature: 0.5 }, + }, + }) + + const toolContext = { + sessionID: "parent-session", + messageID: "parent-message", + agent: "sisyphus", + abort: new AbortController().signal, + } + + // when - using custom category with no explicit model + await tool.execute( + { + description: "Custom category with agent model", + prompt: "Do something custom", + category: "my-custom", + run_in_background: true, + load_skills: [], + }, + toolContext + ) + + // then - sisyphus-junior override model should be used as fallback + expect(launchInput.model.providerID).toBe("openai") + expect(launchInput.model.modelID).toBe("gpt-5.2") + }) }) describe("browserProvider propagation", () => { From a06364081b33417621538265359e8038bb3ab047 Mon Sep 17 00:00:00 2001 
From: YeonGyu-Kim Date: Sun, 8 Feb 2026 13:14:11 +0900 Subject: [PATCH 10/30] fix(delegate-task): resolve user agent model config in subagent_type path (#1357) --- src/index.ts | 1 + src/tools/delegate-task/executor.ts | 47 +++++- src/tools/delegate-task/tools.test.ts | 207 +++++++++++++++++++++++++- src/tools/delegate-task/types.ts | 3 +- 4 files changed, 246 insertions(+), 12 deletions(-) diff --git a/src/index.ts b/src/index.ts index db49858c3..41168e3c5 100644 --- a/src/index.ts +++ b/src/index.ts @@ -488,6 +488,7 @@ const OhMyOpenCodePlugin: Plugin = async (ctx) => { disabledSkills, availableCategories, availableSkills, + agentOverrides: pluginConfig.agents, onSyncSessionCreated: async (event) => { log("[index] onSyncSessionCreated callback", { sessionID: event.sessionID, diff --git a/src/tools/delegate-task/executor.ts b/src/tools/delegate-task/executor.ts index 2a3be2b2d..886364f0f 100644 --- a/src/tools/delegate-task/executor.ts +++ b/src/tools/delegate-task/executor.ts @@ -1,5 +1,5 @@ import type { BackgroundManager } from "../../features/background-agent" -import type { CategoriesConfig, GitMasterConfig, BrowserAutomationProvider } from "../../config/schema" +import type { CategoriesConfig, GitMasterConfig, BrowserAutomationProvider, AgentOverrides } from "../../config/schema" import type { ModelFallbackInfo } from "../../features/task-toast-manager/types" import type { DelegateTaskArgs, ToolContextWithMetadata, OpencodeClient } from "./types" import { DEFAULT_CATEGORIES, CATEGORY_DESCRIPTIONS, isPlanAgent } from "./constants" @@ -15,7 +15,7 @@ import { subagentSessions, getSessionAgent } from "../../features/claude-code-se import { log, getAgentToolRestrictions, resolveModelPipeline, promptWithModelSuggestionRetry } from "../../shared" import { fetchAvailableModels, isModelAvailable } from "../../shared/model-availability" import { readConnectedProvidersCache } from "../../shared/connected-providers-cache" -import { CATEGORY_MODEL_REQUIREMENTS } from 
"../../shared/model-requirements" +import { AGENT_MODEL_REQUIREMENTS, CATEGORY_MODEL_REQUIREMENTS } from "../../shared/model-requirements" import { storeToolMetadata } from "../../features/tool-metadata-store" const SISYPHUS_JUNIOR_AGENT = "sisyphus-junior" @@ -28,6 +28,7 @@ export interface ExecutorContext { gitMasterConfig?: GitMasterConfig sisyphusJuniorModel?: string browserProvider?: BrowserAutomationProvider + agentOverrides?: AgentOverrides onSyncSessionCreated?: (event: { sessionID: string; parentID: string; title: string }) => Promise } @@ -940,8 +941,8 @@ export async function resolveSubagentExecution( executorCtx: ExecutorContext, parentAgent: string | undefined, categoryExamples: string -): Promise<{ agentToUse: string; categoryModel: { providerID: string; modelID: string } | undefined; error?: string }> { - const { client } = executorCtx +): Promise<{ agentToUse: string; categoryModel: { providerID: string; modelID: string; variant?: string } | undefined; error?: string }> { + const { client, agentOverrides } = executorCtx if (!args.subagent_type?.trim()) { return { agentToUse: "", categoryModel: undefined, error: `Agent name cannot be empty.` } @@ -970,7 +971,7 @@ Create the work plan directly - that's your job as the planning agent.`, } let agentToUse = agentName - let categoryModel: { providerID: string; modelID: string } | undefined + let categoryModel: { providerID: string; modelID: string; variant?: string } | undefined try { const agentsResult = await client.app.agents() @@ -1007,7 +1008,41 @@ Create the work plan directly - that's your job as the planning agent.`, agentToUse = matchedAgent.name - if (matchedAgent.model) { + const agentNameLower = agentToUse.toLowerCase() + const agentOverride = agentOverrides?.[agentNameLower as keyof typeof agentOverrides] + ?? (agentOverrides ? 
Object.entries(agentOverrides).find(([key]) => key.toLowerCase() === agentNameLower)?.[1] : undefined) + const agentRequirement = AGENT_MODEL_REQUIREMENTS[agentNameLower] + + if (agentOverride?.model || agentRequirement) { + const connectedProviders = readConnectedProvidersCache() + const availableModels = await fetchAvailableModels(client, { + connectedProviders: connectedProviders ?? undefined, + }) + + const matchedAgentModelStr = matchedAgent.model + ? `${matchedAgent.model.providerID}/${matchedAgent.model.modelID}` + : undefined + + const resolution = resolveModelPipeline({ + intent: { + userModel: agentOverride?.model, + categoryDefaultModel: matchedAgentModelStr, + }, + constraints: { availableModels }, + policy: { + fallbackChain: agentRequirement?.fallbackChain, + systemDefaultModel: undefined, + }, + }) + + if (resolution) { + const parsed = parseModelString(resolution.model) + if (parsed) { + const variantToUse = agentOverride?.variant ?? resolution.variant + categoryModel = variantToUse ? 
{ ...parsed, variant: variantToUse } : parsed + } + } + } else if (matchedAgent.model) { categoryModel = matchedAgent.model } } catch { diff --git a/src/tools/delegate-task/tools.test.ts b/src/tools/delegate-task/tools.test.ts index 63a42297f..25609dc0e 100644 --- a/src/tools/delegate-task/tools.test.ts +++ b/src/tools/delegate-task/tools.test.ts @@ -2841,8 +2841,8 @@ describe("sisyphus-task", () => { }) }, { timeout: 20000 }) - test("agent without model does not override categoryModel", async () => { - // given - agent registered without model field + test("agent without model resolves via fallback chain", async () => { + // given - agent registered without model field, fallback chain should resolve const { createDelegateTask } = require("./tools") let promptBody: any @@ -2857,7 +2857,7 @@ describe("sisyphus-task", () => { app: { agents: async () => ({ data: [ - { name: "explore", mode: "subagent" }, // no model field + { name: "explore", mode: "subagent" }, ], }), }, @@ -2898,8 +2898,205 @@ describe("sisyphus-task", () => { toolContext ) - // then - no model should be passed to session.prompt - expect(promptBody.model).toBeUndefined() + // then - model should be resolved via AGENT_MODEL_REQUIREMENTS fallback chain + expect(promptBody.model).toBeDefined() + }, { timeout: 20000 }) + + test("agentOverrides model takes priority over matchedAgent.model (#1357)", async () => { + // given - user configured oracle to use a specific model in oh-my-opencode.json + const { createDelegateTask } = require("./tools") + let promptBody: any + + const mockManager = { launch: async () => ({}) } + + const promptMock = async (input: any) => { + promptBody = input.body + return { data: {} } + } + + const mockClient = { + app: { + agents: async () => ({ + data: [ + { name: "oracle", mode: "subagent", model: { providerID: "openai", modelID: "gpt-5.2" } }, + ], + }), + }, + config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, + session: { + get: async () => ({ data: 
{ directory: "/project" } }), + create: async () => ({ data: { id: "ses_override_model" } }), + prompt: promptMock, + promptAsync: promptMock, + messages: async () => ({ + data: [{ info: { role: "assistant" }, parts: [{ type: "text", text: "Done" }] }], + }), + status: async () => ({ data: { "ses_override_model": { type: "idle" } } }), + }, + } + + const tool = createDelegateTask({ + manager: mockManager, + client: mockClient, + agentOverrides: { + oracle: { model: "anthropic/claude-opus-4-6" }, + }, + }) + + const toolContext = { + sessionID: "parent-session", + messageID: "parent-message", + agent: "sisyphus", + abort: new AbortController().signal, + } + + // when - delegating to oracle via subagent_type with user override + await tool.execute( + { + description: "Consult oracle with override", + prompt: "Review architecture", + subagent_type: "oracle", + run_in_background: false, + load_skills: [], + }, + toolContext + ) + + // then - user-configured model should take priority over matchedAgent.model + expect(promptBody.model).toEqual({ + providerID: "anthropic", + modelID: "claude-opus-4-6", + }) + }, { timeout: 20000 }) + + test("agentOverrides variant is applied when model is overridden (#1357)", async () => { + // given - user configured oracle with model and variant + const { createDelegateTask } = require("./tools") + let promptBody: any + + const mockManager = { launch: async () => ({}) } + + const promptMock = async (input: any) => { + promptBody = input.body + return { data: {} } + } + + const mockClient = { + app: { + agents: async () => ({ + data: [ + { name: "oracle", mode: "subagent", model: { providerID: "openai", modelID: "gpt-5.2" } }, + ], + }), + }, + config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, + session: { + get: async () => ({ data: { directory: "/project" } }), + create: async () => ({ data: { id: "ses_variant_test" } }), + prompt: promptMock, + promptAsync: promptMock, + messages: async () => ({ + data: [{ 
info: { role: "assistant" }, parts: [{ type: "text", text: "Done" }] }], + }), + status: async () => ({ data: { "ses_variant_test": { type: "idle" } } }), + }, + } + + const tool = createDelegateTask({ + manager: mockManager, + client: mockClient, + agentOverrides: { + oracle: { model: "anthropic/claude-opus-4-6", variant: "max" }, + }, + }) + + const toolContext = { + sessionID: "parent-session", + messageID: "parent-message", + agent: "sisyphus", + abort: new AbortController().signal, + } + + // when - delegating to oracle via subagent_type with variant override + await tool.execute( + { + description: "Consult oracle with variant", + prompt: "Review architecture", + subagent_type: "oracle", + run_in_background: false, + load_skills: [], + }, + toolContext + ) + + // then - user-configured variant should be applied + expect(promptBody.variant).toBe("max") + }, { timeout: 20000 }) + + test("fallback chain resolves model when no override and no matchedAgent.model (#1357)", async () => { + // given - agent registered without model, no override, but AGENT_MODEL_REQUIREMENTS has fallback + const { createDelegateTask } = require("./tools") + let promptBody: any + + const mockManager = { launch: async () => ({}) } + + const promptMock = async (input: any) => { + promptBody = input.body + return { data: {} } + } + + const mockClient = { + app: { + agents: async () => ({ + data: [ + { name: "oracle", mode: "subagent" }, // no model field + ], + }), + }, + config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, + session: { + get: async () => ({ data: { directory: "/project" } }), + create: async () => ({ data: { id: "ses_fallback_test" } }), + prompt: promptMock, + promptAsync: promptMock, + messages: async () => ({ + data: [{ info: { role: "assistant" }, parts: [{ type: "text", text: "Done" }] }], + }), + status: async () => ({ data: { "ses_fallback_test": { type: "idle" } } }), + }, + } + + const tool = createDelegateTask({ + manager: mockManager, + 
client: mockClient, + // no agentOverrides + }) + + const toolContext = { + sessionID: "parent-session", + messageID: "parent-message", + agent: "sisyphus", + abort: new AbortController().signal, + } + + // when - delegating to oracle with no override and no matchedAgent model + await tool.execute( + { + description: "Consult oracle with fallback", + prompt: "Review architecture", + subagent_type: "oracle", + run_in_background: false, + load_skills: [], + }, + toolContext + ) + + // then - should resolve via AGENT_MODEL_REQUIREMENTS fallback chain for oracle + // oracle fallback chain: gpt-5.2 (openai) > gemini-3-pro (google) > claude-opus-4-6 (anthropic) + // Since openai is in connectedProviders, should resolve to openai/gpt-5.2 + expect(promptBody.model).toBeDefined() + expect(promptBody.model.providerID).toBe("openai") + expect(promptBody.model.modelID).toContain("gpt-5.2") }, { timeout: 20000 }) }) diff --git a/src/tools/delegate-task/types.ts b/src/tools/delegate-task/types.ts index 1fb4b4a6f..1646b1fe9 100644 --- a/src/tools/delegate-task/types.ts +++ b/src/tools/delegate-task/types.ts @@ -1,6 +1,6 @@ import type { PluginInput } from "@opencode-ai/plugin" import type { BackgroundManager } from "../../features/background-agent" -import type { CategoriesConfig, GitMasterConfig, BrowserAutomationProvider } from "../../config/schema" +import type { CategoriesConfig, GitMasterConfig, BrowserAutomationProvider, AgentOverrides } from "../../config/schema" import type { AvailableCategory, AvailableSkill, @@ -53,6 +53,7 @@ export interface DelegateTaskToolOptions { disabledSkills?: Set availableCategories?: AvailableCategory[] availableSkills?: AvailableSkill[] + agentOverrides?: AgentOverrides onSyncSessionCreated?: (event: SyncSessionCreatedEvent) => Promise } From 7abefcca1fa6ed23c899462d538328345b2c4a5d Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Sun, 8 Feb 2026 13:16:16 +0900 Subject: [PATCH 11/30] feat: auto-recover from Anthropic assistant message prefill 
errors When Anthropic models reject requests with 'This model does not support assistant message prefill', detect this as a recoverable error type and automatically send 'Continue' once to resume the conversation. Extends session-recovery hook with new 'assistant_prefill_unsupported' error type. The existing session.error handler in index.ts already sends 'continue' after successful recovery, so no additional logic needed. --- src/hooks/session-recovery/index.test.ts | 57 ++++++++++++++++++++++++ src/hooks/session-recovery/index.ts | 12 +++++ 2 files changed, 69 insertions(+) diff --git a/src/hooks/session-recovery/index.test.ts b/src/hooks/session-recovery/index.test.ts index 93d7990a9..257fe05a5 100644 --- a/src/hooks/session-recovery/index.test.ts +++ b/src/hooks/session-recovery/index.test.ts @@ -129,6 +129,63 @@ describe("detectErrorType", () => { }) }) + describe("assistant_prefill_unsupported errors", () => { + it("should detect assistant message prefill error from direct message", () => { + //#given an error about assistant message prefill not being supported + const error = { + message: "This model does not support assistant message prefill. The conversation must end with a user message.", + } + + //#when detectErrorType is called + const result = detectErrorType(error) + + //#then should return assistant_prefill_unsupported + expect(result).toBe("assistant_prefill_unsupported") + }) + + it("should detect assistant message prefill error from nested error object", () => { + //#given an Anthropic API error with nested structure matching the real error format + const error = { + error: { + type: "invalid_request_error", + message: "This model does not support assistant message prefill. 
The conversation must end with a user message.", + }, + } + + //#when detectErrorType is called + const result = detectErrorType(error) + + //#then should return assistant_prefill_unsupported + expect(result).toBe("assistant_prefill_unsupported") + }) + + it("should detect error with only 'conversation must end with a user message' fragment", () => { + //#given an error containing only the user message requirement + const error = { + message: "The conversation must end with a user message.", + } + + //#when detectErrorType is called + const result = detectErrorType(error) + + //#then should return assistant_prefill_unsupported + expect(result).toBe("assistant_prefill_unsupported") + }) + + it("should detect error with only 'assistant message prefill' fragment", () => { + //#given an error containing only the prefill mention + const error = { + message: "This model does not support assistant message prefill.", + } + + //#when detectErrorType is called + const result = detectErrorType(error) + + //#then should return assistant_prefill_unsupported + expect(result).toBe("assistant_prefill_unsupported") + }) + }) + describe("unrecognized errors", () => { it("should return null for unrecognized error patterns", () => { // given an unrelated error diff --git a/src/hooks/session-recovery/index.ts b/src/hooks/session-recovery/index.ts index 2aecee157..9f73c074c 100644 --- a/src/hooks/session-recovery/index.ts +++ b/src/hooks/session-recovery/index.ts @@ -28,6 +28,7 @@ type RecoveryErrorType = | "tool_result_missing" | "thinking_block_order" | "thinking_disabled_violation" + | "assistant_prefill_unsupported" | null interface MessageInfo { @@ -126,6 +127,13 @@ function extractMessageIndex(error: unknown): number | null { export function detectErrorType(error: unknown): RecoveryErrorType { const message = getErrorMessage(error) + if ( + message.includes("assistant message prefill") || + message.includes("conversation must end with a user message") + ) { + return 
"assistant_prefill_unsupported" + } + // IMPORTANT: Check thinking_block_order BEFORE tool_result_missing // because Anthropic's extended thinking error messages contain "tool_use" and "tool_result" // in the documentation URL, which would incorrectly match tool_result_missing @@ -375,11 +383,13 @@ export function createSessionRecoveryHook(ctx: PluginInput, options?: SessionRec tool_result_missing: "Tool Crash Recovery", thinking_block_order: "Thinking Block Recovery", thinking_disabled_violation: "Thinking Strip Recovery", + assistant_prefill_unsupported: "Prefill Error Recovery", } const toastMessages: Record = { tool_result_missing: "Injecting cancelled tool results...", thinking_block_order: "Fixing message structure...", thinking_disabled_violation: "Stripping thinking blocks...", + assistant_prefill_unsupported: "Sending 'Continue' to recover...", } await ctx.client.tui @@ -411,6 +421,8 @@ export function createSessionRecoveryHook(ctx: PluginInput, options?: SessionRec const resumeConfig = extractResumeConfig(lastUser, sessionID) await resumeSession(ctx.client, resumeConfig) } + } else if (errorType === "assistant_prefill_unsupported") { + success = true } return success From b88a868173f236e57b740107e51615d0fe870e4e Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Sun, 8 Feb 2026 13:22:56 +0900 Subject: [PATCH 12/30] fix(config): plan agent inherits model settings from prometheus when not explicitly configured Previously, demoted plan agent only received { mode: 'subagent' } with no model settings, causing fallback to step-3.5-flash. Now inherits all model-related settings (model, variant, temperature, top_p, maxTokens, thinking, reasoningEffort, textVerbosity, providerOptions) from the resolved prometheus config. User overrides via agents.plan.* take priority. Prompt, permission, description, and color are intentionally NOT inherited. 
--- src/plugin-handlers/config-handler.test.ts | 181 ++++++++++++++++++ src/plugin-handlers/config-handler.ts | 7 +- .../plan-model-inheritance.test.ts | 118 ++++++++++++ src/plugin-handlers/plan-model-inheritance.ts | 27 +++ 4 files changed, 331 insertions(+), 2 deletions(-) create mode 100644 src/plugin-handlers/plan-model-inheritance.test.ts create mode 100644 src/plugin-handlers/plan-model-inheritance.ts diff --git a/src/plugin-handlers/config-handler.test.ts b/src/plugin-handlers/config-handler.test.ts index d33f37184..e88f2b4e4 100644 --- a/src/plugin-handlers/config-handler.test.ts +++ b/src/plugin-handlers/config-handler.test.ts @@ -600,6 +600,187 @@ describe("Prometheus direct override priority over category", () => { }) }) +describe("Plan agent model inheritance from prometheus", () => { + test("plan agent inherits all model-related settings from resolved prometheus config", async () => { + //#given - prometheus resolves to claude-opus-4-6 with model settings + spyOn(shared, "resolveModelPipeline" as any).mockReturnValue({ + model: "anthropic/claude-opus-4-6", + provenance: "provider-fallback", + variant: "max", + }) + const pluginConfig: OhMyOpenCodeConfig = { + sisyphus_agent: { + planner_enabled: true, + replace_plan: true, + }, + } + const config: Record = { + model: "anthropic/claude-opus-4-6", + agent: { + plan: { + name: "plan", + mode: "primary", + prompt: "original plan prompt", + }, + }, + } + const handler = createConfigHandler({ + ctx: { directory: "/tmp" }, + pluginConfig, + modelCacheState: { + anthropicContext1MEnabled: false, + modelContextLimitsCache: new Map(), + }, + }) + + //#when + await handler(config) + + //#then - plan inherits model and variant from prometheus, but NOT prompt + const agents = config.agent as Record + expect(agents.plan).toBeDefined() + expect(agents.plan.mode).toBe("subagent") + expect(agents.plan.model).toBe("anthropic/claude-opus-4-6") + expect(agents.plan.variant).toBe("max") + 
expect(agents.plan.prompt).toBeUndefined() + }) + + test("plan agent inherits temperature, reasoningEffort, and other model settings from prometheus", async () => { + //#given - prometheus configured with category that has temperature and reasoningEffort + spyOn(shared, "resolveModelPipeline" as any).mockReturnValue({ + model: "openai/gpt-5.2", + provenance: "override", + variant: "high", + }) + const pluginConfig: OhMyOpenCodeConfig = { + sisyphus_agent: { + planner_enabled: true, + replace_plan: true, + }, + agents: { + prometheus: { + model: "openai/gpt-5.2", + variant: "high", + temperature: 0.3, + top_p: 0.9, + maxTokens: 16000, + reasoningEffort: "high", + textVerbosity: "medium", + thinking: { type: "enabled", budgetTokens: 8000 }, + }, + }, + } + const config: Record = { + model: "anthropic/claude-opus-4-6", + agent: {}, + } + const handler = createConfigHandler({ + ctx: { directory: "/tmp" }, + pluginConfig, + modelCacheState: { + anthropicContext1MEnabled: false, + modelContextLimitsCache: new Map(), + }, + }) + + //#when + await handler(config) + + //#then - plan inherits ALL model-related settings from resolved prometheus + const agents = config.agent as Record> + expect(agents.plan).toBeDefined() + expect(agents.plan.mode).toBe("subagent") + expect(agents.plan.model).toBe("openai/gpt-5.2") + expect(agents.plan.variant).toBe("high") + expect(agents.plan.temperature).toBe(0.3) + expect(agents.plan.top_p).toBe(0.9) + expect(agents.plan.maxTokens).toBe(16000) + expect(agents.plan.reasoningEffort).toBe("high") + expect(agents.plan.textVerbosity).toBe("medium") + expect(agents.plan.thinking).toEqual({ type: "enabled", budgetTokens: 8000 }) + }) + + test("plan agent user override takes priority over prometheus inherited settings", async () => { + //#given - prometheus resolves to opus, but user has plan override for gpt-5.2 + spyOn(shared, "resolveModelPipeline" as any).mockReturnValue({ + model: "anthropic/claude-opus-4-6", + provenance: "provider-fallback", 
+ variant: "max", + }) + const pluginConfig: OhMyOpenCodeConfig = { + sisyphus_agent: { + planner_enabled: true, + replace_plan: true, + }, + agents: { + plan: { + model: "openai/gpt-5.2", + variant: "high", + temperature: 0.5, + }, + }, + } + const config: Record = { + model: "anthropic/claude-opus-4-6", + agent: {}, + } + const handler = createConfigHandler({ + ctx: { directory: "/tmp" }, + pluginConfig, + modelCacheState: { + anthropicContext1MEnabled: false, + modelContextLimitsCache: new Map(), + }, + }) + + //#when + await handler(config) + + //#then - plan uses its own override, not prometheus settings + const agents = config.agent as Record> + expect(agents.plan.model).toBe("openai/gpt-5.2") + expect(agents.plan.variant).toBe("high") + expect(agents.plan.temperature).toBe(0.5) + }) + + test("plan agent does NOT inherit prompt, description, or color from prometheus", async () => { + //#given + spyOn(shared, "resolveModelPipeline" as any).mockReturnValue({ + model: "anthropic/claude-opus-4-6", + provenance: "provider-fallback", + variant: "max", + }) + const pluginConfig: OhMyOpenCodeConfig = { + sisyphus_agent: { + planner_enabled: true, + replace_plan: true, + }, + } + const config: Record = { + model: "anthropic/claude-opus-4-6", + agent: {}, + } + const handler = createConfigHandler({ + ctx: { directory: "/tmp" }, + pluginConfig, + modelCacheState: { + anthropicContext1MEnabled: false, + modelContextLimitsCache: new Map(), + }, + }) + + //#when + await handler(config) + + //#then - plan has model settings but NOT prompt/description/color + const agents = config.agent as Record> + expect(agents.plan.model).toBe("anthropic/claude-opus-4-6") + expect(agents.plan.prompt).toBeUndefined() + expect(agents.plan.description).toBeUndefined() + expect(agents.plan.color).toBeUndefined() + }) +}) + describe("Deadlock prevention - fetchAvailableModels must not receive client", () => { test("fetchAvailableModels should be called with undefined client to prevent deadlock 
during plugin init", async () => { // given - This test ensures we don't regress on issue #1301 diff --git a/src/plugin-handlers/config-handler.ts b/src/plugin-handlers/config-handler.ts index 41adbaf20..ea7c2856c 100644 --- a/src/plugin-handlers/config-handler.ts +++ b/src/plugin-handlers/config-handler.ts @@ -32,6 +32,7 @@ import { AGENT_NAME_MAP } from "../shared/migration"; import { AGENT_MODEL_REQUIREMENTS } from "../shared/model-requirements"; import { PROMETHEUS_SYSTEM_PROMPT, PROMETHEUS_PERMISSION } from "../agents/prometheus"; import { DEFAULT_CATEGORIES } from "../tools/delegate-task/constants"; +import { buildPlanDemoteConfig } from "./plan-model-inheritance"; import type { ModelCacheState } from "../plugin-state"; import type { CategoryConfig } from "../config/schema"; @@ -385,8 +386,10 @@ export function createConfigHandler(deps: ConfigHandlerDeps) { : {}; const planDemoteConfig = shouldDemotePlan - ? { mode: "subagent" as const - } + ? buildPlanDemoteConfig( + agentConfig["prometheus"] as Record | undefined, + pluginConfig.agents?.plan as Record | undefined, + ) : undefined; config.agent = { diff --git a/src/plugin-handlers/plan-model-inheritance.test.ts b/src/plugin-handlers/plan-model-inheritance.test.ts new file mode 100644 index 000000000..3b68f0a16 --- /dev/null +++ b/src/plugin-handlers/plan-model-inheritance.test.ts @@ -0,0 +1,118 @@ +import { describe, test, expect } from "bun:test" +import { buildPlanDemoteConfig } from "./plan-model-inheritance" + +describe("buildPlanDemoteConfig", () => { + test("returns only mode when prometheus and plan override are both undefined", () => { + //#given + const prometheusConfig = undefined + const planOverride = undefined + + //#when + const result = buildPlanDemoteConfig(prometheusConfig, planOverride) + + //#then + expect(result).toEqual({ mode: "subagent" }) + }) + + test("extracts all model settings from prometheus config", () => { + //#given + const prometheusConfig = { + name: "prometheus", + model: 
"anthropic/claude-opus-4-6", + variant: "max", + mode: "all", + prompt: "You are Prometheus...", + permission: { edit: "allow" }, + description: "Plan agent (Prometheus)", + color: "#FF5722", + temperature: 0.1, + top_p: 0.95, + maxTokens: 32000, + thinking: { type: "enabled", budgetTokens: 10000 }, + reasoningEffort: "high", + textVerbosity: "medium", + providerOptions: { key: "value" }, + } + + //#when + const result = buildPlanDemoteConfig(prometheusConfig, undefined) + + //#then - picks model settings, NOT prompt/permission/description/color/name/mode + expect(result.mode).toBe("subagent") + expect(result.model).toBe("anthropic/claude-opus-4-6") + expect(result.variant).toBe("max") + expect(result.temperature).toBe(0.1) + expect(result.top_p).toBe(0.95) + expect(result.maxTokens).toBe(32000) + expect(result.thinking).toEqual({ type: "enabled", budgetTokens: 10000 }) + expect(result.reasoningEffort).toBe("high") + expect(result.textVerbosity).toBe("medium") + expect(result.providerOptions).toEqual({ key: "value" }) + expect(result.prompt).toBeUndefined() + expect(result.permission).toBeUndefined() + expect(result.description).toBeUndefined() + expect(result.color).toBeUndefined() + expect(result.name).toBeUndefined() + }) + + test("plan override takes priority over prometheus for all model settings", () => { + //#given + const prometheusConfig = { + model: "anthropic/claude-opus-4-6", + variant: "max", + temperature: 0.1, + reasoningEffort: "high", + } + const planOverride = { + model: "openai/gpt-5.2", + variant: "high", + temperature: 0.5, + reasoningEffort: "low", + } + + //#when + const result = buildPlanDemoteConfig(prometheusConfig, planOverride) + + //#then + expect(result.model).toBe("openai/gpt-5.2") + expect(result.variant).toBe("high") + expect(result.temperature).toBe(0.5) + expect(result.reasoningEffort).toBe("low") + }) + + test("falls back to prometheus when plan override has partial settings", () => { + //#given + const prometheusConfig = { + 
model: "anthropic/claude-opus-4-6", + variant: "max", + temperature: 0.1, + reasoningEffort: "high", + } + const planOverride = { + model: "openai/gpt-5.2", + } + + //#when + const result = buildPlanDemoteConfig(prometheusConfig, planOverride) + + //#then - plan model wins, rest inherits from prometheus + expect(result.model).toBe("openai/gpt-5.2") + expect(result.variant).toBe("max") + expect(result.temperature).toBe(0.1) + expect(result.reasoningEffort).toBe("high") + }) + + test("skips undefined values from both sources", () => { + //#given + const prometheusConfig = { + model: "anthropic/claude-opus-4-6", + } + + //#when + const result = buildPlanDemoteConfig(prometheusConfig, undefined) + + //#then + expect(result).toEqual({ mode: "subagent", model: "anthropic/claude-opus-4-6" }) + expect(Object.keys(result)).toEqual(["mode", "model"]) + }) +}) diff --git a/src/plugin-handlers/plan-model-inheritance.ts b/src/plugin-handlers/plan-model-inheritance.ts new file mode 100644 index 000000000..bb32483c5 --- /dev/null +++ b/src/plugin-handlers/plan-model-inheritance.ts @@ -0,0 +1,27 @@ +const MODEL_SETTINGS_KEYS = [ + "model", + "variant", + "temperature", + "top_p", + "maxTokens", + "thinking", + "reasoningEffort", + "textVerbosity", + "providerOptions", +] as const + +export function buildPlanDemoteConfig( + prometheusConfig: Record | undefined, + planOverride: Record | undefined, +): Record { + const modelSettings: Record = {} + + for (const key of MODEL_SETTINGS_KEYS) { + const value = planOverride?.[key] ?? 
prometheusConfig?.[key] + if (value !== undefined) { + modelSettings[key] = value + } + } + + return { mode: "subagent" as const, ...modelSettings } +} From a85da59358c9b539cb2f685bd76ac37668b6f2c3 Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Sun, 8 Feb 2026 13:28:08 +0900 Subject: [PATCH 13/30] fix: encode EXA_API_KEY before appending to URL query parameter --- src/mcp/websearch.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mcp/websearch.ts b/src/mcp/websearch.ts index 8dd8516c5..aa129d846 100644 --- a/src/mcp/websearch.ts +++ b/src/mcp/websearch.ts @@ -32,7 +32,7 @@ export function createWebsearchConfig(config?: WebsearchConfig): RemoteMcpConfig return { type: "remote" as const, url: process.env.EXA_API_KEY - ? "https://mcp.exa.ai/mcp?tools=web_search_exa&exaApiKey=" + process.env.EXA_API_KEY + ? `https://mcp.exa.ai/mcp?tools=web_search_exa&exaApiKey=${encodeURIComponent(process.env.EXA_API_KEY)}` : "https://mcp.exa.ai/mcp?tools=web_search_exa", enabled: true, headers: process.env.EXA_API_KEY From 6ce482668b36fbf395ada8f7c5760ab95b2efd2f Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Sun, 8 Feb 2026 13:30:00 +0900 Subject: [PATCH 14/30] refactor: extract git worktree parser from atlas hook --- src/hooks/atlas/index.ts | 113 +----------------- .../git-worktree/collect-git-diff-stats.ts | 29 +++++ .../git-worktree/format-file-changes.ts | 46 +++++++ src/shared/git-worktree/git-worktree.test.ts | 51 ++++++++ src/shared/git-worktree/index.ts | 5 + src/shared/git-worktree/parse-diff-numstat.ts | 27 +++++ .../git-worktree/parse-status-porcelain.ts | 25 ++++ src/shared/git-worktree/types.ts | 8 ++ src/shared/index.ts | 1 + 9 files changed, 195 insertions(+), 110 deletions(-) create mode 100644 src/shared/git-worktree/collect-git-diff-stats.ts create mode 100644 src/shared/git-worktree/format-file-changes.ts create mode 100644 src/shared/git-worktree/git-worktree.test.ts create mode 100644 src/shared/git-worktree/index.ts create mode 
100644 src/shared/git-worktree/parse-diff-numstat.ts create mode 100644 src/shared/git-worktree/parse-status-porcelain.ts create mode 100644 src/shared/git-worktree/types.ts diff --git a/src/hooks/atlas/index.ts b/src/hooks/atlas/index.ts index ffad04598..b2608187e 100644 --- a/src/hooks/atlas/index.ts +++ b/src/hooks/atlas/index.ts @@ -1,5 +1,4 @@ import type { PluginInput } from "@opencode-ai/plugin" -import { execSync } from "node:child_process" import { existsSync, readdirSync } from "node:fs" import { join } from "node:path" import { @@ -12,6 +11,7 @@ import { findNearestMessageWithFields, MESSAGE_STORAGE } from "../../features/ho import { log } from "../../shared/logger" import { createSystemDirective, SYSTEM_DIRECTIVE_PREFIX, SystemDirectiveTypes } from "../../shared/system-directive" import { isCallerOrchestrator, getMessageDir } from "../../shared/session-utils" +import { collectGitDiffStats, formatFileChanges } from "../../shared/git-worktree" import type { BackgroundManager } from "../../features/background-agent" export const HOOK_NAME = "atlas" @@ -269,113 +269,6 @@ function extractSessionIdFromOutput(output: string): string { return match?.[1] ?? 
"" } -interface GitFileStat { - path: string - added: number - removed: number - status: "modified" | "added" | "deleted" -} - -function getGitDiffStats(directory: string): GitFileStat[] { - try { - const output = execSync("git diff --numstat HEAD", { - cwd: directory, - encoding: "utf-8", - timeout: 5000, - stdio: ["pipe", "pipe", "pipe"], - }).trim() - - if (!output) return [] - - const statusOutput = execSync("git status --porcelain", { - cwd: directory, - encoding: "utf-8", - timeout: 5000, - stdio: ["pipe", "pipe", "pipe"], - }).trim() - - const statusMap = new Map() - for (const line of statusOutput.split("\n")) { - if (!line) continue - const status = line.substring(0, 2).trim() - const filePath = line.substring(3) - if (status === "A" || status === "??") { - statusMap.set(filePath, "added") - } else if (status === "D") { - statusMap.set(filePath, "deleted") - } else { - statusMap.set(filePath, "modified") - } - } - - const stats: GitFileStat[] = [] - for (const line of output.split("\n")) { - const parts = line.split("\t") - if (parts.length < 3) continue - - const [addedStr, removedStr, path] = parts - const added = addedStr === "-" ? 0 : parseInt(addedStr, 10) - const removed = removedStr === "-" ? 0 : parseInt(removedStr, 10) - - stats.push({ - path, - added, - removed, - status: statusMap.get(path) ?? 
"modified", - }) - } - - return stats - } catch { - return [] - } -} - -function formatFileChanges(stats: GitFileStat[], notepadPath?: string): string { - if (stats.length === 0) return "[FILE CHANGES SUMMARY]\nNo file changes detected.\n" - - const modified = stats.filter((s) => s.status === "modified") - const added = stats.filter((s) => s.status === "added") - const deleted = stats.filter((s) => s.status === "deleted") - - const lines: string[] = ["[FILE CHANGES SUMMARY]"] - - if (modified.length > 0) { - lines.push("Modified files:") - for (const f of modified) { - lines.push(` ${f.path} (+${f.added}, -${f.removed})`) - } - lines.push("") - } - - if (added.length > 0) { - lines.push("Created files:") - for (const f of added) { - lines.push(` ${f.path} (+${f.added})`) - } - lines.push("") - } - - if (deleted.length > 0) { - lines.push("Deleted files:") - for (const f of deleted) { - lines.push(` ${f.path} (-${f.removed})`) - } - lines.push("") - } - - if (notepadPath) { - const notepadStat = stats.find((s) => s.path.includes("notepad") || s.path.includes(".sisyphus")) - if (notepadStat) { - lines.push("[NOTEPAD UPDATED]") - lines.push(` ${notepadStat.path} (+${notepadStat.added})`) - lines.push("") - } - } - - return lines.join("\n") -} - interface ToolExecuteAfterInput { tool: string sessionID?: string @@ -750,8 +643,8 @@ export function createAtlasHook( } if (output.output && typeof output.output === "string") { - const gitStats = getGitDiffStats(ctx.directory) - const fileChanges = formatFileChanges(gitStats) + const gitStats = collectGitDiffStats(ctx.directory) + const fileChanges = formatFileChanges(gitStats) const subagentSessionId = extractSessionIdFromOutput(output.output) const boulderState = readBoulderState(ctx.directory) diff --git a/src/shared/git-worktree/collect-git-diff-stats.ts b/src/shared/git-worktree/collect-git-diff-stats.ts new file mode 100644 index 000000000..158a09d82 --- /dev/null +++ b/src/shared/git-worktree/collect-git-diff-stats.ts 
@@ -0,0 +1,29 @@ +import { execSync } from "node:child_process" +import { parseGitStatusPorcelain } from "./parse-status-porcelain" +import { parseGitDiffNumstat } from "./parse-diff-numstat" +import type { GitFileStat } from "./types" + +export function collectGitDiffStats(directory: string): GitFileStat[] { + try { + const diffOutput = execSync("git diff --numstat HEAD", { + cwd: directory, + encoding: "utf-8", + timeout: 5000, + stdio: ["pipe", "pipe", "pipe"], + }).trim() + + if (!diffOutput) return [] + + const statusOutput = execSync("git status --porcelain", { + cwd: directory, + encoding: "utf-8", + timeout: 5000, + stdio: ["pipe", "pipe", "pipe"], + }).trim() + + const statusMap = parseGitStatusPorcelain(statusOutput) + return parseGitDiffNumstat(diffOutput, statusMap) + } catch { + return [] + } +} diff --git a/src/shared/git-worktree/format-file-changes.ts b/src/shared/git-worktree/format-file-changes.ts new file mode 100644 index 000000000..5afb58b8c --- /dev/null +++ b/src/shared/git-worktree/format-file-changes.ts @@ -0,0 +1,46 @@ +import type { GitFileStat } from "./types" + +export function formatFileChanges(stats: GitFileStat[], notepadPath?: string): string { + if (stats.length === 0) return "[FILE CHANGES SUMMARY]\nNo file changes detected.\n" + + const modified = stats.filter((s) => s.status === "modified") + const added = stats.filter((s) => s.status === "added") + const deleted = stats.filter((s) => s.status === "deleted") + + const lines: string[] = ["[FILE CHANGES SUMMARY]"] + + if (modified.length > 0) { + lines.push("Modified files:") + for (const f of modified) { + lines.push(` ${f.path} (+${f.added}, -${f.removed})`) + } + lines.push("") + } + + if (added.length > 0) { + lines.push("Created files:") + for (const f of added) { + lines.push(` ${f.path} (+${f.added})`) + } + lines.push("") + } + + if (deleted.length > 0) { + lines.push("Deleted files:") + for (const f of deleted) { + lines.push(` ${f.path} (-${f.removed})`) + } + 
lines.push("") + } + + if (notepadPath) { + const notepadStat = stats.find((s) => s.path.includes("notepad") || s.path.includes(".sisyphus")) + if (notepadStat) { + lines.push("[NOTEPAD UPDATED]") + lines.push(` ${notepadStat.path} (+${notepadStat.added})`) + lines.push("") + } + } + + return lines.join("\n") +} diff --git a/src/shared/git-worktree/git-worktree.test.ts b/src/shared/git-worktree/git-worktree.test.ts new file mode 100644 index 000000000..27183018b --- /dev/null +++ b/src/shared/git-worktree/git-worktree.test.ts @@ -0,0 +1,51 @@ +/// + +import { describe, expect, test } from "bun:test" +import { formatFileChanges, parseGitDiffNumstat, parseGitStatusPorcelain } from "./index" + +describe("git-worktree", () => { + test("#given status porcelain output #when parsing #then maps paths to statuses", () => { + const porcelain = [ + " M src/a.ts", + "A src/b.ts", + "?? src/c.ts", + "D src/d.ts", + ].join("\n") + + const map = parseGitStatusPorcelain(porcelain) + expect(map.get("src/a.ts")).toBe("modified") + expect(map.get("src/b.ts")).toBe("added") + expect(map.get("src/c.ts")).toBe("added") + expect(map.get("src/d.ts")).toBe("deleted") + }) + + test("#given diff numstat and status map #when parsing #then returns typed stats", () => { + const porcelain = [" M src/a.ts", "A src/b.ts"].join("\n") + const statusMap = parseGitStatusPorcelain(porcelain) + + const numstat = ["1\t2\tsrc/a.ts", "3\t0\tsrc/b.ts", "-\t-\tbin.dat"].join("\n") + const stats = parseGitDiffNumstat(numstat, statusMap) + + expect(stats).toEqual([ + { path: "src/a.ts", added: 1, removed: 2, status: "modified" }, + { path: "src/b.ts", added: 3, removed: 0, status: "added" }, + { path: "bin.dat", added: 0, removed: 0, status: "modified" }, + ]) + }) + + test("#given git file stats #when formatting #then produces grouped summary", () => { + const summary = formatFileChanges([ + { path: "src/a.ts", added: 1, removed: 2, status: "modified" }, + { path: "src/b.ts", added: 3, removed: 0, status: 
"added" }, + { path: "src/c.ts", added: 0, removed: 4, status: "deleted" }, + ]) + + expect(summary).toContain("[FILE CHANGES SUMMARY]") + expect(summary).toContain("Modified files:") + expect(summary).toContain("Created files:") + expect(summary).toContain("Deleted files:") + expect(summary).toContain("src/a.ts") + expect(summary).toContain("src/b.ts") + expect(summary).toContain("src/c.ts") + }) +}) diff --git a/src/shared/git-worktree/index.ts b/src/shared/git-worktree/index.ts new file mode 100644 index 000000000..0bc363d0f --- /dev/null +++ b/src/shared/git-worktree/index.ts @@ -0,0 +1,5 @@ +export type { GitFileStatus, GitFileStat } from "./types" +export { parseGitStatusPorcelain } from "./parse-status-porcelain" +export { parseGitDiffNumstat } from "./parse-diff-numstat" +export { collectGitDiffStats } from "./collect-git-diff-stats" +export { formatFileChanges } from "./format-file-changes" diff --git a/src/shared/git-worktree/parse-diff-numstat.ts b/src/shared/git-worktree/parse-diff-numstat.ts new file mode 100644 index 000000000..3ea2b0f6d --- /dev/null +++ b/src/shared/git-worktree/parse-diff-numstat.ts @@ -0,0 +1,27 @@ +import type { GitFileStat, GitFileStatus } from "./types" + +export function parseGitDiffNumstat( + output: string, + statusMap: Map +): GitFileStat[] { + if (!output) return [] + + const stats: GitFileStat[] = [] + for (const line of output.split("\n")) { + const parts = line.split("\t") + if (parts.length < 3) continue + + const [addedStr, removedStr, path] = parts + const added = addedStr === "-" ? 0 : parseInt(addedStr, 10) + const removed = removedStr === "-" ? 0 : parseInt(removedStr, 10) + + stats.push({ + path, + added, + removed, + status: statusMap.get(path) ?? 
"modified", + }) + } + + return stats +} diff --git a/src/shared/git-worktree/parse-status-porcelain.ts b/src/shared/git-worktree/parse-status-porcelain.ts new file mode 100644 index 000000000..0623de5d9 --- /dev/null +++ b/src/shared/git-worktree/parse-status-porcelain.ts @@ -0,0 +1,25 @@ +import type { GitFileStatus } from "./types" + +export function parseGitStatusPorcelain(output: string): Map { + const map = new Map() + if (!output) return map + + for (const line of output.split("\n")) { + if (!line) continue + + const status = line.substring(0, 2).trim() + const filePath = line.substring(3) + + if (!filePath) continue + + if (status === "A" || status === "??") { + map.set(filePath, "added") + } else if (status === "D") { + map.set(filePath, "deleted") + } else { + map.set(filePath, "modified") + } + } + + return map +} diff --git a/src/shared/git-worktree/types.ts b/src/shared/git-worktree/types.ts new file mode 100644 index 000000000..eb4236990 --- /dev/null +++ b/src/shared/git-worktree/types.ts @@ -0,0 +1,8 @@ +export type GitFileStatus = "modified" | "added" | "deleted" + +export interface GitFileStat { + path: string + added: number + removed: number + status: GitFileStatus +} diff --git a/src/shared/index.ts b/src/shared/index.ts index d42be5a75..4ea346972 100644 --- a/src/shared/index.ts +++ b/src/shared/index.ts @@ -41,5 +41,6 @@ export * from "./tmux" export * from "./model-suggestion-retry" export * from "./opencode-server-auth" export * from "./port-utils" +export * from "./git-worktree" export * from "./safe-create-hook" export * from "./truncate-description" From f035be842d40202299fbde2a0dae9cd9ec09baa9 Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Sun, 8 Feb 2026 13:34:47 +0900 Subject: [PATCH 15/30] fix(agents): include custom agents in orchestrator delegation prompt (#1623) --- bun.lock | 28 ++--- src/agents/dynamic-agent-prompt-builder.ts | 4 +- src/agents/utils.test.ts | 50 +++++++++ src/agents/utils.ts | 105 ++++++++++++++++-- 
.../unstable-agent-babysitter/index.test.ts | 3 + 5 files changed, 165 insertions(+), 25 deletions(-) diff --git a/bun.lock b/bun.lock index 7c5f969e3..4a416c88d 100644 --- a/bun.lock +++ b/bun.lock @@ -28,13 +28,13 @@ "typescript": "^5.7.3", }, "optionalDependencies": { - "oh-my-opencode-darwin-arm64": "3.3.0", - "oh-my-opencode-darwin-x64": "3.3.0", - "oh-my-opencode-linux-arm64": "3.3.0", - "oh-my-opencode-linux-arm64-musl": "3.3.0", - "oh-my-opencode-linux-x64": "3.3.0", - "oh-my-opencode-linux-x64-musl": "3.3.0", - "oh-my-opencode-windows-x64": "3.3.0", + "oh-my-opencode-darwin-arm64": "3.3.1", + "oh-my-opencode-darwin-x64": "3.3.1", + "oh-my-opencode-linux-arm64": "3.3.1", + "oh-my-opencode-linux-arm64-musl": "3.3.1", + "oh-my-opencode-linux-x64": "3.3.1", + "oh-my-opencode-linux-x64-musl": "3.3.1", + "oh-my-opencode-windows-x64": "3.3.1", }, }, }, @@ -226,19 +226,19 @@ "object-inspect": ["object-inspect@1.13.4", "", {}, "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew=="], - "oh-my-opencode-darwin-arm64": ["oh-my-opencode-darwin-arm64@3.3.0", "", { "os": "darwin", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-P2kZKJqZaA4j0qtGM3I8+ZeH204ai27ni/OXLjtFdOewRjJgrahxaC1XslgK7q/KU9fXz6BQfEqAjbvyPf/rgQ=="], + "oh-my-opencode-darwin-arm64": ["oh-my-opencode-darwin-arm64@3.3.1", "", { "os": "darwin", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-R+o42Km6bsIaW6D3I8uu2HCF3BjIWqa/fg38W5y4hJEOw4mL0Q7uV4R+0vtrXRHo9crXTK9ag0fqVQUm+Y6iAQ=="], - "oh-my-opencode-darwin-x64": ["oh-my-opencode-darwin-x64@3.3.0", "", { "os": "darwin", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-RopOorbW1WyhMQJ+ipuqiOA1GICS+3IkOwNyEe0KZlCLpoEDTyFopIL87HSns+gEQPMxnknroDp8lzxn1AKgjw=="], + "oh-my-opencode-darwin-x64": ["oh-my-opencode-darwin-x64@3.3.1", "", { "os": "darwin", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, 
"sha512-7VTbpR1vH3OEkoJxBKtYuxFPX8M3IbJKoeHWME9iK6FpT11W1ASsjyuhvzB1jcxSeqF8ddMnjitlG5ub6h5EVw=="], - "oh-my-opencode-linux-arm64": ["oh-my-opencode-linux-arm64@3.3.0", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-297iEfuK+05g+q64crPW78Zbgm/j5PGjDDweSPkZ6rI6SEfHMvOIkGxMvN8gugM3zcH8FOCQXoY2nC8b6x3pwQ=="], + "oh-my-opencode-linux-arm64": ["oh-my-opencode-linux-arm64@3.3.1", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-BZ/r/CFlvbOxkdZZrRoT16xFOjibRZHuwQnaE4f0JvOzgK6/HWp3zJI1+2/aX/oK5GA6lZxNWRrJC/SKUi8LEg=="], - "oh-my-opencode-linux-arm64-musl": ["oh-my-opencode-linux-arm64-musl@3.3.0", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-oVxP0+yn66HQYfrl9QT6I7TumRzciuPB4z24+PwKEVcDjPbWXQqLY1gwOGHZAQBPLf0vwewv9ybEDVD42RRH4g=="], + "oh-my-opencode-linux-arm64-musl": ["oh-my-opencode-linux-arm64-musl@3.3.1", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-U90Wruf21h+CJbtcrS7MeTAc/5VOF6RI+5jr7qj/cCxjXNJtjhyJdz/maehArjtgf304+lYCM/Mh1i+G2D3YFQ=="], - "oh-my-opencode-linux-x64": ["oh-my-opencode-linux-x64@3.3.0", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-k9LoLkisLJwJNR1J0Bh1bjGtGBkl5D9WzFPSdZCAlyiT6TgG9w5erPTlXqtl2Lt0We5tYUVYlkEIHRMK/ugNsQ=="], + "oh-my-opencode-linux-x64": ["oh-my-opencode-linux-x64@3.3.1", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-sYzohSNdwsAhivbXcbhPdF1qqQi2CCI7FSgbmvvfBOMyZ8HAgqOFqYW2r3GPdmtywzkjOTvCzTG56FZwEjx15w=="], - "oh-my-opencode-linux-x64-musl": ["oh-my-opencode-linux-x64-musl@3.3.0", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-7asXCeae7wBxJrzoZ7J6Yo1oaOxwUN3bTO7jWurCTMs5TDHO+pEHysgv/nuF1jvj1T+r1vg1H5ZmopuKy1qvXg=="], + "oh-my-opencode-linux-x64-musl": ["oh-my-opencode-linux-x64-musl@3.3.1", 
"", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-aG5pZ4eWS0YSGUicOnjMkUPrIqQV4poYF+d9SIvrfvlaMcK6WlQn7jXzgNCwJsfGn5lyhSmjshZBEU+v79Ua3w=="], - "oh-my-opencode-windows-x64": ["oh-my-opencode-windows-x64@3.3.0", "", { "os": "win32", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode.exe" } }, "sha512-ABvwfaXb2xdrpbivzlPPJzIm5vXp+QlVakkaHEQf3TU6Mi/+fehH6Qhq/KMh66FDO2gq3xmxbH7nktHRQp9kNA=="], + "oh-my-opencode-windows-x64": ["oh-my-opencode-windows-x64@3.3.1", "", { "os": "win32", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode.exe" } }, "sha512-FGH7cnzBqNwjSkzCDglMsVttaq+MsykAxa7ehaFK+0dnBZArvllS3W13a3dGaANHMZzfK0vz8hNDUdVi7Z63cA=="], "on-finished": ["on-finished@2.4.1", "", { "dependencies": { "ee-first": "1.1.1" } }, "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg=="], diff --git a/src/agents/dynamic-agent-prompt-builder.ts b/src/agents/dynamic-agent-prompt-builder.ts index c70da062e..defaeecb8 100644 --- a/src/agents/dynamic-agent-prompt-builder.ts +++ b/src/agents/dynamic-agent-prompt-builder.ts @@ -1,8 +1,8 @@ -import type { AgentPromptMetadata, BuiltinAgentName } from "./types" +import type { AgentPromptMetadata } from "./types" import { truncateDescription } from "../shared/truncate-description" export interface AvailableAgent { - name: BuiltinAgentName + name: string description: string metadata: AgentPromptMetadata } diff --git a/src/agents/utils.test.ts b/src/agents/utils.test.ts index 88883feba..a101840f8 100644 --- a/src/agents/utils.test.ts +++ b/src/agents/utils.test.ts @@ -249,6 +249,56 @@ describe("createBuiltinAgents with model overrides", () => { expect(agents.sisyphus.prompt).toContain("frontend-ui-ux") expect(agents.sisyphus.prompt).toContain("git-master") }) + + test("includes custom agents from OpenCode registry in orchestrator prompts", async () => { + // #given + const fetchSpy = spyOn(shared, 
"fetchAvailableModels").mockResolvedValue( + new Set([ + "anthropic/claude-opus-4-6", + "kimi-for-coding/k2p5", + "opencode/kimi-k2.5-free", + "zai-coding-plan/glm-4.7", + "opencode/glm-4.7-free", + "openai/gpt-5.2", + ]) + ) + + const client = { + agent: { + list: async () => ({ + data: [ + { + name: "researcher", + description: "Research agent for deep analysis", + mode: "subagent", + hidden: false, + }, + ], + }), + }, + } + + try { + // #when + const agents = await createBuiltinAgents( + [], + {}, + undefined, + TEST_DEFAULT_MODEL, + undefined, + undefined, + [], + client + ) + + // #then + expect(agents.sisyphus.prompt).toContain("researcher") + expect(agents.hephaestus.prompt).toContain("researcher") + expect(agents.atlas.prompt).toContain("researcher") + } finally { + fetchSpy.mockRestore() + } + }) }) describe("createBuiltinAgents without systemDefaultModel", () => { diff --git a/src/agents/utils.ts b/src/agents/utils.ts index 5aac0ebb4..bdd954884 100644 --- a/src/agents/utils.ts +++ b/src/agents/utils.ts @@ -11,7 +11,18 @@ import { createAtlasAgent, atlasPromptMetadata } from "./atlas" import { createMomusAgent, momusPromptMetadata } from "./momus" import { createHephaestusAgent } from "./hephaestus" import type { AvailableAgent, AvailableCategory, AvailableSkill } from "./dynamic-agent-prompt-builder" -import { deepMerge, fetchAvailableModels, resolveModelPipeline, AGENT_MODEL_REQUIREMENTS, readConnectedProvidersCache, isModelAvailable, isAnyFallbackModelAvailable, isAnyProviderConnected, migrateAgentConfig } from "../shared" +import { + deepMerge, + fetchAvailableModels, + resolveModelPipeline, + AGENT_MODEL_REQUIREMENTS, + readConnectedProvidersCache, + isModelAvailable, + isAnyFallbackModelAvailable, + isAnyProviderConnected, + migrateAgentConfig, + truncateDescription, +} from "../shared" import { DEFAULT_CATEGORIES, CATEGORY_DESCRIPTIONS } from "../tools/delegate-task/constants" import { resolveMultipleSkills } from 
"../features/opencode-skill-loader/skill-content" import { createBuiltinSkills } from "../features/builtin-skills" @@ -52,6 +63,65 @@ function isFactory(source: AgentSource): source is AgentFactory { return typeof source === "function" } +type RegisteredAgentSummary = { + name: string + description: string +} + +function isRecord(value: unknown): value is Record { + return typeof value === "object" && value !== null +} + +function parseRegisteredAgentSummaries(input: unknown): RegisteredAgentSummary[] { + if (!Array.isArray(input)) return [] + + const result: RegisteredAgentSummary[] = [] + for (const item of input) { + if (!isRecord(item)) continue + + const name = typeof item.name === "string" ? item.name : undefined + if (!name) continue + + const hidden = item.hidden + if (hidden === true) continue + + const description = typeof item.description === "string" ? item.description : "" + result.push({ name, description }) + } + + return result +} + +async function fetchRegisteredAgentsFromClient(client: unknown): Promise { + if (!isRecord(client)) return [] + const agentObj = client.agent + if (!isRecord(agentObj)) return [] + const listFn = agentObj.list + if (typeof listFn !== "function") return [] + + try { + const response = await listFn.call(agentObj) + if (!isRecord(response)) return [] + return parseRegisteredAgentSummaries(response.data) + } catch { + return [] + } +} + +function buildCustomAgentMetadata(agentName: string, description: string): AgentPromptMetadata { + const shortDescription = truncateDescription(description).trim() + return { + category: "specialist", + cost: "CHEAP", + triggers: [ + { + domain: `Custom agent: ${agentName}`, + trigger: shortDescription || "Use when this agent's description matches the task", + }, + ], + } +} + export function buildAgent( source: AgentSource, model: string, @@ -279,6 +349,10 @@ export async function createBuiltinAgents( const availableSkills: AvailableSkill[] = [...builtinAvailable, ...discoveredAvailable] + 
const registeredAgents = await fetchRegisteredAgentsFromClient(client) + const builtinAgentNames = new Set(Object.keys(agentSources).map((n) => n.toLowerCase())) + const disabledAgentNames = new Set(disabledAgents.map((n) => n.toLowerCase())) + // Collect general agents first (for availableAgents), but don't add to result yet const pendingAgentConfigs: Map = new Map() @@ -335,14 +409,27 @@ export async function createBuiltinAgents( // Store for later - will be added after sisyphus and hephaestus pendingAgentConfigs.set(name, config) - const metadata = agentMetadata[agentName] - if (metadata) { - availableAgents.push({ - name: agentName, - description: config.description ?? "", - metadata, - }) - } + const metadata = agentMetadata[agentName] + if (metadata) { + availableAgents.push({ + name: agentName, + description: config.description ?? "", + metadata, + }) + } + } + + for (const agent of registeredAgents) { + const lowerName = agent.name.toLowerCase() + if (builtinAgentNames.has(lowerName)) continue + if (disabledAgentNames.has(lowerName)) continue + if (availableAgents.some((a) => a.name.toLowerCase() === lowerName)) continue + + availableAgents.push({ + name: agent.name, + description: agent.description, + metadata: buildCustomAgentMetadata(agent.name, agent.description), + }) } const sisyphusOverride = agentOverrides["sisyphus"] diff --git a/src/hooks/unstable-agent-babysitter/index.test.ts b/src/hooks/unstable-agent-babysitter/index.test.ts index f9900e7d5..9fc309ec9 100644 --- a/src/hooks/unstable-agent-babysitter/index.test.ts +++ b/src/hooks/unstable-agent-babysitter/index.test.ts @@ -21,6 +21,9 @@ function createMockPluginInput(options: { prompt: async (input: unknown) => { promptCalls.push({ input }) }, + promptAsync: async (input: unknown) => { + promptCalls.push({ input }) + }, }, }, } From 72cf908738d560c5d1ea68e2340bcb60cf1d7d83 Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Sun, 8 Feb 2026 13:42:15 +0900 Subject: [PATCH 16/30] fix(delegation): 
decouple plan agent from prometheus - remove aliasing Remove 'prometheus' from PLAN_AGENT_NAMES so isPlanAgent() no longer matches prometheus. The only remaining connection is model inheritance via buildPlanDemoteConfig() in plan-model-inheritance.ts. - Remove 'prometheus' from PLAN_AGENT_NAMES array - Update self-delegation error message to say 'plan agent' not 'prometheus' - Update tests: prometheus is no longer treated as a plan agent - Update task permission: only plan agents get task tool, not prometheus --- src/tools/delegate-task/constants.ts | 2 +- src/tools/delegate-task/tools.test.ts | 221 ++++++++++++++------------ 2 files changed, 123 insertions(+), 100 deletions(-) diff --git a/src/tools/delegate-task/constants.ts b/src/tools/delegate-task/constants.ts index 2bc8f95fd..99744f8ce 100644 --- a/src/tools/delegate-task/constants.ts +++ b/src/tools/delegate-task/constants.ts @@ -538,7 +538,7 @@ export function buildPlanAgentSystemPrepend( * List of agent names that should be treated as plan agents. * Case-insensitive matching is used. */ -export const PLAN_AGENT_NAMES = ["plan", "prometheus", "planner"] +export const PLAN_AGENT_NAMES = ["plan", "planner"] /** * Check if the given agent name is a plan agent. 
diff --git a/src/tools/delegate-task/tools.test.ts b/src/tools/delegate-task/tools.test.ts index 63a42297f..773022439 100644 --- a/src/tools/delegate-task/tools.test.ts +++ b/src/tools/delegate-task/tools.test.ts @@ -135,12 +135,12 @@ describe("sisyphus-task", () => { expect(result).toBe(true) }) - test("returns true for 'prometheus'", () => { - // given / #when + test("returns false for 'prometheus' (decoupled from plan)", () => { + //#given / #when const result = isPlanAgent("prometheus") - // then - expect(result).toBe(true) + //#then - prometheus is NOT a plan agent + expect(result).toBe(false) }) test("returns true for 'planner'", () => { @@ -159,12 +159,12 @@ describe("sisyphus-task", () => { expect(result).toBe(true) }) - test("returns true for case-insensitive match 'Prometheus'", () => { - // given / #when + test("returns false for case-insensitive match 'Prometheus' (decoupled from plan)", () => { + //#given / #when const result = isPlanAgent("Prometheus") - // then - expect(result).toBe(true) + //#then - Prometheus is NOT a plan agent + expect(result).toBe(false) }) test("returns false for 'oracle'", () => { @@ -199,11 +199,11 @@ describe("sisyphus-task", () => { expect(result).toBe(false) }) - test("PLAN_AGENT_NAMES contains expected values", () => { - // given / #when / #then + test("PLAN_AGENT_NAMES contains only plan and planner (not prometheus)", () => { + //#given / #when / #then expect(PLAN_AGENT_NAMES).toContain("plan") - expect(PLAN_AGENT_NAMES).toContain("prometheus") expect(PLAN_AGENT_NAMES).toContain("planner") + expect(PLAN_AGENT_NAMES).not.toContain("prometheus") }) }) @@ -2258,68 +2258,36 @@ describe("sisyphus-task", () => { expect(result).toBe(buildPlanAgentSystemPrepend(availableCategories, availableSkills)) }) - test("prepends plan agent system prompt when agentName is 'prometheus'", () => { - // given + test("does not prepend plan agent prompt for prometheus agent", () => { + //#given - prometheus is NOT a plan agent (decoupled) const 
{ buildSystemContent } = require("./tools") - const { buildPlanAgentSystemPrepend } = require("./constants") + const skillContent = "You are a strategic planner" - const availableCategories = [ - { - name: "ultrabrain", - description: "Complex architecture, deep logical reasoning", - model: "openai/gpt-5.3-codex", - }, - ] - const availableSkills = [ - { - name: "git-master", - description: "Atomic commits, git operations.", - location: "plugin", - }, - ] - - // when + //#when const result = buildSystemContent({ + skillContent, agentName: "prometheus", - availableCategories, - availableSkills, }) - // then - expect(result).toContain("") - expect(result).toBe(buildPlanAgentSystemPrepend(availableCategories, availableSkills)) + //#then - prometheus should NOT get plan agent system prepend + expect(result).toBe(skillContent) + expect(result).not.toContain("MANDATORY CONTEXT GATHERING PROTOCOL") }) - test("prepends plan agent system prompt when agentName is 'Prometheus' (case insensitive)", () => { - // given + test("does not prepend plan agent prompt for Prometheus (case insensitive)", () => { + //#given - Prometheus (capitalized) is NOT a plan agent const { buildSystemContent } = require("./tools") - const { buildPlanAgentSystemPrepend } = require("./constants") + const skillContent = "You are a strategic planner" - const availableCategories = [ - { - name: "quick", - description: "Trivial tasks", - model: "anthropic/claude-haiku-4-5", - }, - ] - const availableSkills = [ - { - name: "dev-browser", - description: "Persistent browser state automation.", - location: "plugin", - }, - ] - - // when + //#when const result = buildSystemContent({ + skillContent, agentName: "Prometheus", - availableCategories, - availableSkills, }) - // then - expect(result).toContain("") - expect(result).toBe(buildPlanAgentSystemPrepend(availableCategories, availableSkills)) + //#then + expect(result).toBe(skillContent) + expect(result).not.toContain("MANDATORY CONTEXT GATHERING PROTOCOL") 
}) test("combines plan agent prepend with skill content", () => { @@ -2565,14 +2533,14 @@ describe("sisyphus-task", () => { }) }) - describe("prometheus self-delegation block", () => { - test("prometheus cannot delegate to prometheus - returns error with guidance", async () => { - // given - current agent is prometheus + describe("plan agent self-delegation block", () => { + test("plan agent cannot delegate to plan - returns error with guidance", async () => { + //#given - current agent is plan const { createDelegateTask } = require("./tools") const mockManager = { launch: async () => ({}) } const mockClient = { - app: { agents: async () => ({ data: [{ name: "prometheus", mode: "subagent" }] }) }, + app: { agents: async () => ({ data: [{ name: "plan", mode: "subagent" }] }) }, config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, session: { get: async () => ({ data: { directory: "/project" } }), @@ -2592,44 +2560,44 @@ describe("sisyphus-task", () => { const toolContext = { sessionID: "parent-session", messageID: "parent-message", - agent: "prometheus", + agent: "plan", abort: new AbortController().signal, } - // when - prometheus tries to delegate to prometheus + //#when - plan agent tries to delegate to plan const result = await tool.execute( { description: "Test self-delegation block", prompt: "Create a plan", - subagent_type: "prometheus", + subagent_type: "plan", run_in_background: false, load_skills: [], }, toolContext ) - // then - should return error telling prometheus to create plan directly - expect(result).toContain("prometheus") + //#then - should return error telling plan agent to create plan directly + expect(result).toContain("plan agent") expect(result).toContain("directly") }) - test("non-prometheus agent CAN delegate to prometheus - proceeds normally", async () => { - // given - current agent is sisyphus + test("prometheus is NOT a plan agent - can delegate to plan normally", async () => { + //#given - current agent is 
prometheus (no longer treated as plan agent) const { createDelegateTask } = require("./tools") const mockManager = { launch: async () => ({}) } const mockClient = { - app: { agents: async () => ({ data: [{ name: "prometheus", mode: "subagent" }] }) }, + app: { agents: async () => ({ data: [{ name: "plan", mode: "subagent" }] }) }, config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, session: { get: async () => ({ data: { directory: "/project" } }), - create: async () => ({ data: { id: "ses_prometheus_allowed" } }), + create: async () => ({ data: { id: "ses_plan_from_prometheus" } }), prompt: async () => ({ data: {} }), promptAsync: async () => ({ data: {} }), messages: async () => ({ data: [{ info: { role: "assistant" }, parts: [{ type: "text", text: "Plan created successfully" }] }] }), - status: async () => ({ data: { "ses_prometheus_allowed": { type: "idle" } } }), + status: async () => ({ data: { "ses_plan_from_prometheus": { type: "idle" } } }), }, } @@ -2641,34 +2609,34 @@ describe("sisyphus-task", () => { const toolContext = { sessionID: "parent-session", messageID: "parent-message", - agent: "sisyphus", + agent: "prometheus", abort: new AbortController().signal, } - // when - sisyphus delegates to prometheus + //#when - prometheus delegates to plan (should work now) const result = await tool.execute( { - description: "Test prometheus delegation from non-prometheus agent", + description: "Test plan delegation from prometheus", prompt: "Create a plan", - subagent_type: "prometheus", + subagent_type: "plan", run_in_background: false, load_skills: [], }, toolContext ) - // then - should proceed normally + //#then - should proceed normally (prometheus is not plan agent) expect(result).not.toContain("Cannot delegate") expect(result).toContain("Plan created successfully") }, { timeout: 20000 }) - test("case-insensitive: Prometheus (capitalized) cannot delegate to prometheus", async () => { - // given - current agent is Prometheus (capitalized) 
+ test("planner agent self-delegation is also blocked", async () => { + //#given - current agent is planner const { createDelegateTask } = require("./tools") const mockManager = { launch: async () => ({}) } const mockClient = { - app: { agents: async () => ({ data: [{ name: "prometheus", mode: "subagent" }] }) }, + app: { agents: async () => ({ data: [{ name: "planner", mode: "subagent" }] }) }, config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, session: { get: async () => ({ data: { directory: "/project" } }), @@ -2688,24 +2656,24 @@ describe("sisyphus-task", () => { const toolContext = { sessionID: "parent-session", messageID: "parent-message", - agent: "Prometheus", + agent: "planner", abort: new AbortController().signal, } - // when - Prometheus tries to delegate to prometheus + //#when - planner tries to delegate to plan const result = await tool.execute( { - description: "Test case-insensitive block", + description: "Test planner self-delegation block", prompt: "Create a plan", - subagent_type: "prometheus", + subagent_type: "plan", run_in_background: false, load_skills: [], }, toolContext ) - // then - should still return error - expect(result).toContain("prometheus") + //#then - should return error (planner is a plan agent alias) + expect(result).toContain("plan agent") expect(result).toContain("directly") }) }) @@ -2903,9 +2871,9 @@ describe("sisyphus-task", () => { }, { timeout: 20000 }) }) - describe("prometheus subagent task permission", () => { - test("prometheus subagent should have task permission enabled", async () => { - // given - sisyphus delegates to prometheus + describe("subagent task permission", () => { + test("plan subagent should have task permission enabled", async () => { + //#given - sisyphus delegates to plan agent const { createDelegateTask } = require("./tools") let promptBody: any @@ -2917,17 +2885,17 @@ describe("sisyphus-task", () => { } const mockClient = { - app: { agents: async () => ({ data: [{ name: 
"prometheus", mode: "subagent" }] }) }, + app: { agents: async () => ({ data: [{ name: "plan", mode: "subagent" }] }) }, config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, session: { get: async () => ({ data: { directory: "/project" } }), - create: async () => ({ data: { id: "ses_prometheus_delegate" } }), + create: async () => ({ data: { id: "ses_plan_delegate" } }), prompt: promptMock, promptAsync: promptMock, messages: async () => ({ data: [{ info: { role: "assistant" }, parts: [{ type: "text", text: "Plan created" }] }] }), - status: async () => ({ data: { "ses_prometheus_delegate": { type: "idle" } } }), + status: async () => ({ data: { "ses_plan_delegate": { type: "idle" } } }), }, } @@ -2943,10 +2911,65 @@ describe("sisyphus-task", () => { abort: new AbortController().signal, } - // when - sisyphus delegates to prometheus + //#when - sisyphus delegates to plan await tool.execute( { - description: "Test prometheus task permission", + description: "Test plan task permission", + prompt: "Create a plan", + subagent_type: "plan", + run_in_background: false, + load_skills: [], + }, + toolContext + ) + + //#then - plan agent should have task permission + expect(promptBody.tools.task).toBe(true) + }, { timeout: 20000 }) + + test("prometheus subagent should NOT have task permission (decoupled from plan)", async () => { + //#given - sisyphus delegates to prometheus (no longer a plan agent) + const { createDelegateTask } = require("./tools") + let promptBody: any + + const mockManager = { launch: async () => ({}) } + + const promptMock = async (input: any) => { + promptBody = input.body + return { data: {} } + } + + const mockClient = { + app: { agents: async () => ({ data: [{ name: "prometheus", mode: "subagent" }] }) }, + config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, + session: { + get: async () => ({ data: { directory: "/project" } }), + create: async () => ({ data: { id: "ses_prometheus_no_task" } }), + prompt: 
promptMock, + promptAsync: promptMock, + messages: async () => ({ + data: [{ info: { role: "assistant" }, parts: [{ type: "text", text: "Plan created" }] }] + }), + status: async () => ({ data: { "ses_prometheus_no_task": { type: "idle" } } }), + }, + } + + const tool = createDelegateTask({ + manager: mockManager, + client: mockClient, + }) + + const toolContext = { + sessionID: "parent-session", + messageID: "parent-message", + agent: "sisyphus", + abort: new AbortController().signal, + } + + //#when - sisyphus delegates to prometheus + await tool.execute( + { + description: "Test prometheus no task permission", prompt: "Create a plan", subagent_type: "prometheus", run_in_background: false, @@ -2955,12 +2978,12 @@ describe("sisyphus-task", () => { toolContext ) - // then - prometheus should have task permission - expect(promptBody.tools.task).toBe(true) + //#then - prometheus should NOT have task permission (it's not a plan agent) + expect(promptBody.tools.task).toBe(false) }, { timeout: 20000 }) - test("non-prometheus subagent should NOT have task permission", async () => { - // given - sisyphus delegates to oracle (non-prometheus) + test("non-plan subagent should NOT have task permission", async () => { + //#given - sisyphus delegates to oracle (non-plan) const { createDelegateTask } = require("./tools") let promptBody: any From d769b9586924a3c969dcafabb6f5b0bc6462408a Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Sun, 8 Feb 2026 13:42:23 +0900 Subject: [PATCH 17/30] fix(delegation): use blocking prompt for sync tasks instead of polling Replace promptAsync + manual polling loop with promptSyncWithModelSuggestionRetry (session.prompt) which blocks until the LLM response completes. This matches OpenCode's native task tool behavior and fixes empty/broken responses that occurred when polling declared stability prematurely. Applied to both executeSyncTask and executeSyncContinuation paths. 
--- src/tools/delegate-task/executor.ts | 96 ++--------------------------- 1 file changed, 4 insertions(+), 92 deletions(-) diff --git a/src/tools/delegate-task/executor.ts b/src/tools/delegate-task/executor.ts index 2a3be2b2d..1f43491b0 100644 --- a/src/tools/delegate-task/executor.ts +++ b/src/tools/delegate-task/executor.ts @@ -12,7 +12,7 @@ import { resolveMultipleSkillsAsync } from "../../features/opencode-skill-loader import { discoverSkills } from "../../features/opencode-skill-loader" import { getTaskToastManager } from "../../features/task-toast-manager" import { subagentSessions, getSessionAgent } from "../../features/claude-code-session-state" -import { log, getAgentToolRestrictions, resolveModelPipeline, promptWithModelSuggestionRetry } from "../../shared" +import { log, getAgentToolRestrictions, resolveModelPipeline, promptWithModelSuggestionRetry, promptSyncWithModelSuggestionRetry } from "../../shared" import { fetchAvailableModels, isModelAvailable } from "../../shared/model-availability" import { readConnectedProvidersCache } from "../../shared/connected-providers-cache" import { CATEGORY_MODEL_REQUIREMENTS } from "../../shared/model-requirements" @@ -211,7 +211,7 @@ export async function executeSyncContinuation( : undefined } - await (client.session as any).promptAsync({ + await promptSyncWithModelSuggestionRetry(client, { path: { id: args.session_id! }, body: { ...(resumeAgent !== undefined ? 
{ agent: resumeAgent } : {}), @@ -233,30 +233,6 @@ export async function executeSyncContinuation( return `Failed to send continuation prompt: ${errorMessage}\n\nSession ID: ${args.session_id}` } - const timing = getTimingConfig() - const pollStart = Date.now() - let lastMsgCount = 0 - let stablePolls = 0 - - while (Date.now() - pollStart < 60000) { - await new Promise(resolve => setTimeout(resolve, timing.POLL_INTERVAL_MS)) - - const elapsed = Date.now() - pollStart - if (elapsed < timing.SESSION_CONTINUATION_STABILITY_MS) continue - - const messagesCheck = await client.session.messages({ path: { id: args.session_id! } }) - const msgs = ((messagesCheck as { data?: unknown }).data ?? messagesCheck) as Array - const currentMsgCount = msgs.length - - if (currentMsgCount > 0 && currentMsgCount === lastMsgCount) { - stablePolls++ - if (stablePolls >= timing.STABILITY_POLLS_REQUIRED) break - } else { - stablePolls = 0 - lastMsgCount = currentMsgCount - } - } - const messagesResult = await client.session.messages({ path: { id: args.session_id! 
}, }) @@ -621,7 +597,7 @@ export async function executeSyncTask( try { const allowTask = isPlanAgent(agentToUse) - await promptWithModelSuggestionRetry(client, { + await promptSyncWithModelSuggestionRetry(client, { path: { id: sessionID }, body: { agent: agentToUse, @@ -659,70 +635,6 @@ export async function executeSyncTask( }) } - const syncTiming = getTimingConfig() - const pollStart = Date.now() - let lastMsgCount = 0 - let stablePolls = 0 - let pollCount = 0 - - log("[task] Starting poll loop", { sessionID, agentToUse }) - - while (Date.now() - pollStart < syncTiming.MAX_POLL_TIME_MS) { - if (ctx.abort?.aborted) { - log("[task] Aborted by user", { sessionID }) - if (toastManager && taskId) toastManager.removeTask(taskId) - return `Task aborted.\n\nSession ID: ${sessionID}` - } - - await new Promise(resolve => setTimeout(resolve, syncTiming.POLL_INTERVAL_MS)) - pollCount++ - - const statusResult = await client.session.status() - const allStatuses = (statusResult.data ?? {}) as Record - const sessionStatus = allStatuses[sessionID] - - if (pollCount % 10 === 0) { - log("[task] Poll status", { - sessionID, - pollCount, - elapsed: Math.floor((Date.now() - pollStart) / 1000) + "s", - sessionStatus: sessionStatus?.type ?? "not_in_status", - stablePolls, - lastMsgCount, - }) - } - - if (sessionStatus && sessionStatus.type !== "idle") { - stablePolls = 0 - lastMsgCount = 0 - continue - } - - const elapsed = Date.now() - pollStart - if (elapsed < syncTiming.MIN_STABILITY_TIME_MS) { - continue - } - - const messagesCheck = await client.session.messages({ path: { id: sessionID } }) - const msgs = ((messagesCheck as { data?: unknown }).data ?? 
messagesCheck) as Array - const currentMsgCount = msgs.length - - if (currentMsgCount === lastMsgCount) { - stablePolls++ - if (stablePolls >= syncTiming.STABILITY_POLLS_REQUIRED) { - log("[task] Poll complete - messages stable", { sessionID, pollCount, currentMsgCount }) - break - } - } else { - stablePolls = 0 - lastMsgCount = currentMsgCount - } - } - - if (Date.now() - pollStart >= syncTiming.MAX_POLL_TIME_MS) { - log("[task] Poll timeout reached", { sessionID, pollCount, lastMsgCount, stablePolls }) - } - const messagesResult = await client.session.messages({ path: { id: sessionID }, }) @@ -963,7 +875,7 @@ Sisyphus-Junior is spawned automatically when you specify a category. Pick the a return { agentToUse: "", categoryModel: undefined, - error: `You are prometheus. You cannot delegate to prometheus via task. + error: `You are the plan agent. You cannot delegate to plan via task. Create the work plan directly - that's your job as the planning agent.`, } From 7f4338b6ed913b223785ddaf254bec52388c94de Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Sun, 8 Feb 2026 13:52:55 +0900 Subject: [PATCH 18/30] fix: preserve variant in sync continuation to maintain thinking budget --- src/tools/delegate-task/executor.ts | 34 +++++++------ src/tools/delegate-task/tools.test.ts | 71 ++++++++++++++++++++++++++- 2 files changed, 89 insertions(+), 16 deletions(-) diff --git a/src/tools/delegate-task/executor.ts b/src/tools/delegate-task/executor.ts index 1f43491b0..aa8ad8140 100644 --- a/src/tools/delegate-task/executor.ts +++ b/src/tools/delegate-task/executor.ts @@ -39,7 +39,7 @@ export interface ParentContext { } interface SessionMessage { - info?: { role?: string; time?: { created?: number }; agent?: string; model?: { providerID: string; modelID: string }; modelID?: string; providerID?: string } + info?: { role?: string; time?: { created?: number }; agent?: string; model?: { providerID: string; modelID: string }; modelID?: string; providerID?: string; variant?: string } 
parts?: Array<{ type?: string; text?: string }> } @@ -190,6 +190,7 @@ export async function executeSyncContinuation( try { let resumeAgent: string | undefined let resumeModel: { providerID: string; modelID: string } | undefined + let resumeVariant: string | undefined try { const messagesResp = await client.session.messages({ path: { id: args.session_id! } }) @@ -199,6 +200,7 @@ export async function executeSyncContinuation( if (info?.agent || info?.model || (info?.modelID && info?.providerID)) { resumeAgent = info.agent resumeModel = info.model ?? (info.providerID && info.modelID ? { providerID: info.providerID, modelID: info.modelID } : undefined) + resumeVariant = info.variant break } } @@ -209,22 +211,24 @@ export async function executeSyncContinuation( resumeModel = resumeMessage?.model?.providerID && resumeMessage?.model?.modelID ? { providerID: resumeMessage.model.providerID, modelID: resumeMessage.model.modelID } : undefined + resumeVariant = resumeMessage?.model?.variant } - await promptSyncWithModelSuggestionRetry(client, { - path: { id: args.session_id! }, - body: { - ...(resumeAgent !== undefined ? { agent: resumeAgent } : {}), - ...(resumeModel !== undefined ? { model: resumeModel } : {}), - tools: { - ...(resumeAgent ? getAgentToolRestrictions(resumeAgent) : {}), - task: false, - call_omo_agent: true, - question: false, - }, - parts: [{ type: "text", text: args.prompt }], - }, - }) + await promptSyncWithModelSuggestionRetry(client, { + path: { id: args.session_id! }, + body: { + ...(resumeAgent !== undefined ? { agent: resumeAgent } : {}), + ...(resumeModel !== undefined ? { model: resumeModel } : {}), + ...(resumeVariant !== undefined ? { variant: resumeVariant } : {}), + tools: { + ...(resumeAgent ? 
getAgentToolRestrictions(resumeAgent) : {}), + task: false, + call_omo_agent: true, + question: false, + }, + parts: [{ type: "text", text: args.prompt }], + }, + }) } catch (promptError) { if (toastManager) { toastManager.removeTask(taskId) diff --git a/src/tools/delegate-task/tools.test.ts b/src/tools/delegate-task/tools.test.ts index 773022439..68eca3e52 100644 --- a/src/tools/delegate-task/tools.test.ts +++ b/src/tools/delegate-task/tools.test.ts @@ -1,5 +1,5 @@ declare const require: (name: string) => any -const { describe, test, expect, beforeEach, afterEach, spyOn } = require("bun:test") +const { describe, test, expect, beforeEach, afterEach, spyOn, mock } = require("bun:test") import { DEFAULT_CATEGORIES, CATEGORY_PROMPT_APPENDS, CATEGORY_DESCRIPTIONS, isPlanAgent, PLAN_AGENT_NAMES } from "./constants" import { resolveCategoryConfig } from "./tools" import type { CategoryConfig } from "../../config/schema" @@ -1055,6 +1055,75 @@ describe("sisyphus-task", () => { expect(result).not.toContain("Background task continued") }, { timeout: 10000 }) + test("sync continuation preserves variant from previous session message", async () => { + //#given a session with a previous message that has variant "max" + const { createDelegateTask } = require("./tools") + + const promptMock = mock(async (input: any) => { + return { data: {} } + }) + + const mockClient = { + session: { + prompt: async () => ({ data: {} }), + promptAsync: promptMock, + messages: async () => ({ + data: [ + { + info: { + role: "user", + agent: "sisyphus-junior", + model: { providerID: "anthropic", modelID: "claude-opus-4-6" }, + variant: "max", + time: { created: Date.now() }, + }, + parts: [{ type: "text", text: "previous message" }], + }, + { + info: { role: "assistant", time: { created: Date.now() + 1 } }, + parts: [{ type: "text", text: "Completed." 
}], + }, + ], + }), + }, + config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, + app: { + agents: async () => ({ data: [] }), + }, + } + + const tool = createDelegateTask({ + manager: { resume: async () => ({ id: "task-var", sessionID: "ses_var_test", description: "Variant test", agent: "sisyphus-junior", status: "running" }) }, + client: mockClient, + }) + + const toolContext = { + sessionID: "parent-session", + messageID: "parent-message", + agent: "sisyphus", + abort: new AbortController().signal, + } + + //#when continuing the session + await tool.execute( + { + description: "Continue with variant", + prompt: "Continue the task", + session_id: "ses_var_test", + run_in_background: false, + load_skills: [], + }, + toolContext + ) + + //#then promptAsync should include variant from previous message + expect(promptMock).toHaveBeenCalled() + const callArgs = promptMock.mock.calls[0][0] + expect(callArgs.body.variant).toBe("max") + expect(callArgs.body.agent).toBe("sisyphus-junior") + expect(callArgs.body.model).toEqual({ providerID: "anthropic", modelID: "claude-opus-4-6" }) + }, { timeout: 10000 }) + test("session_id with background=true should return immediately without waiting", async () => { // given const { createDelegateTask } = require("./tools") From 6b4e149881c5ed964e73b7aed0a58d0487db2e5e Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Sun, 8 Feb 2026 13:57:13 +0900 Subject: [PATCH 19/30] test: assert variant forwarded in sync continuation --- src/tools/delegate-task/tools.test.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/tools/delegate-task/tools.test.ts b/src/tools/delegate-task/tools.test.ts index 68eca3e52..37b9f1a84 100644 --- a/src/tools/delegate-task/tools.test.ts +++ b/src/tools/delegate-task/tools.test.ts @@ -1065,8 +1065,8 @@ describe("sisyphus-task", () => { const mockClient = { session: { - prompt: async () => ({ data: {} }), - promptAsync: promptMock, + prompt: promptMock, + promptAsync: 
async () => ({ data: {} }), messages: async () => ({ data: [ { @@ -1116,7 +1116,7 @@ describe("sisyphus-task", () => { toolContext ) - //#then promptAsync should include variant from previous message + //#then prompt should include variant from previous message expect(promptMock).toHaveBeenCalled() const callArgs = promptMock.mock.calls[0][0] expect(callArgs.body.variant).toBe("max") From d8e7e4f170e14e8ca775a1ba11f07aad33891865 Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Sun, 8 Feb 2026 13:30:00 +0900 Subject: [PATCH 20/30] refactor: extract git worktree parser from atlas hook --- src/hooks/atlas/index.ts | 113 +----------------- .../git-worktree/collect-git-diff-stats.ts | 29 +++++ .../git-worktree/format-file-changes.ts | 46 +++++++ src/shared/git-worktree/git-worktree.test.ts | 51 ++++++++ src/shared/git-worktree/index.ts | 5 + src/shared/git-worktree/parse-diff-numstat.ts | 27 +++++ .../git-worktree/parse-status-porcelain.ts | 25 ++++ src/shared/git-worktree/types.ts | 8 ++ src/shared/index.ts | 1 + 9 files changed, 195 insertions(+), 110 deletions(-) create mode 100644 src/shared/git-worktree/collect-git-diff-stats.ts create mode 100644 src/shared/git-worktree/format-file-changes.ts create mode 100644 src/shared/git-worktree/git-worktree.test.ts create mode 100644 src/shared/git-worktree/index.ts create mode 100644 src/shared/git-worktree/parse-diff-numstat.ts create mode 100644 src/shared/git-worktree/parse-status-porcelain.ts create mode 100644 src/shared/git-worktree/types.ts diff --git a/src/hooks/atlas/index.ts b/src/hooks/atlas/index.ts index ffad04598..b2608187e 100644 --- a/src/hooks/atlas/index.ts +++ b/src/hooks/atlas/index.ts @@ -1,5 +1,4 @@ import type { PluginInput } from "@opencode-ai/plugin" -import { execSync } from "node:child_process" import { existsSync, readdirSync } from "node:fs" import { join } from "node:path" import { @@ -12,6 +11,7 @@ import { findNearestMessageWithFields, MESSAGE_STORAGE } from "../../features/ho import { log } 
from "../../shared/logger" import { createSystemDirective, SYSTEM_DIRECTIVE_PREFIX, SystemDirectiveTypes } from "../../shared/system-directive" import { isCallerOrchestrator, getMessageDir } from "../../shared/session-utils" +import { collectGitDiffStats, formatFileChanges } from "../../shared/git-worktree" import type { BackgroundManager } from "../../features/background-agent" export const HOOK_NAME = "atlas" @@ -269,113 +269,6 @@ function extractSessionIdFromOutput(output: string): string { return match?.[1] ?? "" } -interface GitFileStat { - path: string - added: number - removed: number - status: "modified" | "added" | "deleted" -} - -function getGitDiffStats(directory: string): GitFileStat[] { - try { - const output = execSync("git diff --numstat HEAD", { - cwd: directory, - encoding: "utf-8", - timeout: 5000, - stdio: ["pipe", "pipe", "pipe"], - }).trim() - - if (!output) return [] - - const statusOutput = execSync("git status --porcelain", { - cwd: directory, - encoding: "utf-8", - timeout: 5000, - stdio: ["pipe", "pipe", "pipe"], - }).trim() - - const statusMap = new Map() - for (const line of statusOutput.split("\n")) { - if (!line) continue - const status = line.substring(0, 2).trim() - const filePath = line.substring(3) - if (status === "A" || status === "??") { - statusMap.set(filePath, "added") - } else if (status === "D") { - statusMap.set(filePath, "deleted") - } else { - statusMap.set(filePath, "modified") - } - } - - const stats: GitFileStat[] = [] - for (const line of output.split("\n")) { - const parts = line.split("\t") - if (parts.length < 3) continue - - const [addedStr, removedStr, path] = parts - const added = addedStr === "-" ? 0 : parseInt(addedStr, 10) - const removed = removedStr === "-" ? 0 : parseInt(removedStr, 10) - - stats.push({ - path, - added, - removed, - status: statusMap.get(path) ?? 
"modified", - }) - } - - return stats - } catch { - return [] - } -} - -function formatFileChanges(stats: GitFileStat[], notepadPath?: string): string { - if (stats.length === 0) return "[FILE CHANGES SUMMARY]\nNo file changes detected.\n" - - const modified = stats.filter((s) => s.status === "modified") - const added = stats.filter((s) => s.status === "added") - const deleted = stats.filter((s) => s.status === "deleted") - - const lines: string[] = ["[FILE CHANGES SUMMARY]"] - - if (modified.length > 0) { - lines.push("Modified files:") - for (const f of modified) { - lines.push(` ${f.path} (+${f.added}, -${f.removed})`) - } - lines.push("") - } - - if (added.length > 0) { - lines.push("Created files:") - for (const f of added) { - lines.push(` ${f.path} (+${f.added})`) - } - lines.push("") - } - - if (deleted.length > 0) { - lines.push("Deleted files:") - for (const f of deleted) { - lines.push(` ${f.path} (-${f.removed})`) - } - lines.push("") - } - - if (notepadPath) { - const notepadStat = stats.find((s) => s.path.includes("notepad") || s.path.includes(".sisyphus")) - if (notepadStat) { - lines.push("[NOTEPAD UPDATED]") - lines.push(` ${notepadStat.path} (+${notepadStat.added})`) - lines.push("") - } - } - - return lines.join("\n") -} - interface ToolExecuteAfterInput { tool: string sessionID?: string @@ -750,8 +643,8 @@ export function createAtlasHook( } if (output.output && typeof output.output === "string") { - const gitStats = getGitDiffStats(ctx.directory) - const fileChanges = formatFileChanges(gitStats) + const gitStats = collectGitDiffStats(ctx.directory) + const fileChanges = formatFileChanges(gitStats) const subagentSessionId = extractSessionIdFromOutput(output.output) const boulderState = readBoulderState(ctx.directory) diff --git a/src/shared/git-worktree/collect-git-diff-stats.ts b/src/shared/git-worktree/collect-git-diff-stats.ts new file mode 100644 index 000000000..158a09d82 --- /dev/null +++ b/src/shared/git-worktree/collect-git-diff-stats.ts 
@@ -0,0 +1,29 @@ +import { execSync } from "node:child_process" +import { parseGitStatusPorcelain } from "./parse-status-porcelain" +import { parseGitDiffNumstat } from "./parse-diff-numstat" +import type { GitFileStat } from "./types" + +export function collectGitDiffStats(directory: string): GitFileStat[] { + try { + const diffOutput = execSync("git diff --numstat HEAD", { + cwd: directory, + encoding: "utf-8", + timeout: 5000, + stdio: ["pipe", "pipe", "pipe"], + }).trim() + + if (!diffOutput) return [] + + const statusOutput = execSync("git status --porcelain", { + cwd: directory, + encoding: "utf-8", + timeout: 5000, + stdio: ["pipe", "pipe", "pipe"], + }).trim() + + const statusMap = parseGitStatusPorcelain(statusOutput) + return parseGitDiffNumstat(diffOutput, statusMap) + } catch { + return [] + } +} diff --git a/src/shared/git-worktree/format-file-changes.ts b/src/shared/git-worktree/format-file-changes.ts new file mode 100644 index 000000000..5afb58b8c --- /dev/null +++ b/src/shared/git-worktree/format-file-changes.ts @@ -0,0 +1,46 @@ +import type { GitFileStat } from "./types" + +export function formatFileChanges(stats: GitFileStat[], notepadPath?: string): string { + if (stats.length === 0) return "[FILE CHANGES SUMMARY]\nNo file changes detected.\n" + + const modified = stats.filter((s) => s.status === "modified") + const added = stats.filter((s) => s.status === "added") + const deleted = stats.filter((s) => s.status === "deleted") + + const lines: string[] = ["[FILE CHANGES SUMMARY]"] + + if (modified.length > 0) { + lines.push("Modified files:") + for (const f of modified) { + lines.push(` ${f.path} (+${f.added}, -${f.removed})`) + } + lines.push("") + } + + if (added.length > 0) { + lines.push("Created files:") + for (const f of added) { + lines.push(` ${f.path} (+${f.added})`) + } + lines.push("") + } + + if (deleted.length > 0) { + lines.push("Deleted files:") + for (const f of deleted) { + lines.push(` ${f.path} (-${f.removed})`) + } + 
lines.push("") + } + + if (notepadPath) { + const notepadStat = stats.find((s) => s.path.includes("notepad") || s.path.includes(".sisyphus")) + if (notepadStat) { + lines.push("[NOTEPAD UPDATED]") + lines.push(` ${notepadStat.path} (+${notepadStat.added})`) + lines.push("") + } + } + + return lines.join("\n") +} diff --git a/src/shared/git-worktree/git-worktree.test.ts b/src/shared/git-worktree/git-worktree.test.ts new file mode 100644 index 000000000..27183018b --- /dev/null +++ b/src/shared/git-worktree/git-worktree.test.ts @@ -0,0 +1,51 @@ +/// + +import { describe, expect, test } from "bun:test" +import { formatFileChanges, parseGitDiffNumstat, parseGitStatusPorcelain } from "./index" + +describe("git-worktree", () => { + test("#given status porcelain output #when parsing #then maps paths to statuses", () => { + const porcelain = [ + " M src/a.ts", + "A src/b.ts", + "?? src/c.ts", + "D src/d.ts", + ].join("\n") + + const map = parseGitStatusPorcelain(porcelain) + expect(map.get("src/a.ts")).toBe("modified") + expect(map.get("src/b.ts")).toBe("added") + expect(map.get("src/c.ts")).toBe("added") + expect(map.get("src/d.ts")).toBe("deleted") + }) + + test("#given diff numstat and status map #when parsing #then returns typed stats", () => { + const porcelain = [" M src/a.ts", "A src/b.ts"].join("\n") + const statusMap = parseGitStatusPorcelain(porcelain) + + const numstat = ["1\t2\tsrc/a.ts", "3\t0\tsrc/b.ts", "-\t-\tbin.dat"].join("\n") + const stats = parseGitDiffNumstat(numstat, statusMap) + + expect(stats).toEqual([ + { path: "src/a.ts", added: 1, removed: 2, status: "modified" }, + { path: "src/b.ts", added: 3, removed: 0, status: "added" }, + { path: "bin.dat", added: 0, removed: 0, status: "modified" }, + ]) + }) + + test("#given git file stats #when formatting #then produces grouped summary", () => { + const summary = formatFileChanges([ + { path: "src/a.ts", added: 1, removed: 2, status: "modified" }, + { path: "src/b.ts", added: 3, removed: 0, status: 
"added" }, + { path: "src/c.ts", added: 0, removed: 4, status: "deleted" }, + ]) + + expect(summary).toContain("[FILE CHANGES SUMMARY]") + expect(summary).toContain("Modified files:") + expect(summary).toContain("Created files:") + expect(summary).toContain("Deleted files:") + expect(summary).toContain("src/a.ts") + expect(summary).toContain("src/b.ts") + expect(summary).toContain("src/c.ts") + }) +}) diff --git a/src/shared/git-worktree/index.ts b/src/shared/git-worktree/index.ts new file mode 100644 index 000000000..0bc363d0f --- /dev/null +++ b/src/shared/git-worktree/index.ts @@ -0,0 +1,5 @@ +export type { GitFileStatus, GitFileStat } from "./types" +export { parseGitStatusPorcelain } from "./parse-status-porcelain" +export { parseGitDiffNumstat } from "./parse-diff-numstat" +export { collectGitDiffStats } from "./collect-git-diff-stats" +export { formatFileChanges } from "./format-file-changes" diff --git a/src/shared/git-worktree/parse-diff-numstat.ts b/src/shared/git-worktree/parse-diff-numstat.ts new file mode 100644 index 000000000..3ea2b0f6d --- /dev/null +++ b/src/shared/git-worktree/parse-diff-numstat.ts @@ -0,0 +1,27 @@ +import type { GitFileStat, GitFileStatus } from "./types" + +export function parseGitDiffNumstat( + output: string, + statusMap: Map +): GitFileStat[] { + if (!output) return [] + + const stats: GitFileStat[] = [] + for (const line of output.split("\n")) { + const parts = line.split("\t") + if (parts.length < 3) continue + + const [addedStr, removedStr, path] = parts + const added = addedStr === "-" ? 0 : parseInt(addedStr, 10) + const removed = removedStr === "-" ? 0 : parseInt(removedStr, 10) + + stats.push({ + path, + added, + removed, + status: statusMap.get(path) ?? 
"modified", + }) + } + + return stats +} diff --git a/src/shared/git-worktree/parse-status-porcelain.ts b/src/shared/git-worktree/parse-status-porcelain.ts new file mode 100644 index 000000000..0623de5d9 --- /dev/null +++ b/src/shared/git-worktree/parse-status-porcelain.ts @@ -0,0 +1,25 @@ +import type { GitFileStatus } from "./types" + +export function parseGitStatusPorcelain(output: string): Map { + const map = new Map() + if (!output) return map + + for (const line of output.split("\n")) { + if (!line) continue + + const status = line.substring(0, 2).trim() + const filePath = line.substring(3) + + if (!filePath) continue + + if (status === "A" || status === "??") { + map.set(filePath, "added") + } else if (status === "D") { + map.set(filePath, "deleted") + } else { + map.set(filePath, "modified") + } + } + + return map +} diff --git a/src/shared/git-worktree/types.ts b/src/shared/git-worktree/types.ts new file mode 100644 index 000000000..eb4236990 --- /dev/null +++ b/src/shared/git-worktree/types.ts @@ -0,0 +1,8 @@ +export type GitFileStatus = "modified" | "added" | "deleted" + +export interface GitFileStat { + path: string + added: number + removed: number + status: GitFileStatus +} diff --git a/src/shared/index.ts b/src/shared/index.ts index d42be5a75..4ea346972 100644 --- a/src/shared/index.ts +++ b/src/shared/index.ts @@ -41,5 +41,6 @@ export * from "./tmux" export * from "./model-suggestion-retry" export * from "./opencode-server-auth" export * from "./port-utils" +export * from "./git-worktree" export * from "./safe-create-hook" export * from "./truncate-description" From e7f4f6dd137bf0541e89bca20f2392df32e2aa03 Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Sun, 8 Feb 2026 14:05:53 +0900 Subject: [PATCH 21/30] fix: deny todowrite/todoread per-agent when task_system is enabled When experimental.task_system is enabled, add todowrite: deny and todoread: deny to per-agent permissions for all primary agents (sisyphus, hephaestus, atlas, prometheus, 
sisyphus-junior). This ensures the model never sees these tools in its tool list, complementing the existing global tools config and runtime hook. --- src/plugin-handlers/config-handler.test.ts | 105 +++++++++++++++++++++ src/plugin-handlers/config-handler.ts | 15 ++- 2 files changed, 115 insertions(+), 5 deletions(-) diff --git a/src/plugin-handlers/config-handler.test.ts b/src/plugin-handlers/config-handler.test.ts index e88f2b4e4..08c58f6f0 100644 --- a/src/plugin-handlers/config-handler.test.ts +++ b/src/plugin-handlers/config-handler.test.ts @@ -943,3 +943,108 @@ describe("config-handler plugin loading error boundary (#1559)", () => { expect(commands["test-cmd"]).toBeDefined() }) }) + +describe("per-agent todowrite/todoread deny when task_system enabled", () => { + const PRIMARY_AGENTS = ["sisyphus", "hephaestus", "atlas", "prometheus", "sisyphus-junior"] + + test("denies todowrite and todoread for primary agents when task_system is enabled", async () => { + //#given + spyOn(agents, "createBuiltinAgents" as any).mockResolvedValue({ + sisyphus: { name: "sisyphus", prompt: "test", mode: "primary" }, + hephaestus: { name: "hephaestus", prompt: "test", mode: "primary" }, + atlas: { name: "atlas", prompt: "test", mode: "primary" }, + prometheus: { name: "prometheus", prompt: "test", mode: "primary" }, + "sisyphus-junior": { name: "sisyphus-junior", prompt: "test", mode: "subagent" }, + oracle: { name: "oracle", prompt: "test", mode: "subagent" }, + }) + + const pluginConfig: OhMyOpenCodeConfig = { + experimental: { task_system: true }, + } + const config: Record = { + model: "anthropic/claude-opus-4-6", + agent: {}, + } + const handler = createConfigHandler({ + ctx: { directory: "/tmp" }, + pluginConfig, + modelCacheState: { + anthropicContext1MEnabled: false, + modelContextLimitsCache: new Map(), + }, + }) + + //#when + await handler(config) + + //#then + const agentResult = config.agent as Record }> + for (const agentName of PRIMARY_AGENTS) { + 
expect(agentResult[agentName]?.permission?.todowrite).toBe("deny") + expect(agentResult[agentName]?.permission?.todoread).toBe("deny") + } + }) + + test("does not deny todowrite/todoread when task_system is disabled", async () => { + //#given + spyOn(agents, "createBuiltinAgents" as any).mockResolvedValue({ + sisyphus: { name: "sisyphus", prompt: "test", mode: "primary" }, + hephaestus: { name: "hephaestus", prompt: "test", mode: "primary" }, + }) + + const pluginConfig: OhMyOpenCodeConfig = { + experimental: { task_system: false }, + } + const config: Record = { + model: "anthropic/claude-opus-4-6", + agent: {}, + } + const handler = createConfigHandler({ + ctx: { directory: "/tmp" }, + pluginConfig, + modelCacheState: { + anthropicContext1MEnabled: false, + modelContextLimitsCache: new Map(), + }, + }) + + //#when + await handler(config) + + //#then + const agentResult = config.agent as Record }> + expect(agentResult.sisyphus?.permission?.todowrite).toBeUndefined() + expect(agentResult.sisyphus?.permission?.todoread).toBeUndefined() + expect(agentResult.hephaestus?.permission?.todowrite).toBeUndefined() + expect(agentResult.hephaestus?.permission?.todoread).toBeUndefined() + }) + + test("does not deny todowrite/todoread when task_system is undefined", async () => { + //#given + spyOn(agents, "createBuiltinAgents" as any).mockResolvedValue({ + sisyphus: { name: "sisyphus", prompt: "test", mode: "primary" }, + }) + + const pluginConfig: OhMyOpenCodeConfig = {} + const config: Record = { + model: "anthropic/claude-opus-4-6", + agent: {}, + } + const handler = createConfigHandler({ + ctx: { directory: "/tmp" }, + pluginConfig, + modelCacheState: { + anthropicContext1MEnabled: false, + modelContextLimitsCache: new Map(), + }, + }) + + //#when + await handler(config) + + //#then + const agentResult = config.agent as Record }> + expect(agentResult.sisyphus?.permission?.todowrite).toBeUndefined() + expect(agentResult.sisyphus?.permission?.todoread).toBeUndefined() + }) 
+}) diff --git a/src/plugin-handlers/config-handler.ts b/src/plugin-handlers/config-handler.ts index ea7c2856c..ed9bf0f7a 100644 --- a/src/plugin-handlers/config-handler.ts +++ b/src/plugin-handlers/config-handler.ts @@ -436,6 +436,11 @@ export function createConfigHandler(deps: ConfigHandlerDeps) { // In CLI run mode, deny Question tool for all agents (no TUI to answer questions) const isCliRunMode = process.env.OPENCODE_CLI_RUN_MODE === "true"; const questionPermission = isCliRunMode ? "deny" : "allow"; + + // When task system is enabled, deny todowrite/todoread per-agent so models never see them + const todoPermission = pluginConfig.experimental?.task_system + ? { todowrite: "deny" as const, todoread: "deny" as const } + : {}; if (agentResult.librarian) { const agent = agentResult.librarian as AgentWithPermission; @@ -447,23 +452,23 @@ export function createConfigHandler(deps: ConfigHandlerDeps) { } if (agentResult["atlas"]) { const agent = agentResult["atlas"] as AgentWithPermission; - agent.permission = { ...agent.permission, task: "allow", call_omo_agent: "deny", "task_*": "allow", teammate: "allow" }; + agent.permission = { ...agent.permission, ...todoPermission, task: "allow", call_omo_agent: "deny", "task_*": "allow", teammate: "allow" }; } if (agentResult.sisyphus) { const agent = agentResult.sisyphus as AgentWithPermission; - agent.permission = { ...agent.permission, call_omo_agent: "deny", task: "allow", question: questionPermission, "task_*": "allow", teammate: "allow" }; + agent.permission = { ...agent.permission, ...todoPermission, call_omo_agent: "deny", task: "allow", question: questionPermission, "task_*": "allow", teammate: "allow" }; } if (agentResult.hephaestus) { const agent = agentResult.hephaestus as AgentWithPermission; - agent.permission = { ...agent.permission, call_omo_agent: "deny", task: "allow", question: questionPermission }; + agent.permission = { ...agent.permission, ...todoPermission, call_omo_agent: "deny", task: "allow", 
question: questionPermission }; } if (agentResult["prometheus"]) { const agent = agentResult["prometheus"] as AgentWithPermission; - agent.permission = { ...agent.permission, call_omo_agent: "deny", task: "allow", question: questionPermission, "task_*": "allow", teammate: "allow" }; + agent.permission = { ...agent.permission, ...todoPermission, call_omo_agent: "deny", task: "allow", question: questionPermission, "task_*": "allow", teammate: "allow" }; } if (agentResult["sisyphus-junior"]) { const agent = agentResult["sisyphus-junior"] as AgentWithPermission; - agent.permission = { ...agent.permission, task: "allow", "task_*": "allow", teammate: "allow" }; + agent.permission = { ...agent.permission, ...todoPermission, task: "allow", "task_*": "allow", teammate: "allow" }; } config.permission = { From 44415e3f59ff81aa81788b366216f2b8810d2c00 Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Sun, 8 Feb 2026 14:19:50 +0900 Subject: [PATCH 22/30] fix(mcp): remove duplicate x-api-key header from Exa config (#1627) --- bun.lock | 28 ++++++++++++++-------------- src/mcp/websearch.test.ts | 19 ++++++++++++++++--- src/mcp/websearch.ts | 3 --- 3 files changed, 30 insertions(+), 20 deletions(-) diff --git a/bun.lock b/bun.lock index 7c5f969e3..4a416c88d 100644 --- a/bun.lock +++ b/bun.lock @@ -28,13 +28,13 @@ "typescript": "^5.7.3", }, "optionalDependencies": { - "oh-my-opencode-darwin-arm64": "3.3.0", - "oh-my-opencode-darwin-x64": "3.3.0", - "oh-my-opencode-linux-arm64": "3.3.0", - "oh-my-opencode-linux-arm64-musl": "3.3.0", - "oh-my-opencode-linux-x64": "3.3.0", - "oh-my-opencode-linux-x64-musl": "3.3.0", - "oh-my-opencode-windows-x64": "3.3.0", + "oh-my-opencode-darwin-arm64": "3.3.1", + "oh-my-opencode-darwin-x64": "3.3.1", + "oh-my-opencode-linux-arm64": "3.3.1", + "oh-my-opencode-linux-arm64-musl": "3.3.1", + "oh-my-opencode-linux-x64": "3.3.1", + "oh-my-opencode-linux-x64-musl": "3.3.1", + "oh-my-opencode-windows-x64": "3.3.1", }, }, }, @@ -226,19 +226,19 @@ 
"object-inspect": ["object-inspect@1.13.4", "", {}, "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew=="], - "oh-my-opencode-darwin-arm64": ["oh-my-opencode-darwin-arm64@3.3.0", "", { "os": "darwin", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-P2kZKJqZaA4j0qtGM3I8+ZeH204ai27ni/OXLjtFdOewRjJgrahxaC1XslgK7q/KU9fXz6BQfEqAjbvyPf/rgQ=="], + "oh-my-opencode-darwin-arm64": ["oh-my-opencode-darwin-arm64@3.3.1", "", { "os": "darwin", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-R+o42Km6bsIaW6D3I8uu2HCF3BjIWqa/fg38W5y4hJEOw4mL0Q7uV4R+0vtrXRHo9crXTK9ag0fqVQUm+Y6iAQ=="], - "oh-my-opencode-darwin-x64": ["oh-my-opencode-darwin-x64@3.3.0", "", { "os": "darwin", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-RopOorbW1WyhMQJ+ipuqiOA1GICS+3IkOwNyEe0KZlCLpoEDTyFopIL87HSns+gEQPMxnknroDp8lzxn1AKgjw=="], + "oh-my-opencode-darwin-x64": ["oh-my-opencode-darwin-x64@3.3.1", "", { "os": "darwin", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-7VTbpR1vH3OEkoJxBKtYuxFPX8M3IbJKoeHWME9iK6FpT11W1ASsjyuhvzB1jcxSeqF8ddMnjitlG5ub6h5EVw=="], - "oh-my-opencode-linux-arm64": ["oh-my-opencode-linux-arm64@3.3.0", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-297iEfuK+05g+q64crPW78Zbgm/j5PGjDDweSPkZ6rI6SEfHMvOIkGxMvN8gugM3zcH8FOCQXoY2nC8b6x3pwQ=="], + "oh-my-opencode-linux-arm64": ["oh-my-opencode-linux-arm64@3.3.1", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-BZ/r/CFlvbOxkdZZrRoT16xFOjibRZHuwQnaE4f0JvOzgK6/HWp3zJI1+2/aX/oK5GA6lZxNWRrJC/SKUi8LEg=="], - "oh-my-opencode-linux-arm64-musl": ["oh-my-opencode-linux-arm64-musl@3.3.0", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-oVxP0+yn66HQYfrl9QT6I7TumRzciuPB4z24+PwKEVcDjPbWXQqLY1gwOGHZAQBPLf0vwewv9ybEDVD42RRH4g=="], + 
"oh-my-opencode-linux-arm64-musl": ["oh-my-opencode-linux-arm64-musl@3.3.1", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-U90Wruf21h+CJbtcrS7MeTAc/5VOF6RI+5jr7qj/cCxjXNJtjhyJdz/maehArjtgf304+lYCM/Mh1i+G2D3YFQ=="], - "oh-my-opencode-linux-x64": ["oh-my-opencode-linux-x64@3.3.0", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-k9LoLkisLJwJNR1J0Bh1bjGtGBkl5D9WzFPSdZCAlyiT6TgG9w5erPTlXqtl2Lt0We5tYUVYlkEIHRMK/ugNsQ=="], + "oh-my-opencode-linux-x64": ["oh-my-opencode-linux-x64@3.3.1", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-sYzohSNdwsAhivbXcbhPdF1qqQi2CCI7FSgbmvvfBOMyZ8HAgqOFqYW2r3GPdmtywzkjOTvCzTG56FZwEjx15w=="], - "oh-my-opencode-linux-x64-musl": ["oh-my-opencode-linux-x64-musl@3.3.0", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-7asXCeae7wBxJrzoZ7J6Yo1oaOxwUN3bTO7jWurCTMs5TDHO+pEHysgv/nuF1jvj1T+r1vg1H5ZmopuKy1qvXg=="], + "oh-my-opencode-linux-x64-musl": ["oh-my-opencode-linux-x64-musl@3.3.1", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-aG5pZ4eWS0YSGUicOnjMkUPrIqQV4poYF+d9SIvrfvlaMcK6WlQn7jXzgNCwJsfGn5lyhSmjshZBEU+v79Ua3w=="], - "oh-my-opencode-windows-x64": ["oh-my-opencode-windows-x64@3.3.0", "", { "os": "win32", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode.exe" } }, "sha512-ABvwfaXb2xdrpbivzlPPJzIm5vXp+QlVakkaHEQf3TU6Mi/+fehH6Qhq/KMh66FDO2gq3xmxbH7nktHRQp9kNA=="], + "oh-my-opencode-windows-x64": ["oh-my-opencode-windows-x64@3.3.1", "", { "os": "win32", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode.exe" } }, "sha512-FGH7cnzBqNwjSkzCDglMsVttaq+MsykAxa7ehaFK+0dnBZArvllS3W13a3dGaANHMZzfK0vz8hNDUdVi7Z63cA=="], "on-finished": ["on-finished@2.4.1", "", { "dependencies": { "ee-first": "1.1.1" } }, 
"sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg=="], diff --git a/src/mcp/websearch.test.ts b/src/mcp/websearch.test.ts index f29bf6634..5c7bd5c44 100644 --- a/src/mcp/websearch.test.ts +++ b/src/mcp/websearch.test.ts @@ -37,7 +37,7 @@ describe("websearch MCP provider configuration", () => { expect(result.type).toBe("remote") }) - test("includes x-api-key header when EXA_API_KEY is set", () => { + test("adds exaApiKey query param when EXA_API_KEY is set", () => { //#given const apiKey = "test-exa-key-12345" process.env.EXA_API_KEY = apiKey @@ -46,7 +46,19 @@ describe("websearch MCP provider configuration", () => { const result = createWebsearchConfig() //#then - expect(result.headers).toEqual({ "x-api-key": apiKey }) + expect(result.url).toContain(`exaApiKey=${encodeURIComponent(apiKey)}`) + }) + + test("does not set x-api-key header when EXA_API_KEY is set", () => { + //#given + const apiKey = "test-exa-key-12345" + process.env.EXA_API_KEY = apiKey + + //#when + const result = createWebsearchConfig() + + //#then + expect(result.headers).toBeUndefined() }) test("returns Tavily config when provider is 'tavily' and TAVILY_API_KEY set", () => { @@ -85,7 +97,8 @@ describe("websearch MCP provider configuration", () => { //#then expect(result.url).toContain("mcp.exa.ai") - expect(result.headers).toEqual({ "x-api-key": "test-exa-key" }) + expect(result.url).toContain("exaApiKey=") + expect(result.headers).toBeUndefined() }) test("Tavily config uses Authorization Bearer header format", () => { diff --git a/src/mcp/websearch.ts b/src/mcp/websearch.ts index aa129d846..a306ac49b 100644 --- a/src/mcp/websearch.ts +++ b/src/mcp/websearch.ts @@ -35,9 +35,6 @@ export function createWebsearchConfig(config?: WebsearchConfig): RemoteMcpConfig ? 
`https://mcp.exa.ai/mcp?tools=web_search_exa&exaApiKey=${encodeURIComponent(process.env.EXA_API_KEY)}` : "https://mcp.exa.ai/mcp?tools=web_search_exa", enabled: true, - headers: process.env.EXA_API_KEY - ? { "x-api-key": process.env.EXA_API_KEY } - : undefined, oauth: false as const, } } From 4738379ad73d3ba98eed1d66abc54e00122d1259 Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Sun, 8 Feb 2026 14:34:11 +0900 Subject: [PATCH 23/30] fix(lsp): reset safety block on server restart to prevent permanent blocks (#1366) --- .../claude-code-mcp-loader/loader.test.ts | 11 ++ .../unstable-agent-babysitter/index.test.ts | 6 +- src/tools/lsp/client.test.ts | 114 +++++++++++++++++- src/tools/lsp/client.ts | 82 ++++++++++--- 4 files changed, 193 insertions(+), 20 deletions(-) diff --git a/src/features/claude-code-mcp-loader/loader.test.ts b/src/features/claude-code-mcp-loader/loader.test.ts index 8a1b9f6ea..a7fffe7e2 100644 --- a/src/features/claude-code-mcp-loader/loader.test.ts +++ b/src/features/claude-code-mcp-loader/loader.test.ts @@ -8,6 +8,17 @@ const TEST_DIR = join(tmpdir(), "mcp-loader-test-" + Date.now()) describe("getSystemMcpServerNames", () => { beforeEach(() => { mkdirSync(TEST_DIR, { recursive: true }) + + // Isolate tests from real user environment (e.g., ~/.claude.json). + // loader.ts reads user-level config via os.homedir() + getClaudeConfigDir(). 
+ mock.module("os", () => ({ + homedir: () => TEST_DIR, + tmpdir, + })) + + mock.module("../../shared", () => ({ + getClaudeConfigDir: () => join(TEST_DIR, ".claude"), + })) }) afterEach(() => { diff --git a/src/hooks/unstable-agent-babysitter/index.test.ts b/src/hooks/unstable-agent-babysitter/index.test.ts index f9900e7d5..355072b56 100644 --- a/src/hooks/unstable-agent-babysitter/index.test.ts +++ b/src/hooks/unstable-agent-babysitter/index.test.ts @@ -1,8 +1,9 @@ +import { afterEach, describe, expect, test } from "bun:test" import { _resetForTesting, setMainSession } from "../../features/claude-code-session-state" import type { BackgroundTask } from "../../features/background-agent" import { createUnstableAgentBabysitterHook } from "./index" -const projectDir = "/Users/yeongyu/local-workspaces/oh-my-opencode" +const projectDir = process.cwd() type BabysitterContext = Parameters[0] @@ -21,6 +22,9 @@ function createMockPluginInput(options: { prompt: async (input: unknown) => { promptCalls.push({ input }) }, + promptAsync: async (input: unknown) => { + promptCalls.push({ input }) + }, }, }, } diff --git a/src/tools/lsp/client.test.ts b/src/tools/lsp/client.test.ts index 3ca6ee3b8..8c805d144 100644 --- a/src/tools/lsp/client.test.ts +++ b/src/tools/lsp/client.test.ts @@ -2,7 +2,7 @@ import { mkdtempSync, rmSync, writeFileSync } from "node:fs" import { join } from "node:path" import { tmpdir } from "node:os" -import { describe, it, expect, spyOn, mock } from "bun:test" +import { describe, it, expect, spyOn, mock, beforeEach, afterEach } from "bun:test" mock.module("vscode-jsonrpc/node", () => ({ createMessageConnection: () => { @@ -12,10 +12,18 @@ mock.module("vscode-jsonrpc/node", () => ({ StreamMessageWriter: function StreamMessageWriter() {}, })) -import { LSPClient, validateCwd } from "./client" +import { LSPClient, lspManager, validateCwd } from "./client" import type { ResolvedServer } from "./types" describe("LSPClient", () => { + beforeEach(async () => { + 
await lspManager.stopAll() + }) + + afterEach(async () => { + await lspManager.stopAll() + }) + describe("openFile", () => { it("sends didChange when a previously opened file changes on disk", async () => { // #given @@ -61,6 +69,108 @@ describe("LSPClient", () => { }) }) + describe("LSPServerManager", () => { + it("recreates client after init failure instead of staying permanently blocked", async () => { + //#given + const dir = mkdtempSync(join(tmpdir(), "lsp-manager-test-")) + + const server: ResolvedServer = { + id: "typescript", + command: ["typescript-language-server", "--stdio"], + extensions: [".ts"], + priority: 0, + } + + const startSpy = spyOn(LSPClient.prototype, "start") + const initializeSpy = spyOn(LSPClient.prototype, "initialize") + const isAliveSpy = spyOn(LSPClient.prototype, "isAlive") + const stopSpy = spyOn(LSPClient.prototype, "stop") + + startSpy.mockImplementationOnce(async () => { + throw new Error("boom") + }) + startSpy.mockImplementation(async () => {}) + initializeSpy.mockImplementation(async () => {}) + isAliveSpy.mockImplementation(() => true) + stopSpy.mockImplementation(async () => {}) + + try { + //#when + await expect(lspManager.getClient(dir, server)).rejects.toThrow("boom") + + const client = await lspManager.getClient(dir, server) + + //#then + expect(client).toBeInstanceOf(LSPClient) + expect(startSpy).toHaveBeenCalledTimes(2) + expect(stopSpy).toHaveBeenCalled() + } finally { + startSpy.mockRestore() + initializeSpy.mockRestore() + isAliveSpy.mockRestore() + stopSpy.mockRestore() + rmSync(dir, { recursive: true, force: true }) + } + }) + + it("resets stale initializing entry so a hung init does not permanently block future clients", async () => { + //#given + const dir = mkdtempSync(join(tmpdir(), "lsp-manager-stale-test-")) + + const server: ResolvedServer = { + id: "typescript", + command: ["typescript-language-server", "--stdio"], + extensions: [".ts"], + priority: 0, + } + + const dateNowSpy = spyOn(Date, "now") + + 
const startSpy = spyOn(LSPClient.prototype, "start") + const initializeSpy = spyOn(LSPClient.prototype, "initialize") + const isAliveSpy = spyOn(LSPClient.prototype, "isAlive") + const stopSpy = spyOn(LSPClient.prototype, "stop") + + // First client init hangs forever. + const never = new Promise(() => {}) + startSpy.mockImplementationOnce(async () => { + await never + }) + + // Second attempt should be allowed after stale reset. + startSpy.mockImplementationOnce(async () => {}) + startSpy.mockImplementation(async () => {}) + initializeSpy.mockImplementation(async () => {}) + isAliveSpy.mockImplementation(() => true) + stopSpy.mockImplementation(async () => {}) + + try { + //#when + dateNowSpy.mockReturnValueOnce(0) + lspManager.warmupClient(dir, server) + + dateNowSpy.mockReturnValueOnce(60_000) + + const client = await Promise.race([ + lspManager.getClient(dir, server), + new Promise((_, reject) => setTimeout(() => reject(new Error("test-timeout")), 50)), + ]) + + //#then + expect(client).toBeInstanceOf(LSPClient) + expect(startSpy).toHaveBeenCalledTimes(2) + expect(stopSpy).toHaveBeenCalled() + } finally { + dateNowSpy.mockRestore() + startSpy.mockRestore() + initializeSpy.mockRestore() + isAliveSpy.mockRestore() + stopSpy.mockRestore() + rmSync(dir, { recursive: true, force: true }) + } + }) + }) + describe("validateCwd", () => { it("returns valid for existing directory", () => { // #given diff --git a/src/tools/lsp/client.ts b/src/tools/lsp/client.ts index 57f9a5c8c..ae4fb9ab9 100644 --- a/src/tools/lsp/client.ts +++ b/src/tools/lsp/client.ts @@ -221,6 +221,7 @@ interface ManagedClient { refCount: number initPromise?: Promise isInitializing: boolean + initializingSince?: number } class LSPServerManager { @@ -228,6 +229,7 @@ class LSPServerManager { private clients = new Map() private cleanupInterval: ReturnType | null = null private readonly IDLE_TIMEOUT = 5 * 60 * 1000 + private readonly INIT_TIMEOUT = 60 * 1000 private constructor() { 
this.startCleanupTimer() @@ -309,17 +311,43 @@ class LSPServerManager { const key = this.getKey(root, server.id) let managed = this.clients.get(key) + if (managed) { + const now = Date.now() + if (managed.isInitializing && managed.initializingSince !== undefined && now - managed.initializingSince >= this.INIT_TIMEOUT) { + // Stale init can permanently block subsequent calls (e.g., LSP process hang) + try { + await managed.client.stop() + } catch {} + this.clients.delete(key) + managed = undefined + } + } + if (managed) { if (managed.initPromise) { - await managed.initPromise + try { + await managed.initPromise + } catch { + // Failed init should not keep the key blocked forever. + try { + await managed.client.stop() + } catch {} + this.clients.delete(key) + managed = undefined + } } - if (managed.client.isAlive()) { - managed.refCount++ - managed.lastUsedAt = Date.now() - return managed.client + + if (managed) { + if (managed.client.isAlive()) { + managed.refCount++ + managed.lastUsedAt = Date.now() + return managed.client + } + try { + await managed.client.stop() + } catch {} + this.clients.delete(key) } - await managed.client.stop() - this.clients.delete(key) } const client = new LSPClient(root, server) @@ -328,19 +356,30 @@ class LSPServerManager { await client.initialize() })() + const initStartedAt = Date.now() this.clients.set(key, { client, - lastUsedAt: Date.now(), + lastUsedAt: initStartedAt, refCount: 1, initPromise, isInitializing: true, + initializingSince: initStartedAt, }) - await initPromise + try { + await initPromise + } catch (error) { + this.clients.delete(key) + try { + await client.stop() + } catch {} + throw error + } const m = this.clients.get(key) if (m) { m.initPromise = undefined m.isInitializing = false + m.initializingSince = undefined } return client @@ -356,21 +395,30 @@ class LSPServerManager { await client.initialize() })() + const initStartedAt = Date.now() this.clients.set(key, { client, - lastUsedAt: Date.now(), + lastUsedAt: 
initStartedAt, refCount: 0, initPromise, isInitializing: true, + initializingSince: initStartedAt, }) - initPromise.then(() => { - const m = this.clients.get(key) - if (m) { - m.initPromise = undefined - m.isInitializing = false - } - }) + initPromise + .then(() => { + const m = this.clients.get(key) + if (m) { + m.initPromise = undefined + m.isInitializing = false + m.initializingSince = undefined + } + }) + .catch(() => { + // Warmup failures must not permanently block future initialization. + this.clients.delete(key) + void client.stop().catch(() => {}) + }) } releaseClient(root: string, serverId: string): void { From 676ff513fa896152d25570e65fe555ab0a49d1e4 Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Sun, 8 Feb 2026 14:50:36 +0900 Subject: [PATCH 24/30] fix: detect completion tags in ralph/ULW loop to stop iteration (#1233) --- src/hooks/ralph-loop/index.test.ts | 92 ++++++++++++++++++++---------- src/hooks/ralph-loop/index.ts | 66 +++++++++++++++------ 2 files changed, 111 insertions(+), 47 deletions(-) diff --git a/src/hooks/ralph-loop/index.test.ts b/src/hooks/ralph-loop/index.test.ts index 9c7ce4f13..851e0ce1e 100644 --- a/src/hooks/ralph-loop/index.test.ts +++ b/src/hooks/ralph-loop/index.test.ts @@ -511,6 +511,38 @@ describe("ralph-loop", () => { expect(messagesCalls[0].sessionID).toBe("session-123") }) + test("should detect completion promise in reasoning part via session messages API", async () => { + //#given - active loop with assistant reasoning containing completion promise + mockSessionMessages = [ + { info: { role: "user" }, parts: [{ type: "text", text: "Build something" }] }, + { + info: { role: "assistant" }, + parts: [ + { type: "reasoning", text: "I am done now. 
REASONING_DONE" }, + ], + }, + ] + const hook = createRalphLoopHook(createMockPluginInput(), { + getTranscriptPath: () => join(TEST_DIR, "nonexistent.jsonl"), + }) + hook.startLoop("session-123", "Build something", { + completionPromise: "REASONING_DONE", + }) + + //#when - session goes idle + await hook.event({ + event: { + type: "session.idle", + properties: { sessionID: "session-123" }, + }, + }) + + //#then - loop completed via API detection, no continuation + expect(promptCalls.length).toBe(0) + expect(toastCalls.some((t) => t.title === "Ralph Loop Complete!")).toBe(true) + expect(hook.getState()).toBeNull() + }) + test("should handle multiple iterations correctly", async () => { // given - active loop const hook = createRalphLoopHook(createMockPluginInput()) @@ -596,13 +628,14 @@ describe("ralph-loop", () => { expect(promptCalls.length).toBe(1) }) - test("should only check LAST assistant message for completion", async () => { - // given - multiple assistant messages, only first has completion promise + test("should check last 3 assistant messages for completion", async () => { + // given - multiple assistant messages, promise in recent (not last) assistant message mockSessionMessages = [ { info: { role: "user" }, parts: [{ type: "text", text: "Start task" }] }, - { info: { role: "assistant" }, parts: [{ type: "text", text: "I'll work on it. DONE" }] }, + { info: { role: "assistant" }, parts: [{ type: "text", text: "Working on it." }] }, { info: { role: "user" }, parts: [{ type: "text", text: "Continue" }] }, - { info: { role: "assistant" }, parts: [{ type: "text", text: "Working on more features..." }] }, + { info: { role: "assistant" }, parts: [{ type: "text", text: "Nearly there... 
DONE" }] }, + { info: { role: "assistant" }, parts: [{ type: "text", text: "(extra output after promise)" }] }, ] const hook = createRalphLoopHook(createMockPluginInput(), { getTranscriptPath: () => join(TEST_DIR, "nonexistent.jsonl"), @@ -614,35 +647,36 @@ describe("ralph-loop", () => { event: { type: "session.idle", properties: { sessionID: "session-123" } }, }) - // then - loop should continue (last message has no completion promise) - expect(promptCalls.length).toBe(1) - expect(hook.getState()?.iteration).toBe(2) - }) - - test("should detect completion only in LAST assistant message", async () => { - // given - last assistant message has completion promise - mockSessionMessages = [ - { info: { role: "user" }, parts: [{ type: "text", text: "Start task" }] }, - { info: { role: "assistant" }, parts: [{ type: "text", text: "Starting work..." }] }, - { info: { role: "user" }, parts: [{ type: "text", text: "Continue" }] }, - { info: { role: "assistant" }, parts: [{ type: "text", text: "Task complete! 
DONE" }] }, - ] - const hook = createRalphLoopHook(createMockPluginInput(), { - getTranscriptPath: () => join(TEST_DIR, "nonexistent.jsonl"), - }) - hook.startLoop("session-123", "Build something", { completionPromise: "DONE" }) - - // when - session goes idle - await hook.event({ - event: { type: "session.idle", properties: { sessionID: "session-123" } }, - }) - - // then - loop should complete (last message has completion promise) + // then - loop should complete (promise found within last 3 assistant messages) expect(promptCalls.length).toBe(0) expect(toastCalls.some((t) => t.title === "Ralph Loop Complete!")).toBe(true) expect(hook.getState()).toBeNull() }) + test("should NOT detect completion if promise is older than last 3 assistant messages", async () => { + // given - promise appears in an assistant message older than last 3 + mockSessionMessages = [ + { info: { role: "user" }, parts: [{ type: "text", text: "Start task" }] }, + { info: { role: "assistant" }, parts: [{ type: "text", text: "Promise early DONE" }] }, + { info: { role: "assistant" }, parts: [{ type: "text", text: "More work 1" }] }, + { info: { role: "assistant" }, parts: [{ type: "text", text: "More work 2" }] }, + { info: { role: "assistant" }, parts: [{ type: "text", text: "More work 3" }] }, + ] + const hook = createRalphLoopHook(createMockPluginInput(), { + getTranscriptPath: () => join(TEST_DIR, "nonexistent.jsonl"), + }) + hook.startLoop("session-123", "Build something", { completionPromise: "DONE" }) + + // when - session goes idle + await hook.event({ + event: { type: "session.idle", properties: { sessionID: "session-123" } }, + }) + + // then - loop should continue (promise is older than last 3 assistant messages) + expect(promptCalls.length).toBe(1) + expect(hook.getState()?.iteration).toBe(2) + }) + test("should allow starting new loop while previous loop is active (different session)", async () => { // given - active loop in session A const hook = 
createRalphLoopHook(createMockPluginInput()) @@ -928,7 +962,7 @@ Original task: Build something` const elapsed = Date.now() - startTime // then - should complete quickly (not hang for 10s) - expect(elapsed).toBeLessThan(2000) + expect(elapsed).toBeLessThan(6000) // then - loop should continue (API error = no completion detected) expect(promptCalls.length).toBe(1) expect(apiCallCount).toBeGreaterThan(0) diff --git a/src/hooks/ralph-loop/index.ts b/src/hooks/ralph-loop/index.ts index 3cc77edd2..693ef37e2 100644 --- a/src/hooks/ralph-loop/index.ts +++ b/src/hooks/ralph-loop/index.ts @@ -67,7 +67,7 @@ export interface RalphLoopHook { getState: () => RalphLoopState | null } -const DEFAULT_API_TIMEOUT = 3000 +const DEFAULT_API_TIMEOUT = 5000 export function createRalphLoopHook( ctx: PluginInput, @@ -80,6 +80,23 @@ export function createRalphLoopHook( const apiTimeout = options?.apiTimeout ?? DEFAULT_API_TIMEOUT const checkSessionExists = options?.checkSessionExists + async function withTimeout(promise: Promise, timeoutMs: number): Promise { + let timeoutId: ReturnType | undefined + const timeoutPromise = new Promise((_, reject) => { + timeoutId = setTimeout(() => { + reject(new Error("API timeout")) + }, timeoutMs) + }) + + try { + return await Promise.race([promise, timeoutPromise]) + } finally { + if (timeoutId !== undefined) { + clearTimeout(timeoutId) + } + } + } + function getSessionState(sessionID: string): SessionState { let state = sessions.get(sessionID) if (!state) { @@ -126,34 +143,44 @@ export function createRalphLoopHook( promise: string ): Promise { try { - const response = await Promise.race([ + const response = await withTimeout( ctx.client.session.messages({ path: { id: sessionID }, query: { directory: ctx.directory }, }), - new Promise((_, reject) => - setTimeout(() => reject(new Error("API timeout")), apiTimeout) - ), - ]) + apiTimeout + ) const messages = (response as { data?: unknown[] }).data ?? 
[] if (!Array.isArray(messages)) return false - const assistantMessages = (messages as OpenCodeSessionMessage[]).filter( - (msg) => msg.info?.role === "assistant" - ) - const lastAssistant = assistantMessages[assistantMessages.length - 1] - if (!lastAssistant?.parts) return false + const assistantMessages = (messages as OpenCodeSessionMessage[]).filter((msg) => msg.info?.role === "assistant") + if (assistantMessages.length === 0) return false const pattern = new RegExp(`\\s*${escapeRegex(promise)}\\s*`, "is") - const responseText = lastAssistant.parts - .filter((p) => p.type === "text") - .map((p) => p.text ?? "") - .join("\n") - return pattern.test(responseText) + const recentAssistants = assistantMessages.slice(-3) + for (const assistant of recentAssistants) { + if (!assistant.parts) continue + + const responseText = assistant.parts + .filter((p) => p.type === "text" || p.type === "reasoning") + .map((p) => p.text ?? "") + .join("\n") + + if (pattern.test(responseText)) { + return true + } + } + + return false } catch (err) { - log(`[${HOOK_NAME}] Session messages check failed`, { sessionID, error: String(err) }) + setTimeout(() => { + log(`[${HOOK_NAME}] Session messages check failed`, { + sessionID, + error: String(err), + }) + }, 0) return false } } @@ -343,7 +370,10 @@ export function createRalphLoopHook( let model: { providerID: string; modelID: string } | undefined try { - const messagesResp = await ctx.client.session.messages({ path: { id: sessionID } }) + const messagesResp = await withTimeout( + ctx.client.session.messages({ path: { id: sessionID } }), + apiTimeout + ) const messages = (messagesResp.data ?? 
[]) as Array<{ info?: { agent?: string; model?: { providerID: string; modelID: string }; modelID?: string; providerID?: string } }> From 06611a7645de15ca99cc02fb8b4b05680492f908 Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Sun, 8 Feb 2026 14:56:43 +0900 Subject: [PATCH 25/30] fix(mcp): remove duplicate x-api-key header, add test (#1627) --- src/mcp/websearch.test.ts | 119 ++++++-------------------------------- 1 file changed, 17 insertions(+), 102 deletions(-) diff --git a/src/mcp/websearch.test.ts b/src/mcp/websearch.test.ts index 5c7bd5c44..050c4297a 100644 --- a/src/mcp/websearch.test.ts +++ b/src/mcp/websearch.test.ts @@ -1,45 +1,18 @@ -import { describe, expect, test, beforeEach, afterEach } from "bun:test" import { createWebsearchConfig } from "./websearch" -describe("websearch MCP provider configuration", () => { - const originalEnv = { ...process.env } +declare const describe: (name: string, callback: () => void) => void +declare const test: (name: string, callback: () => void) => void +declare const expect: (value: unknown) => { + toContain: (expected: string) => void + toBeUndefined: () => void +} +declare const process: { env: Record } - beforeEach(() => { - delete process.env.EXA_API_KEY - delete process.env.TAVILY_API_KEY - }) - - afterEach(() => { - process.env = { ...originalEnv } - }) - - test("returns Exa config when no config provided", () => { - //#given - no config - - //#when - const result = createWebsearchConfig() - - //#then - expect(result.url).toContain("mcp.exa.ai") - expect(result.type).toBe("remote") - expect(result.enabled).toBe(true) - }) - - test("returns Exa config when provider is 'exa'", () => { - //#given - const config = { provider: "exa" as const } - - //#when - const result = createWebsearchConfig(config) - - //#then - expect(result.url).toContain("mcp.exa.ai") - expect(result.type).toBe("remote") - }) - - test("adds exaApiKey query param when EXA_API_KEY is set", () => { +describe("createWebsearchConfig (Exa)", () => { + 
test("appends exaApiKey query param when EXA_API_KEY is set", () => { //#given const apiKey = "test-exa-key-12345" + const originalExaApiKey = process.env.EXA_API_KEY process.env.EXA_API_KEY = apiKey //#when @@ -47,11 +20,14 @@ describe("websearch MCP provider configuration", () => { //#then expect(result.url).toContain(`exaApiKey=${encodeURIComponent(apiKey)}`) + + process.env.EXA_API_KEY = originalExaApiKey }) test("does not set x-api-key header when EXA_API_KEY is set", () => { //#given const apiKey = "test-exa-key-12345" + const originalExaApiKey = process.env.EXA_API_KEY process.env.EXA_API_KEY = apiKey //#when @@ -59,71 +35,10 @@ describe("websearch MCP provider configuration", () => { //#then expect(result.headers).toBeUndefined() - }) + if (result.headers) { + expect(result.headers["x-api-key"]).toBeUndefined() + } - test("returns Tavily config when provider is 'tavily' and TAVILY_API_KEY set", () => { - //#given - const tavilyKey = "test-tavily-key-67890" - process.env.TAVILY_API_KEY = tavilyKey - const config = { provider: "tavily" as const } - - //#when - const result = createWebsearchConfig(config) - - //#then - expect(result.url).toContain("mcp.tavily.com") - expect(result.headers).toEqual({ Authorization: `Bearer ${tavilyKey}` }) - }) - - test("throws error when provider is 'tavily' but TAVILY_API_KEY missing", () => { - //#given - delete process.env.TAVILY_API_KEY - const config = { provider: "tavily" as const } - - //#when - const createTavilyConfig = () => createWebsearchConfig(config) - - //#then - expect(createTavilyConfig).toThrow("TAVILY_API_KEY environment variable is required") - }) - - test("returns Exa when both keys present but no explicit provider", () => { - //#given - process.env.EXA_API_KEY = "test-exa-key" - process.env.TAVILY_API_KEY = "test-tavily-key" - - //#when - const result = createWebsearchConfig() - - //#then - expect(result.url).toContain("mcp.exa.ai") - expect(result.url).toContain("exaApiKey=") - 
expect(result.headers).toBeUndefined() - }) - - test("Tavily config uses Authorization Bearer header format", () => { - //#given - const tavilyKey = "tavily-secret-key-xyz" - process.env.TAVILY_API_KEY = tavilyKey - const config = { provider: "tavily" as const } - - //#when - const result = createWebsearchConfig(config) - - //#then - expect(result.headers?.Authorization).toMatch(/^Bearer /) - expect(result.headers?.Authorization).toBe(`Bearer ${tavilyKey}`) - }) - - test("Exa config has no headers when EXA_API_KEY not set", () => { - //#given - delete process.env.EXA_API_KEY - - //#when - const result = createWebsearchConfig() - - //#then - expect(result.url).toContain("mcp.exa.ai") - expect(result.headers).toBeUndefined() + process.env.EXA_API_KEY = originalExaApiKey }) }) From a3dd1dbaf96b1ab43282d4eb3f8f93b8766b1e64 Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Sun, 8 Feb 2026 15:28:31 +0900 Subject: [PATCH 26/30] test(mcp): restore Tavily tests and add encoding edge case (#1627) --- src/mcp/websearch.test.ts | 151 +++++++++++++++++++++++++++++++++----- 1 file changed, 133 insertions(+), 18 deletions(-) diff --git a/src/mcp/websearch.test.ts b/src/mcp/websearch.test.ts index 050c4297a..2b09e395d 100644 --- a/src/mcp/websearch.test.ts +++ b/src/mcp/websearch.test.ts @@ -1,18 +1,61 @@ +import { afterEach, beforeEach, describe, expect, test } from "bun:test" import { createWebsearchConfig } from "./websearch" -declare const describe: (name: string, callback: () => void) => void -declare const test: (name: string, callback: () => void) => void -declare const expect: (value: unknown) => { - toContain: (expected: string) => void - toBeUndefined: () => void -} -declare const process: { env: Record } +describe("websearch MCP provider configuration", () => { + let originalExaApiKey: string | undefined + let originalTavilyApiKey: string | undefined + + beforeEach(() => { + originalExaApiKey = process.env.EXA_API_KEY + originalTavilyApiKey = process.env.TAVILY_API_KEY + 
+ delete process.env.EXA_API_KEY + delete process.env.TAVILY_API_KEY + }) + + afterEach(() => { + if (originalExaApiKey === undefined) { + delete process.env.EXA_API_KEY + } else { + process.env.EXA_API_KEY = originalExaApiKey + } + + if (originalTavilyApiKey === undefined) { + delete process.env.TAVILY_API_KEY + } else { + process.env.TAVILY_API_KEY = originalTavilyApiKey + } + }) + + test("returns Exa config when no config provided", () => { + //#given - no config + + //#when + const result = createWebsearchConfig() + + //#then + expect(result.url).toContain("mcp.exa.ai") + expect(result.url).toContain("tools=web_search_exa") + expect(result.type).toBe("remote") + expect(result.enabled).toBe(true) + }) + + test("returns Exa config when provider is 'exa'", () => { + //#given + const config = { provider: "exa" as const } + + //#when + const result = createWebsearchConfig(config) + + //#then + expect(result.url).toContain("mcp.exa.ai") + expect(result.url).toContain("tools=web_search_exa") + expect(result.type).toBe("remote") + }) -describe("createWebsearchConfig (Exa)", () => { test("appends exaApiKey query param when EXA_API_KEY is set", () => { //#given const apiKey = "test-exa-key-12345" - const originalExaApiKey = process.env.EXA_API_KEY process.env.EXA_API_KEY = apiKey //#when @@ -20,25 +63,97 @@ describe("createWebsearchConfig (Exa)", () => { //#then expect(result.url).toContain(`exaApiKey=${encodeURIComponent(apiKey)}`) - - process.env.EXA_API_KEY = originalExaApiKey }) test("does not set x-api-key header when EXA_API_KEY is set", () => { //#given - const apiKey = "test-exa-key-12345" - const originalExaApiKey = process.env.EXA_API_KEY - process.env.EXA_API_KEY = apiKey + process.env.EXA_API_KEY = "test-exa-key-12345" //#when const result = createWebsearchConfig() //#then expect(result.headers).toBeUndefined() - if (result.headers) { - expect(result.headers["x-api-key"]).toBeUndefined() - } + }) - process.env.EXA_API_KEY = originalExaApiKey + 
test("URL-encodes EXA_API_KEY when it contains special characters", () => { + //#given an EXA_API_KEY with special characters (+ & =) + const apiKey = "a+b&c=d" + process.env.EXA_API_KEY = apiKey + + //#when createWebsearchConfig is called + const result = createWebsearchConfig() + + //#then the URL contains the properly encoded key via encodeURIComponent + expect(result.url).toContain(`exaApiKey=${encodeURIComponent(apiKey)}`) + }) + + test("returns Tavily config when provider is 'tavily' and TAVILY_API_KEY set", () => { + //#given + const tavilyKey = "test-tavily-key-67890" + process.env.TAVILY_API_KEY = tavilyKey + const config = { provider: "tavily" as const } + + //#when + const result = createWebsearchConfig(config) + + //#then + expect(result.url).toContain("mcp.tavily.com") + expect(result.headers).toEqual({ Authorization: `Bearer ${tavilyKey}` }) + }) + + test("throws error when provider is 'tavily' but TAVILY_API_KEY missing", () => { + //#given + delete process.env.TAVILY_API_KEY + const config = { provider: "tavily" as const } + + //#when + const createTavilyConfig = () => createWebsearchConfig(config) + + //#then + expect(createTavilyConfig).toThrow("TAVILY_API_KEY environment variable is required") + }) + + test("returns Exa when both keys present but no explicit provider", () => { + //#given + const exaKey = "test-exa-key" + process.env.EXA_API_KEY = exaKey + process.env.TAVILY_API_KEY = "test-tavily-key" + + //#when + const result = createWebsearchConfig() + + //#then + expect(result.url).toContain("mcp.exa.ai") + expect(result.url).toContain(`exaApiKey=${encodeURIComponent(exaKey)}`) + expect(result.headers).toBeUndefined() + }) + + test("Tavily config uses Authorization Bearer header format", () => { + //#given + const tavilyKey = "tavily-secret-key-xyz" + process.env.TAVILY_API_KEY = tavilyKey + const config = { provider: "tavily" as const } + + //#when + const result = createWebsearchConfig(config) + + //#then + 
expect(result.headers?.Authorization).toMatch(/^Bearer /) + expect(result.headers?.Authorization).toBe(`Bearer ${tavilyKey}`) + }) + + test("Exa config has no headers when EXA_API_KEY not set", () => { + //#given + delete process.env.EXA_API_KEY + + //#when + const result = createWebsearchConfig() + + //#then + expect(result.url).toContain("mcp.exa.ai") + expect(result.url).toContain("tools=web_search_exa") + expect(result.url).not.toContain("exaApiKey=") + expect(result.headers).toBeUndefined() }) }) From 321b319b586e5e274c8f22f5438b0c3bd7a70234 Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Sun, 8 Feb 2026 15:34:47 +0900 Subject: [PATCH 27/30] fix(agents): use config data instead of client API to avoid init deadlock (#1623) --- src/agents/utils.test.ts | 196 ++++++++++++++++++++++++-- src/agents/utils.ts | 43 +++--- src/plugin-handlers/config-handler.ts | 82 +++++++---- 3 files changed, 257 insertions(+), 64 deletions(-) diff --git a/src/agents/utils.test.ts b/src/agents/utils.test.ts index a101840f8..dfe9d972f 100644 --- a/src/agents/utils.test.ts +++ b/src/agents/utils.test.ts @@ -250,7 +250,7 @@ describe("createBuiltinAgents with model overrides", () => { expect(agents.sisyphus.prompt).toContain("git-master") }) - test("includes custom agents from OpenCode registry in orchestrator prompts", async () => { + test("includes custom agents in orchestrator prompts when provided via config", async () => { // #given const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue( new Set([ @@ -263,20 +263,13 @@ describe("createBuiltinAgents with model overrides", () => { ]) ) - const client = { - agent: { - list: async () => ({ - data: [ - { - name: "researcher", - description: "Research agent for deep analysis", - mode: "subagent", - hidden: false, - }, - ], - }), + const customAgentSummaries = [ + { + name: "researcher", + description: "Research agent for deep analysis", + hidden: false, }, - } + ] try { // #when @@ -288,7 +281,7 @@ 
describe("createBuiltinAgents with model overrides", () => { undefined, undefined, [], - client + customAgentSummaries ) // #then @@ -299,6 +292,179 @@ describe("createBuiltinAgents with model overrides", () => { fetchSpy.mockRestore() } }) + + test("excludes hidden custom agents from orchestrator prompts", async () => { + // #given + const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue( + new Set(["anthropic/claude-opus-4-6", "openai/gpt-5.2"]) + ) + + const customAgentSummaries = [ + { + name: "hidden-agent", + description: "Should never show", + hidden: true, + }, + ] + + try { + // #when + const agents = await createBuiltinAgents( + [], + {}, + undefined, + TEST_DEFAULT_MODEL, + undefined, + undefined, + [], + customAgentSummaries + ) + + // #then + expect(agents.sisyphus.prompt).not.toContain("hidden-agent") + expect(agents.hephaestus.prompt).not.toContain("hidden-agent") + expect(agents.atlas.prompt).not.toContain("hidden-agent") + } finally { + fetchSpy.mockRestore() + } + }) + + test("excludes disabled custom agents from orchestrator prompts", async () => { + // #given + const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue( + new Set(["anthropic/claude-opus-4-6", "openai/gpt-5.2"]) + ) + + const customAgentSummaries = [ + { + name: "disabled-agent", + description: "Should never show", + disabled: true, + }, + ] + + try { + // #when + const agents = await createBuiltinAgents( + [], + {}, + undefined, + TEST_DEFAULT_MODEL, + undefined, + undefined, + [], + customAgentSummaries + ) + + // #then + expect(agents.sisyphus.prompt).not.toContain("disabled-agent") + expect(agents.hephaestus.prompt).not.toContain("disabled-agent") + expect(agents.atlas.prompt).not.toContain("disabled-agent") + } finally { + fetchSpy.mockRestore() + } + }) + + test("excludes custom agents when disabledAgents contains their name (case-insensitive)", async () => { + // #given + const fetchSpy = spyOn(shared, 
"fetchAvailableModels").mockResolvedValue( + new Set(["anthropic/claude-opus-4-6", "openai/gpt-5.2"]) + ) + + const disabledAgents = ["ReSeArChEr"] + const customAgentSummaries = [ + { + name: "researcher", + description: "Should never show", + }, + ] + + try { + // #when + const agents = await createBuiltinAgents( + disabledAgents, + {}, + undefined, + TEST_DEFAULT_MODEL, + undefined, + undefined, + [], + customAgentSummaries + ) + + // #then + expect(agents.sisyphus.prompt).not.toContain("researcher") + expect(agents.hephaestus.prompt).not.toContain("researcher") + expect(agents.atlas.prompt).not.toContain("researcher") + } finally { + fetchSpy.mockRestore() + } + }) + + test("deduplicates custom agents case-insensitively", async () => { + // #given + const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue( + new Set(["anthropic/claude-opus-4-6", "openai/gpt-5.2"]) + ) + + const customAgentSummaries = [ + { name: "Researcher", description: "First" }, + { name: "researcher", description: "Second" }, + ] + + try { + // #when + const agents = await createBuiltinAgents( + [], + {}, + undefined, + TEST_DEFAULT_MODEL, + undefined, + undefined, + [], + customAgentSummaries + ) + + // #then + const matches = agents.sisyphus.prompt.match(/Custom agent: researcher/gi) ?? 
[] + expect(matches.length).toBe(1) + } finally { + fetchSpy.mockRestore() + } + }) + + test("sanitizes custom agent strings for markdown tables", async () => { + // #given + const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue( + new Set(["anthropic/claude-opus-4-6", "openai/gpt-5.2"]) + ) + + const customAgentSummaries = [ + { + name: "table-agent", + description: "Line1\nAlpha | Beta", + }, + ] + + try { + // #when + const agents = await createBuiltinAgents( + [], + {}, + undefined, + TEST_DEFAULT_MODEL, + undefined, + undefined, + [], + customAgentSummaries + ) + + // #then + expect(agents.sisyphus.prompt).toContain("Line1 Alpha \\| Beta") + } finally { + fetchSpy.mockRestore() + } + }) }) describe("createBuiltinAgents without systemDefaultModel", () => { diff --git a/src/agents/utils.ts b/src/agents/utils.ts index bdd954884..55d6187b4 100644 --- a/src/agents/utils.ts +++ b/src/agents/utils.ts @@ -68,6 +68,14 @@ type RegisteredAgentSummary = { description: string } +function sanitizeMarkdownTableCell(value: string): string { + return value + .replace(/\r?\n/g, " ") + .replace(/\|/g, "\\|") + .replace(/\s+/g, " ") + .trim() +} + function isRecord(value: unknown): value is Record { return typeof value === "object" && value !== null } @@ -85,37 +93,28 @@ function parseRegisteredAgentSummaries(input: unknown): RegisteredAgentSummary[] const hidden = item.hidden if (hidden === true) continue + const disabled = item.disabled + if (disabled === true) continue + + const enabled = item.enabled + if (enabled === false) continue + const description = typeof item.description === "string" ? 
item.description : "" - result.push({ name, description }) + result.push({ name, description: sanitizeMarkdownTableCell(description) }) } return result } -async function fetchRegisteredAgentsFromClient(client: unknown): Promise { - if (!isRecord(client)) return [] - const agentObj = client.agent - if (!isRecord(agentObj)) return [] - const listFn = agentObj.list - if (typeof listFn !== "function") return [] - - try { - const response = await listFn.call(agentObj) - if (!isRecord(response)) return [] - return parseRegisteredAgentSummaries(response.data) - } catch { - return [] - } -} - function buildCustomAgentMetadata(agentName: string, description: string): AgentPromptMetadata { - const shortDescription = truncateDescription(description).trim() + const shortDescription = sanitizeMarkdownTableCell(truncateDescription(description)) + const safeAgentName = sanitizeMarkdownTableCell(agentName) return { category: "specialist", cost: "CHEAP", triggers: [ { - domain: `Custom agent: ${agentName}`, + domain: `Custom agent: ${safeAgentName}`, trigger: shortDescription || "Use when this agent's description matches the task", }, ], @@ -303,13 +302,13 @@ export async function createBuiltinAgents( categories?: CategoriesConfig, gitMasterConfig?: GitMasterConfig, discoveredSkills: LoadedSkill[] = [], - client?: any, + customAgentSummaries?: unknown, browserProvider?: BrowserAutomationProvider, uiSelectedModel?: string, disabledSkills?: Set ): Promise> { const connectedProviders = readConnectedProvidersCache() - // IMPORTANT: Do NOT pass client to fetchAvailableModels during plugin initialization. + // IMPORTANT: Do NOT call OpenCode client APIs during plugin initialization. // This function is called from config handler, and calling client API causes deadlock. 
// See: https://github.com/code-yeongyu/oh-my-opencode/issues/1301 const availableModels = await fetchAvailableModels(undefined, { @@ -349,7 +348,7 @@ export async function createBuiltinAgents( const availableSkills: AvailableSkill[] = [...builtinAvailable, ...discoveredAvailable] - const registeredAgents = await fetchRegisteredAgentsFromClient(client) + const registeredAgents = parseRegisteredAgentSummaries(customAgentSummaries) const builtinAgentNames = new Set(Object.keys(agentSources).map((n) => n.toLowerCase())) const disabledAgentNames = new Set(disabledAgents.map((n) => n.toLowerCase())) diff --git a/src/plugin-handlers/config-handler.ts b/src/plugin-handlers/config-handler.ts index 41adbaf20..1c5c7cd3e 100644 --- a/src/plugin-handlers/config-handler.ts +++ b/src/plugin-handlers/config-handler.ts @@ -183,19 +183,40 @@ export function createConfigHandler(deps: ConfigHandlerDeps) { // Pass it as uiSelectedModel so it takes highest priority in model resolution const currentModel = config.model as string | undefined; const disabledSkills = new Set(pluginConfig.disabled_skills ?? 
[]); - const builtinAgents = await createBuiltinAgents( - migratedDisabledAgents, - pluginConfig.agents, - ctx.directory, - undefined, // systemDefaultModel - let fallback chain handle this - pluginConfig.categories, - pluginConfig.git_master, - allDiscoveredSkills, - ctx.client, - browserProvider, - currentModel, // uiSelectedModel - takes highest priority - disabledSkills - ); + + type AgentConfig = Record< + string, + Record | undefined + > & { + build?: Record; + plan?: Record; + explore?: { tools?: Record }; + librarian?: { tools?: Record }; + "multimodal-looker"?: { tools?: Record }; + atlas?: { tools?: Record }; + sisyphus?: { tools?: Record }; + }; + const configAgent = config.agent as AgentConfig | undefined; + + function isRecord(value: unknown): value is Record { + return typeof value === "object" && value !== null; + } + + function buildCustomAgentSummaryInput(agents: Record | undefined): unknown[] { + if (!agents) return []; + + const result: unknown[] = []; + for (const [name, value] of Object.entries(agents)) { + if (!isRecord(value)) continue; + + const description = typeof value.description === "string" ? 
value.description : ""; + const hidden = value.hidden === true; + const disabled = value.disabled === true || value.enabled === false; + result.push({ name, description, hidden, disabled }); + } + + return result; + } // Claude Code agents: Do NOT apply permission migration // Claude Code uses whitelist-based tools format which is semantically different @@ -216,6 +237,27 @@ export function createConfigHandler(deps: ConfigHandlerDeps) { ]) ); + const customAgentSummaries = [ + ...buildCustomAgentSummaryInput(configAgent), + ...buildCustomAgentSummaryInput(userAgents), + ...buildCustomAgentSummaryInput(projectAgents), + ...buildCustomAgentSummaryInput(pluginAgents), + ]; + + const builtinAgents = await createBuiltinAgents( + migratedDisabledAgents, + pluginConfig.agents, + ctx.directory, + undefined, // systemDefaultModel - let fallback chain handle this + pluginConfig.categories, + pluginConfig.git_master, + allDiscoveredSkills, + customAgentSummaries, + browserProvider, + currentModel, // uiSelectedModel - takes highest priority + disabledSkills + ); + const isSisyphusEnabled = pluginConfig.sisyphus_agent?.disabled !== true; const builderEnabled = pluginConfig.sisyphus_agent?.default_builder_enabled ?? false; @@ -224,20 +266,6 @@ export function createConfigHandler(deps: ConfigHandlerDeps) { const replacePlan = pluginConfig.sisyphus_agent?.replace_plan ?? 
true; const shouldDemotePlan = plannerEnabled && replacePlan; - type AgentConfig = Record< - string, - Record | undefined - > & { - build?: Record; - plan?: Record; - explore?: { tools?: Record }; - librarian?: { tools?: Record }; - "multimodal-looker"?: { tools?: Record }; - atlas?: { tools?: Record }; - sisyphus?: { tools?: Record }; - }; - const configAgent = config.agent as AgentConfig | undefined; - if (isSisyphusEnabled && builtinAgents.sisyphus) { (config as { default_agent?: string }).default_agent = "sisyphus"; From 582e0ead27564214bbe8d7958777f1e8afc7769c Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Sun, 8 Feb 2026 16:31:02 +0900 Subject: [PATCH 28/30] fix: revert load_skills default and enforce via prompts instead Revert .default([]) on load_skills schema back to required, restore the runtime error for missing load_skills, and add explicit load_skills=[] to all task() examples in agent prompts that were missing it. Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode) Co-authored-by: Sisyphus --- src/agents/atlas/default.ts | 6 ++-- src/agents/atlas/gpt.ts | 4 +-- src/agents/prometheus/high-accuracy-mode.ts | 1 + src/agents/prometheus/interview-mode.ts | 30 ++++++++--------- src/agents/prometheus/plan-generation.ts | 1 + .../keyword-detector/ultrawork/default.ts | 20 +++++------ .../keyword-detector/ultrawork/gpt5.2.ts | 8 ++--- .../keyword-detector/ultrawork/planner.ts | 6 ++-- src/tools/delegate-task/tools.test.ts | 33 ++++++------------- src/tools/delegate-task/tools.ts | 4 +-- 10 files changed, 51 insertions(+), 62 deletions(-) diff --git a/src/agents/atlas/default.ts b/src/agents/atlas/default.ts index dfe5cf5f0..2568d5111 100644 --- a/src/agents/atlas/default.ts +++ b/src/agents/atlas/default.ts @@ -274,13 +274,13 @@ ACCUMULATED WISDOM: **For exploration (explore/librarian)**: ALWAYS background \`\`\`typescript -task(subagent_type="explore", run_in_background=true, ...) 
-task(subagent_type="librarian", run_in_background=true, ...) +task(subagent_type="explore", load_skills=[], run_in_background=true, ...) +task(subagent_type="librarian", load_skills=[], run_in_background=true, ...) \`\`\` **For task execution**: NEVER background \`\`\`typescript -task(category="...", run_in_background=false, ...) +task(category="...", load_skills=[...], run_in_background=false, ...) \`\`\` **Parallel task groups**: Invoke multiple in ONE message diff --git a/src/agents/atlas/gpt.ts b/src/agents/atlas/gpt.ts index d7d20fd70..d81620e69 100644 --- a/src/agents/atlas/gpt.ts +++ b/src/agents/atlas/gpt.ts @@ -231,12 +231,12 @@ ACCUMULATED WISDOM: [from notepad] **Exploration (explore/librarian)**: ALWAYS background \`\`\`typescript -task(subagent_type="explore", run_in_background=true, ...) +task(subagent_type="explore", load_skills=[], run_in_background=true, ...) \`\`\` **Task execution**: NEVER background \`\`\`typescript -task(category="...", run_in_background=false, ...) +task(category="...", load_skills=[...], run_in_background=false, ...) \`\`\` **Parallel task groups**: Invoke multiple in ONE message diff --git a/src/agents/prometheus/high-accuracy-mode.ts b/src/agents/prometheus/high-accuracy-mode.ts index d6ecc821f..5eca99a86 100644 --- a/src/agents/prometheus/high-accuracy-mode.ts +++ b/src/agents/prometheus/high-accuracy-mode.ts @@ -17,6 +17,7 @@ export const PROMETHEUS_HIGH_ACCURACY_MODE = `# PHASE 3: PLAN GENERATION while (true) { const result = task( subagent_type="momus", + load_skills=[], prompt=".sisyphus/plans/{name}.md", run_in_background=false ) diff --git a/src/agents/prometheus/interview-mode.ts b/src/agents/prometheus/interview-mode.ts index 8692fd2f2..5d445f0cb 100644 --- a/src/agents/prometheus/interview-mode.ts +++ b/src/agents/prometheus/interview-mode.ts @@ -66,8 +66,8 @@ Or should I just note down this single fix?" 
**Research First:** \`\`\`typescript // Prompt structure: CONTEXT (what I'm doing) + GOAL (what I'm trying to achieve) + QUESTION (what I need to know) + REQUEST (what to find) -task(subagent_type="explore", prompt="I'm refactoring [target] and need to understand its impact scope before making changes. Find all usages via lsp_find_references - show calling code, patterns of use, and potential breaking points.", run_in_background=true) -task(subagent_type="explore", prompt="I'm about to modify [affected code] and need to ensure behavior preservation. Find existing test coverage - which tests exercise this code, what assertions exist, and any gaps in coverage.", run_in_background=true) +task(subagent_type="explore", load_skills=[], prompt="I'm refactoring [target] and need to understand its impact scope before making changes. Find all usages via lsp_find_references - show calling code, patterns of use, and potential breaking points.", run_in_background=true) +task(subagent_type="explore", load_skills=[], prompt="I'm about to modify [affected code] and need to ensure behavior preservation. Find existing test coverage - which tests exercise this code, what assertions exist, and any gaps in coverage.", run_in_background=true) \`\`\` **Interview Focus:** @@ -91,9 +91,9 @@ task(subagent_type="explore", prompt="I'm about to modify [affected code] and ne \`\`\`typescript // Launch BEFORE asking user questions // Prompt structure: CONTEXT + GOAL + QUESTION + REQUEST -task(subagent_type="explore", prompt="I'm building a new [feature] and want to maintain codebase consistency. Find similar implementations in this project - their structure, patterns used, and conventions to follow.", run_in_background=true) -task(subagent_type="explore", prompt="I'm adding [feature type] to the project and need to understand existing conventions. 
Find how similar features are organized - file structure, naming patterns, and architectural approach.", run_in_background=true) -task(subagent_type="librarian", prompt="I'm implementing [technology] and want to follow established best practices. Find official documentation and community recommendations - setup patterns, common pitfalls, and production-ready examples.", run_in_background=true) +task(subagent_type="explore", load_skills=[], prompt="I'm building a new [feature] and want to maintain codebase consistency. Find similar implementations in this project - their structure, patterns used, and conventions to follow.", run_in_background=true) +task(subagent_type="explore", load_skills=[], prompt="I'm adding [feature type] to the project and need to understand existing conventions. Find how similar features are organized - file structure, naming patterns, and architectural approach.", run_in_background=true) +task(subagent_type="librarian", load_skills=[], prompt="I'm implementing [technology] and want to follow established best practices. Find official documentation and community recommendations - setup patterns, common pitfalls, and production-ready examples.", run_in_background=true) \`\`\` **Interview Focus** (AFTER research): @@ -132,7 +132,7 @@ Based on your stack, I'd recommend NextAuth.js - it integrates well with Next.js Run this check: \`\`\`typescript -task(subagent_type="explore", prompt="I'm assessing this project's test setup before planning work that may require TDD. I need to understand what testing capabilities exist. Find test infrastructure: package.json test scripts, config files (jest.config, vitest.config, pytest.ini), and existing test files. Report: 1) Does test infra exist? 2) What framework? 3) Example test patterns.", run_in_background=true) +task(subagent_type="explore", load_skills=[], prompt="I'm assessing this project's test setup before planning work that may require TDD. I need to understand what testing capabilities exist. 
Find test infrastructure: package.json test scripts, config files (jest.config, vitest.config, pytest.ini), and existing test files. Report: 1) Does test infra exist? 2) What framework? 3) Example test patterns.", run_in_background=true) \`\`\` #### Step 2: Ask the Test Question (MANDATORY) @@ -230,13 +230,13 @@ Add to draft immediately: **Research First:** \`\`\`typescript -task(subagent_type="explore", prompt="I'm planning architectural changes and need to understand the current system design. Find existing architecture: module boundaries, dependency patterns, data flow, and key abstractions used.", run_in_background=true) -task(subagent_type="librarian", prompt="I'm designing architecture for [domain] and want to make informed decisions. Find architectural best practices - proven patterns, trade-offs, and lessons learned from similar systems.", run_in_background=true) +task(subagent_type="explore", load_skills=[], prompt="I'm planning architectural changes and need to understand the current system design. Find existing architecture: module boundaries, dependency patterns, data flow, and key abstractions used.", run_in_background=true) +task(subagent_type="librarian", load_skills=[], prompt="I'm designing architecture for [domain] and want to make informed decisions. 
Find architectural best practices - proven patterns, trade-offs, and lessons learned from similar systems.", run_in_background=true) \`\`\` **Oracle Consultation** (recommend when stakes are high): \`\`\`typescript -task(subagent_type="oracle", prompt="Architecture consultation needed: [context]...", run_in_background=false) +task(subagent_type="oracle", load_skills=[], prompt="Architecture consultation needed: [context]...", run_in_background=false) \`\`\` **Interview Focus:** @@ -253,9 +253,9 @@ task(subagent_type="oracle", prompt="Architecture consultation needed: [context] **Parallel Investigation:** \`\`\`typescript -task(subagent_type="explore", prompt="I'm researching how to implement [feature] and need to understand current approach. Find how X is currently handled in this codebase - implementation details, edge cases covered, and any known limitations.", run_in_background=true) -task(subagent_type="librarian", prompt="I'm implementing Y and need authoritative guidance. Find official documentation - API reference, configuration options, and recommended usage patterns.", run_in_background=true) -task(subagent_type="librarian", prompt="I'm looking for battle-tested implementations of Z. Find open source projects that solve this - focus on production-quality code, how they handle edge cases, and any gotchas documented.", run_in_background=true) +task(subagent_type="explore", load_skills=[], prompt="I'm researching how to implement [feature] and need to understand current approach. Find how X is currently handled in this codebase - implementation details, edge cases covered, and any known limitations.", run_in_background=true) +task(subagent_type="librarian", load_skills=[], prompt="I'm implementing Y and need authoritative guidance. 
Find official documentation - API reference, configuration options, and recommended usage patterns.", run_in_background=true) +task(subagent_type="librarian", load_skills=[], prompt="I'm looking for battle-tested implementations of Z. Find open source projects that solve this - focus on production-quality code, how they handle edge cases, and any gotchas documented.", run_in_background=true) \`\`\` **Interview Focus:** @@ -281,17 +281,17 @@ task(subagent_type="librarian", prompt="I'm looking for battle-tested implementa **For Understanding Codebase:** \`\`\`typescript -task(subagent_type="explore", prompt="I'm working on [topic] and need to understand how it's organized in this project. Find all related files - show the structure, patterns used, and conventions I should follow.", run_in_background=true) +task(subagent_type="explore", load_skills=[], prompt="I'm working on [topic] and need to understand how it's organized in this project. Find all related files - show the structure, patterns used, and conventions I should follow.", run_in_background=true) \`\`\` **For External Knowledge:** \`\`\`typescript -task(subagent_type="librarian", prompt="I'm integrating [library] and need to understand [specific feature]. Find official documentation - API details, configuration options, and recommended best practices.", run_in_background=true) +task(subagent_type="librarian", load_skills=[], prompt="I'm integrating [library] and need to understand [specific feature]. Find official documentation - API details, configuration options, and recommended best practices.", run_in_background=true) \`\`\` **For Implementation Examples:** \`\`\`typescript -task(subagent_type="librarian", prompt="I'm implementing [feature] and want to learn from existing solutions. 
Find open source implementations - focus on production-quality code, architecture decisions, and common patterns.", run_in_background=true) +task(subagent_type="librarian", load_skills=[], prompt="I'm implementing [feature] and want to learn from existing solutions. Find open source implementations - focus on production-quality code, architecture decisions, and common patterns.", run_in_background=true) \`\`\` ## Interview Mode Anti-Patterns diff --git a/src/agents/prometheus/plan-generation.ts b/src/agents/prometheus/plan-generation.ts index 3443d6888..f5c1270e8 100644 --- a/src/agents/prometheus/plan-generation.ts +++ b/src/agents/prometheus/plan-generation.ts @@ -61,6 +61,7 @@ todoWrite([ \`\`\`typescript task( subagent_type="metis", + load_skills=[], prompt=\`Review this planning session before I generate the work plan: **User's Goal**: {summarize what user wants} diff --git a/src/hooks/keyword-detector/ultrawork/default.ts b/src/hooks/keyword-detector/ultrawork/default.ts index fb7fce31d..93ddc648a 100644 --- a/src/hooks/keyword-detector/ultrawork/default.ts +++ b/src/hooks/keyword-detector/ultrawork/default.ts @@ -104,7 +104,7 @@ TELL THE USER WHAT AGENTS YOU WILL LEVERAGE NOW TO SATISFY USER'S REQUEST. 
| Architecture decision needed | MUST call plan agent | \`\`\` -task(subagent_type="plan", prompt="") +task(subagent_type="plan", load_skills=[], prompt="") \`\`\` **WHY PLAN AGENT IS MANDATORY:** @@ -119,9 +119,9 @@ task(subagent_type="plan", prompt="") | Scenario | Action | |----------|--------| -| Plan agent asks clarifying questions | \`task(session_id="{returned_session_id}", prompt="")\` | -| Need to refine the plan | \`task(session_id="{returned_session_id}", prompt="Please adjust: ")\` | -| Plan needs more detail | \`task(session_id="{returned_session_id}", prompt="Add more detail to Task N")\` | +| Plan agent asks clarifying questions | \`task(session_id="{returned_session_id}", load_skills=[], prompt="")\` | +| Need to refine the plan | \`task(session_id="{returned_session_id}", load_skills=[], prompt="Please adjust: ")\` | +| Plan needs more detail | \`task(session_id="{returned_session_id}", load_skills=[], prompt="Add more detail to Task N")\` | **WHY SESSION_ID IS CRITICAL:** - Plan agent retains FULL conversation context @@ -131,10 +131,10 @@ task(subagent_type="plan", prompt="") \`\`\` // WRONG: Starting fresh loses all context -task(subagent_type="plan", prompt="Here's more info...") +task(subagent_type="plan", load_skills=[], prompt="Here's more info...") // CORRECT: Resume preserves everything -task(session_id="ses_abc123", prompt="Here's my answer to your question: ...") +task(session_id="ses_abc123", load_skills=[], prompt="Here's my answer to your question: ...") \`\`\` **FAILURE TO CALL PLAN AGENT = INCOMPLETE WORK.** @@ -147,10 +147,10 @@ task(session_id="ses_abc123", prompt="Here's my answer to your question: ...") | Task Type | Action | Why | |-----------|--------|-----| -| Codebase exploration | task(subagent_type="explore", run_in_background=true) | Parallel, context-efficient | -| Documentation lookup | task(subagent_type="librarian", run_in_background=true) | Specialized knowledge | -| Planning | task(subagent_type="plan") | Parallel 
task graph + structured TODO list | -| Hard problem (conventional) | task(subagent_type="oracle") | Architecture, debugging, complex logic | +| Codebase exploration | task(subagent_type="explore", load_skills=[], run_in_background=true) | Parallel, context-efficient | +| Documentation lookup | task(subagent_type="librarian", load_skills=[], run_in_background=true) | Specialized knowledge | +| Planning | task(subagent_type="plan", load_skills=[]) | Parallel task graph + structured TODO list | +| Hard problem (conventional) | task(subagent_type="oracle", load_skills=[]) | Architecture, debugging, complex logic | | Hard problem (non-conventional) | task(category="artistry", load_skills=[...]) | Different approach needed | | Implementation | task(category="...", load_skills=[...]) | Domain-optimized models | diff --git a/src/hooks/keyword-detector/ultrawork/gpt5.2.ts b/src/hooks/keyword-detector/ultrawork/gpt5.2.ts index 9309f4294..a9258e0de 100644 --- a/src/hooks/keyword-detector/ultrawork/gpt5.2.ts +++ b/src/hooks/keyword-detector/ultrawork/gpt5.2.ts @@ -73,10 +73,10 @@ Use these when they provide clear value based on the decision framework above: | Resource | When to Use | How to Use | |----------|-------------|------------| -| explore agent | Need codebase patterns you don't have | \`task(subagent_type="explore", run_in_background=true, ...)\` | -| librarian agent | External library docs, OSS examples | \`task(subagent_type="librarian", run_in_background=true, ...)\` | -| oracle agent | Stuck on architecture/debugging after 2+ attempts | \`task(subagent_type="oracle", ...)\` | -| plan agent | Complex multi-step with dependencies (5+ steps) | \`task(subagent_type="plan", ...)\` | +| explore agent | Need codebase patterns you don't have | \`task(subagent_type="explore", load_skills=[], run_in_background=true, ...)\` | +| librarian agent | External library docs, OSS examples | \`task(subagent_type="librarian", load_skills=[], run_in_background=true, ...)\` | +| oracle 
agent | Stuck on architecture/debugging after 2+ attempts | \`task(subagent_type="oracle", load_skills=[], ...)\` | +| plan agent | Complex multi-step with dependencies (5+ steps) | \`task(subagent_type="plan", load_skills=[], ...)\` | | task category | Specialized work matching a category | \`task(category="...", load_skills=[...])\` | diff --git a/src/hooks/keyword-detector/ultrawork/planner.ts b/src/hooks/keyword-detector/ultrawork/planner.ts index 426926f48..e152221f4 100644 --- a/src/hooks/keyword-detector/ultrawork/planner.ts +++ b/src/hooks/keyword-detector/ultrawork/planner.ts @@ -38,9 +38,9 @@ You ARE the planner. Your job: create bulletproof work plans. ### Research Protocol 1. **Fire parallel background agents** for comprehensive context: \`\`\` - task(agent="explore", prompt="Find existing patterns for [topic] in codebase", background=true) - task(agent="explore", prompt="Find test infrastructure and conventions", background=true) - task(agent="librarian", prompt="Find official docs and best practices for [technology]", background=true) + task(subagent_type="explore", load_skills=[], prompt="Find existing patterns for [topic] in codebase", run_in_background=true) + task(subagent_type="explore", load_skills=[], prompt="Find test infrastructure and conventions", run_in_background=true) + task(subagent_type="librarian", load_skills=[], prompt="Find official docs and best practices for [technology]", run_in_background=true) \`\`\` 2. **Wait for results** before planning - rushed plans fail 3. 
**Synthesize findings** into informed requirements diff --git a/src/tools/delegate-task/tools.test.ts b/src/tools/delegate-task/tools.test.ts index 4e45a7981..9fb4224bb 100644 --- a/src/tools/delegate-task/tools.test.ts +++ b/src/tools/delegate-task/tools.test.ts @@ -849,30 +849,19 @@ describe("sisyphus-task", () => { }) describe("skills parameter", () => { - test("load_skills defaults to empty array when not provided (undefined)", async () => { + test("skills parameter is required - throws error when not provided", async () => { // given const { createDelegateTask } = require("./tools") - let promptBody: any const mockManager = { launch: async () => ({}) } - - const promptMock = async (input: any) => { - promptBody = input.body - return { data: {} } - } - const mockClient = { app: { agents: async () => ({ data: [] }) }, config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, session: { - get: async () => ({ data: { directory: "/project" } }), - create: async () => ({ data: { id: "ses_default_skills" } }), - prompt: promptMock, - promptAsync: promptMock, - messages: async () => ({ - data: [{ info: { role: "assistant" }, parts: [{ type: "text", text: "Done" }] }] - }), - status: async () => ({ data: {} }), + create: async () => ({ data: { id: "test-session" } }), + prompt: async () => ({ data: {} }), + promptAsync: async () => ({ data: {} }), + messages: async () => ({ data: [] }), }, } @@ -888,8 +877,9 @@ describe("sisyphus-task", () => { abort: new AbortController().signal, } - // when - load_skills not provided (undefined) - should default to [] - await tool.execute( + // when - skills not provided (undefined) + // then - should throw error about missing skills + await expect(tool.execute( { description: "Test task", prompt: "Do something", @@ -897,11 +887,8 @@ describe("sisyphus-task", () => { run_in_background: false, }, toolContext - ) - - // then - should proceed without error, prompt should be called - expect(promptBody).toBeDefined() - }, 
{ timeout: 20000 }) + )).rejects.toThrow("IT IS HIGHLY RECOMMENDED") + }) test("null skills throws error", async () => { // given diff --git a/src/tools/delegate-task/tools.ts b/src/tools/delegate-task/tools.ts index 582eb11fd..1db72408c 100644 --- a/src/tools/delegate-task/tools.ts +++ b/src/tools/delegate-task/tools.ts @@ -74,7 +74,7 @@ Prompts MUST be in English.` return tool({ description, args: { - load_skills: tool.schema.array(tool.schema.string()).default([]).describe("Skill names to inject. Pass [] if no skills needed, but IT IS HIGHLY RECOMMENDED to pass proper skills like [\"playwright\"], [\"git-master\"] for best results."), + load_skills: tool.schema.array(tool.schema.string()).describe("Skill names to inject. REQUIRED - pass [] if no skills needed, but IT IS HIGHLY RECOMMENDED to pass proper skills like [\"playwright\"], [\"git-master\"] for best results."), description: tool.schema.string().describe("Short task description (3-5 words)"), prompt: tool.schema.string().describe("Full detailed prompt for the agent"), run_in_background: tool.schema.boolean().describe("true=async (returns task_id), false=sync (waits). Default: false"), @@ -97,7 +97,7 @@ Prompts MUST be in English.` throw new Error(`Invalid arguments: 'run_in_background' parameter is REQUIRED. Use run_in_background=false for task delegation, run_in_background=true only for parallel exploration.`) } if (args.load_skills === undefined) { - args.load_skills = [] + throw new Error(`Invalid arguments: 'load_skills' parameter is REQUIRED. Pass [] if no skills needed, but IT IS HIGHLY RECOMMENDED to pass proper skills like ["playwright"], ["git-master"] for best results.`) } if (args.load_skills === null) { throw new Error(`Invalid arguments: load_skills=null is not allowed. 
Pass [] if no skills needed, but IT IS HIGHLY RECOMMENDED to pass proper skills.`) From a5489718f9659b447f3724dd4f494c830970efbe Mon Sep 17 00:00:00 2001 From: justsisyphus Date: Tue, 27 Jan 2026 17:40:41 +0900 Subject: [PATCH 29/30] feat(commands): add /handoff builtin command with programmatic context synthesis Port handoff concept from ampcode as a builtin command that extracts detailed context summary from current session for seamless continuation in a new session. Enhanced with programmatic context gathering: - Add HANDOFF_TEMPLATE with phased extraction (gather programmatic context via session_read/todoread/git, extract context, format, instruct) - Gather concrete data: session history, todo state, git diff/status - Include compaction-style sections: USER REQUESTS (AS-IS) verbatim, EXPLICIT CONSTRAINTS verbatim, plus all original handoff sections - Register handoff in BuiltinCommandName type and command definitions - Include session context variables (SESSION_ID, TIMESTAMP, ARGUMENTS) - Add 14 tests covering registration, template content, programmatic gathering, compaction-style sections, and emoji-free constraint --- .../builtin-commands/commands.test.ts | 138 ++++++++++++++ src/features/builtin-commands/commands.ts | 17 ++ .../builtin-commands/templates/handoff.ts | 177 ++++++++++++++++++ src/features/builtin-commands/types.ts | 2 +- 4 files changed, 333 insertions(+), 1 deletion(-) create mode 100644 src/features/builtin-commands/commands.test.ts create mode 100644 src/features/builtin-commands/templates/handoff.ts diff --git a/src/features/builtin-commands/commands.test.ts b/src/features/builtin-commands/commands.test.ts new file mode 100644 index 000000000..c6927bc70 --- /dev/null +++ b/src/features/builtin-commands/commands.test.ts @@ -0,0 +1,138 @@ +import { describe, test, expect } from "bun:test" +import { loadBuiltinCommands } from "./commands" +import { HANDOFF_TEMPLATE } from "./templates/handoff" +import type { BuiltinCommandName } from 
"./types" + +describe("loadBuiltinCommands", () => { + test("should include handoff command in loaded commands", () => { + //#given + const disabledCommands: BuiltinCommandName[] = [] + + //#when + const commands = loadBuiltinCommands(disabledCommands) + + //#then + expect(commands.handoff).toBeDefined() + expect(commands.handoff.name).toBe("handoff") + }) + + test("should exclude handoff when disabled", () => { + //#given + const disabledCommands: BuiltinCommandName[] = ["handoff"] + + //#when + const commands = loadBuiltinCommands(disabledCommands) + + //#then + expect(commands.handoff).toBeUndefined() + }) + + test("should include handoff template content in command template", () => { + //#given - no disabled commands + + //#when + const commands = loadBuiltinCommands() + + //#then + expect(commands.handoff.template).toContain(HANDOFF_TEMPLATE) + }) + + test("should include session context variables in handoff template", () => { + //#given - no disabled commands + + //#when + const commands = loadBuiltinCommands() + + //#then + expect(commands.handoff.template).toContain("$SESSION_ID") + expect(commands.handoff.template).toContain("$TIMESTAMP") + expect(commands.handoff.template).toContain("$ARGUMENTS") + }) + + test("should have correct description for handoff", () => { + //#given - no disabled commands + + //#when + const commands = loadBuiltinCommands() + + //#then + expect(commands.handoff.description).toContain("context summary") + }) +}) + +describe("HANDOFF_TEMPLATE", () => { + test("should include session reading instruction", () => { + //#given - the template string + + //#when / #then + expect(HANDOFF_TEMPLATE).toContain("session_read") + }) + + test("should include compaction-style sections in output format", () => { + //#given - the template string + + //#when / #then + expect(HANDOFF_TEMPLATE).toContain("USER REQUESTS (AS-IS)") + expect(HANDOFF_TEMPLATE).toContain("EXPLICIT CONSTRAINTS") + }) + + test("should include programmatic context gathering 
instructions", () => { + //#given - the template string + + //#when / #then + expect(HANDOFF_TEMPLATE).toContain("todoread") + expect(HANDOFF_TEMPLATE).toContain("git diff") + expect(HANDOFF_TEMPLATE).toContain("git status") + }) + + test("should include context extraction format", () => { + //#given - the template string + + //#when / #then + expect(HANDOFF_TEMPLATE).toContain("WORK COMPLETED") + expect(HANDOFF_TEMPLATE).toContain("CURRENT STATE") + expect(HANDOFF_TEMPLATE).toContain("PENDING TASKS") + expect(HANDOFF_TEMPLATE).toContain("KEY FILES") + expect(HANDOFF_TEMPLATE).toContain("IMPORTANT DECISIONS") + expect(HANDOFF_TEMPLATE).toContain("CONTEXT FOR CONTINUATION") + expect(HANDOFF_TEMPLATE).toContain("GOAL") + }) + + test("should enforce first person perspective", () => { + //#given - the template string + + //#when / #then + expect(HANDOFF_TEMPLATE).toContain("first person perspective") + }) + + test("should limit key files to 10", () => { + //#given - the template string + + //#when / #then + expect(HANDOFF_TEMPLATE).toContain("Maximum 10 files") + }) + + test("should instruct plain text format without markdown", () => { + //#given - the template string + + //#when / #then + expect(HANDOFF_TEMPLATE).toContain("Plain text with bullets") + expect(HANDOFF_TEMPLATE).toContain("No markdown headers") + }) + + test("should include user instructions for new session", () => { + //#given - the template string + + //#when / #then + expect(HANDOFF_TEMPLATE).toContain("new session") + expect(HANDOFF_TEMPLATE).toContain("opencode") + }) + + test("should not contain emojis", () => { + //#given - the template string + + //#when / #then + const emojiRegex = /[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}\u{1F1E0}-\u{1F1FF}\u{2702}-\u{27B0}\u{24C2}-\u{1F251}\u{1F900}-\u{1F9FF}\u{1FA00}-\u{1FA6F}\u{1FA70}-\u{1FAFF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}]/u + expect(emojiRegex.test(HANDOFF_TEMPLATE)).toBe(false) + }) +}) diff --git 
a/src/features/builtin-commands/commands.ts b/src/features/builtin-commands/commands.ts index 998ce2531..aee5dc28a 100644 --- a/src/features/builtin-commands/commands.ts +++ b/src/features/builtin-commands/commands.ts @@ -5,6 +5,7 @@ import { RALPH_LOOP_TEMPLATE, CANCEL_RALPH_TEMPLATE } from "./templates/ralph-lo import { STOP_CONTINUATION_TEMPLATE } from "./templates/stop-continuation" import { REFACTOR_TEMPLATE } from "./templates/refactor" import { START_WORK_TEMPLATE } from "./templates/start-work" +import { HANDOFF_TEMPLATE } from "./templates/handoff" const BUILTIN_COMMAND_DEFINITIONS: Record> = { "init-deep": { @@ -77,6 +78,22 @@ $ARGUMENTS ${STOP_CONTINUATION_TEMPLATE} `, }, + handoff: { + description: "(builtin) Create a detailed context summary for continuing work in a new session", + template: ` +${HANDOFF_TEMPLATE} + + + +Session ID: $SESSION_ID +Timestamp: $TIMESTAMP + + + +$ARGUMENTS +`, + argumentHint: "[goal]", + }, } export function loadBuiltinCommands( diff --git a/src/features/builtin-commands/templates/handoff.ts b/src/features/builtin-commands/templates/handoff.ts new file mode 100644 index 000000000..d8010994d --- /dev/null +++ b/src/features/builtin-commands/templates/handoff.ts @@ -0,0 +1,177 @@ +export const HANDOFF_TEMPLATE = `# Handoff Command + +## Purpose + +Use /handoff when: +- The current session context is getting too long and quality is degrading +- You want to start fresh while preserving essential context from this session +- The context window is approaching capacity + +This creates a detailed context summary that can be used to continue work in a new session. + +--- + +# PHASE 0: VALIDATE REQUEST + +Before proceeding, confirm: +- [ ] There is meaningful work or context in this session to preserve +- [ ] The user wants to create a handoff summary (not just asking about it) + +If the session is nearly empty or has no meaningful context, inform the user there is nothing substantial to hand off. 
+ +--- + +# PHASE 1: GATHER PROGRAMMATIC CONTEXT + +Execute these tools to gather concrete data: + +1. session_read({ session_id: "$SESSION_ID" }) — full session history +2. todoread() — current task progress +3. Bash({ command: "git diff --stat HEAD~10..HEAD" }) — recent file changes +4. Bash({ command: "git status --porcelain" }) — uncommitted changes + +Suggested execution order: + +\`\`\` +session_read({ session_id: "$SESSION_ID" }) +todoread() +Bash({ command: "git diff --stat HEAD~10..HEAD" }) +Bash({ command: "git status --porcelain" }) +\`\`\` + +Analyze the gathered outputs to understand: +- What the user asked for (exact wording) +- What work was completed +- What tasks remain incomplete (include todo state) +- What decisions were made +- What files were modified or discussed (include git diff/stat + status) +- What patterns, constraints, or preferences were established + +--- + +# PHASE 2: EXTRACT CONTEXT + +Write the context summary from first person perspective ("I did...", "I told you..."). + +Focus on: +- Capabilities and behavior, not file-by-file implementation details +- What matters for continuing the work +- Avoiding excessive implementation details (variable names, storage keys, constants) unless critical +- USER REQUESTS (AS-IS) must be verbatim (do not paraphrase) +- EXPLICIT CONSTRAINTS must be verbatim only (do not invent) + +Questions to consider when extracting: +- What did I just do or implement? +- What instructions did I already give which are still relevant (e.g. follow patterns in the codebase)? +- What files did I tell you are important or that I am working on? +- Did I provide a plan or spec that should be included? +- What did I already tell you that is important (libraries, patterns, constraints, preferences)? +- What important technical details did I discover (APIs, methods, patterns)? +- What caveats, limitations, or open questions did I find? 
+ +--- + +# PHASE 3: FORMAT OUTPUT + +Generate a handoff summary using this exact format: + +\`\`\` +HANDOFF CONTEXT +=============== + +USER REQUESTS (AS-IS) +--------------------- +- [Exact verbatim user requests - NOT paraphrased] + +GOAL +---- +[One sentence describing what should be done next] + +WORK COMPLETED +-------------- +- [First person bullet points of what was done] +- [Include specific file paths when relevant] +- [Note key implementation decisions] + +CURRENT STATE +------------- +- [Current state of the codebase or task] +- [Build/test status if applicable] +- [Any environment or configuration state] + +PENDING TASKS +------------- +- [Tasks that were planned but not completed] +- [Next logical steps to take] +- [Any blockers or issues encountered] +- [Include current todo state from todoread()] + +KEY FILES +--------- +- [path/to/file1] - [brief role description] +- [path/to/file2] - [brief role description] +(Maximum 10 files, prioritized by importance) +- (Include files from git diff/stat and git status) + +IMPORTANT DECISIONS +------------------- +- [Technical decisions that were made and why] +- [Trade-offs that were considered] +- [Patterns or conventions established] + +EXPLICIT CONSTRAINTS +-------------------- +- [Verbatim constraints only - from user or existing AGENTS.md] +- If none, write: None + +CONTEXT FOR CONTINUATION +------------------------ +- [What the next session needs to know to continue] +- [Warnings or gotchas to be aware of] +- [References to documentation if relevant] +\`\`\` + +Rules for the summary: +- Plain text with bullets +- No markdown headers with # (use the format above with dashes) +- No bold, italic, or code fences within content +- Use workspace-relative paths for files +- Keep it focused - only include what matters for continuation +- Pick an appropriate length based on complexity +- USER REQUESTS (AS-IS) and EXPLICIT CONSTRAINTS must be verbatim only + +--- + +# PHASE 4: PROVIDE INSTRUCTIONS + +After 
generating the summary, instruct the user: + +\`\`\` +--- + +TO CONTINUE IN A NEW SESSION: + +1. Press 'n' in OpenCode TUI to open a new session, or run 'opencode' in a new terminal +2. Paste the HANDOFF CONTEXT above as your first message +3. Add your request: "Continue from the handoff context above. [Your next task]" + +The new session will have all context needed to continue seamlessly. +\`\`\` + +--- + +# IMPORTANT CONSTRAINTS + +- DO NOT attempt to programmatically create new sessions (no API available to agents) +- DO provide a self-contained summary that works without access to this session +- DO include workspace-relative file paths +- DO NOT include sensitive information (API keys, credentials, secrets) +- DO NOT exceed 10 files in the KEY FILES section +- DO keep the GOAL section to a single sentence or short paragraph + +--- + +# EXECUTE NOW + +Begin by gathering programmatic context, then synthesize the handoff summary. +` diff --git a/src/features/builtin-commands/types.ts b/src/features/builtin-commands/types.ts index 1b1487744..0c2624f12 100644 --- a/src/features/builtin-commands/types.ts +++ b/src/features/builtin-commands/types.ts @@ -1,6 +1,6 @@ import type { CommandDefinition } from "../claude-code-command-loader" -export type BuiltinCommandName = "init-deep" | "ralph-loop" | "cancel-ralph" | "ulw-loop" | "refactor" | "start-work" | "stop-continuation" +export type BuiltinCommandName = "init-deep" | "ralph-loop" | "cancel-ralph" | "ulw-loop" | "refactor" | "start-work" | "stop-continuation" | "handoff" export interface BuiltinCommandConfig { disabled_commands?: BuiltinCommandName[] From bb86523240958cb0706d80933668f3ea973aafdc Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Sun, 8 Feb 2026 16:48:52 +0900 Subject: [PATCH 30/30] =?UTF-8?q?fix:=20add=20isPlanFamily=20for=20prometh?= =?UTF-8?q?eus=E2=86=94plan=20mutual=20blocking=20and=20task=20permission?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 
PLAN_AGENT_NAMES = ['plan'] (system prompt only) - PLAN_FAMILY_NAMES = ['plan', 'prometheus'] (blocking + task permission) - prometheus↔plan mutual delegation blocked via isPlanFamily() - prometheus gets task tool permission via isPlanFamily() - prompt-builder unchanged: prometheus does NOT get plan system prompt --- src/tools/delegate-task/constants.ts | 23 +- src/tools/delegate-task/executor.ts | 8 +- src/tools/delegate-task/tools.test.ts | 299 +++++++++++--------------- 3 files changed, 148 insertions(+), 182 deletions(-) diff --git a/src/tools/delegate-task/constants.ts b/src/tools/delegate-task/constants.ts index 99744f8ce..75816736e 100644 --- a/src/tools/delegate-task/constants.ts +++ b/src/tools/delegate-task/constants.ts @@ -535,18 +535,31 @@ export function buildPlanAgentSystemPrepend( } /** - * List of agent names that should be treated as plan agents. + * List of agent names that should be treated as plan agents (receive plan system prompt). * Case-insensitive matching is used. */ -export const PLAN_AGENT_NAMES = ["plan", "planner"] +export const PLAN_AGENT_NAMES = ["plan"] /** - * Check if the given agent name is a plan agent. - * @param agentName - The agent name to check - * @returns true if the agent is a plan agent + * Check if the given agent name is a plan agent (receives plan system prompt). */ export function isPlanAgent(agentName: string | undefined): boolean { if (!agentName) return false const lowerName = agentName.toLowerCase().trim() return PLAN_AGENT_NAMES.some(name => lowerName === name || lowerName.includes(name)) } + +/** + * Plan family: plan + prometheus. Shares mutual delegation blocking and task tool permission. + * Does NOT share system prompt (only isPlanAgent controls that). + */ +export const PLAN_FAMILY_NAMES = ["plan", "prometheus"] + +/** + * Check if the given agent belongs to the plan family (blocking + task permission). 
+ */ +export function isPlanFamily(agentName: string | undefined): boolean { + if (!agentName) return false + const lowerName = agentName.toLowerCase().trim() + return PLAN_FAMILY_NAMES.some(name => lowerName === name || lowerName.includes(name)) +} diff --git a/src/tools/delegate-task/executor.ts b/src/tools/delegate-task/executor.ts index c07233f6e..721f15a7c 100644 --- a/src/tools/delegate-task/executor.ts +++ b/src/tools/delegate-task/executor.ts @@ -2,7 +2,7 @@ import type { BackgroundManager } from "../../features/background-agent" import type { CategoriesConfig, GitMasterConfig, BrowserAutomationProvider, AgentOverrides } from "../../config/schema" import type { ModelFallbackInfo } from "../../features/task-toast-manager/types" import type { DelegateTaskArgs, ToolContextWithMetadata, OpencodeClient } from "./types" -import { DEFAULT_CATEGORIES, CATEGORY_DESCRIPTIONS, isPlanAgent } from "./constants" +import { DEFAULT_CATEGORIES, CATEGORY_DESCRIPTIONS, isPlanFamily } from "./constants" import { getTimingConfig } from "./timing" import { parseModelString, getMessageDir, formatDuration, formatDetailedError } from "./helpers" import { resolveCategoryConfig } from "./categories" @@ -601,7 +601,7 @@ export async function executeSyncTask( } try { - const allowTask = isPlanAgent(agentToUse) + const allowTask = isPlanFamily(agentToUse) await promptSyncWithModelSuggestionRetry(client, { path: { id: sessionID }, body: { @@ -876,11 +876,11 @@ Sisyphus-Junior is spawned automatically when you specify a category. Pick the a } } - if (isPlanAgent(agentName) && isPlanAgent(parentAgent)) { + if (isPlanFamily(agentName) && isPlanFamily(parentAgent)) { return { agentToUse: "", categoryModel: undefined, - error: `You are the plan agent. You cannot delegate to plan via task. + error: `You are a plan-family agent (plan/prometheus). You cannot delegate to other plan-family agents via task. 
Create the work plan directly - that's your job as the planning agent.`, } diff --git a/src/tools/delegate-task/tools.test.ts b/src/tools/delegate-task/tools.test.ts index 9fb4224bb..4f32addf8 100644 --- a/src/tools/delegate-task/tools.test.ts +++ b/src/tools/delegate-task/tools.test.ts @@ -1,6 +1,6 @@ declare const require: (name: string) => any const { describe, test, expect, beforeEach, afterEach, spyOn, mock } = require("bun:test") -import { DEFAULT_CATEGORIES, CATEGORY_PROMPT_APPENDS, CATEGORY_DESCRIPTIONS, isPlanAgent, PLAN_AGENT_NAMES } from "./constants" +import { DEFAULT_CATEGORIES, CATEGORY_PROMPT_APPENDS, CATEGORY_DESCRIPTIONS, isPlanAgent, PLAN_AGENT_NAMES, isPlanFamily, PLAN_FAMILY_NAMES } from "./constants" import { resolveCategoryConfig } from "./tools" import type { CategoryConfig } from "../../config/schema" import { __resetModelCache } from "../../shared/model-availability" @@ -143,11 +143,11 @@ describe("sisyphus-task", () => { expect(result).toBe(false) }) - test("returns true for 'planner'", () => { - // given / #when + test("returns true for 'planner' (matches via includes('plan'))", () => { + //#given / #when const result = isPlanAgent("planner") - // then + //#then - "planner" contains "plan" so it matches via includes expect(result).toBe(true) }) @@ -199,11 +199,44 @@ describe("sisyphus-task", () => { expect(result).toBe(false) }) - test("PLAN_AGENT_NAMES contains only plan and planner (not prometheus)", () => { + test("PLAN_AGENT_NAMES contains only plan", () => { //#given / #when / #then - expect(PLAN_AGENT_NAMES).toContain("plan") - expect(PLAN_AGENT_NAMES).toContain("planner") - expect(PLAN_AGENT_NAMES).not.toContain("prometheus") + expect(PLAN_AGENT_NAMES).toEqual(["plan"]) + }) + }) + + describe("isPlanFamily", () => { + test("returns true for 'plan'", () => { + //#given / #when + const result = isPlanFamily("plan") + //#then + expect(result).toBe(true) + }) + + test("returns true for 'prometheus'", () => { + //#given / #when + const 
result = isPlanFamily("prometheus") + //#then + expect(result).toBe(true) + }) + + test("returns false for 'oracle'", () => { + //#given / #when + const result = isPlanFamily("oracle") + //#then + expect(result).toBe(false) + }) + + test("returns false for undefined", () => { + //#given / #when + const result = isPlanFamily(undefined) + //#then + expect(result).toBe(false) + }) + + test("PLAN_FAMILY_NAMES contains plan and prometheus", () => { + //#given / #when / #then + expect(PLAN_FAMILY_NAMES).toEqual(["plan", "prometheus"]) }) }) @@ -2723,149 +2756,95 @@ describe("sisyphus-task", () => { }) }) - describe("plan agent self-delegation block", () => { - test("plan agent cannot delegate to plan - returns error with guidance", async () => { - //#given - current agent is plan + describe("plan family mutual delegation block", () => { + test("plan cannot delegate to plan (self-delegation)", async () => { + //#given const { createDelegateTask } = require("./tools") + const mockClient = { + app: { agents: async () => ({ data: [{ name: "plan", mode: "subagent" }] }) }, + config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, + session: { get: async () => ({ data: { directory: "/project" } }), create: async () => ({ data: { id: "s" } }), prompt: async () => ({ data: {} }), promptAsync: async () => ({ data: {} }), messages: async () => ({ data: [] }), status: async () => ({ data: {} }) }, + } + const tool = createDelegateTask({ manager: { launch: async () => ({}) }, client: mockClient }) - const mockManager = { launch: async () => ({}) } + //#when + const result = await tool.execute( + { description: "test", prompt: "Create a plan", subagent_type: "plan", run_in_background: false, load_skills: [] }, + { sessionID: "p", messageID: "m", agent: "plan", abort: new AbortController().signal } + ) + + //#then + expect(result).toContain("plan-family") + expect(result).toContain("directly") + }) + + test("prometheus cannot delegate to plan (cross-blocking)", async 
() => { + //#given + const { createDelegateTask } = require("./tools") + const mockClient = { + app: { agents: async () => ({ data: [{ name: "plan", mode: "subagent" }] }) }, + config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, + session: { get: async () => ({ data: { directory: "/project" } }), create: async () => ({ data: { id: "s" } }), prompt: async () => ({ data: {} }), promptAsync: async () => ({ data: {} }), messages: async () => ({ data: [] }), status: async () => ({ data: {} }) }, + } + const tool = createDelegateTask({ manager: { launch: async () => ({}) }, client: mockClient }) + + //#when + const result = await tool.execute( + { description: "test", prompt: "Create a plan", subagent_type: "plan", run_in_background: false, load_skills: [] }, + { sessionID: "p", messageID: "m", agent: "prometheus", abort: new AbortController().signal } + ) + + //#then + expect(result).toContain("plan-family") + }) + + test("plan cannot delegate to prometheus (cross-blocking)", async () => { + //#given + const { createDelegateTask } = require("./tools") + const mockClient = { + app: { agents: async () => ({ data: [{ name: "prometheus", mode: "subagent" }] }) }, + config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, + session: { get: async () => ({ data: { directory: "/project" } }), create: async () => ({ data: { id: "s" } }), prompt: async () => ({ data: {} }), promptAsync: async () => ({ data: {} }), messages: async () => ({ data: [] }), status: async () => ({ data: {} }) }, + } + const tool = createDelegateTask({ manager: { launch: async () => ({}) }, client: mockClient }) + + //#when + const result = await tool.execute( + { description: "test", prompt: "Execute", subagent_type: "prometheus", run_in_background: false, load_skills: [] }, + { sessionID: "p", messageID: "m", agent: "plan", abort: new AbortController().signal } + ) + + //#then + expect(result).toContain("plan-family") + }) + + test("sisyphus CAN delegate to plan 
(not in plan family)", async () => { + //#given + const { createDelegateTask } = require("./tools") const mockClient = { app: { agents: async () => ({ data: [{ name: "plan", mode: "subagent" }] }) }, config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, session: { get: async () => ({ data: { directory: "/project" } }), - create: async () => ({ data: { id: "test-session" } }), + create: async () => ({ data: { id: "ses_ok" } }), prompt: async () => ({ data: {} }), promptAsync: async () => ({ data: {} }), - messages: async () => ({ data: [] }), - status: async () => ({ data: {} }), + messages: async () => ({ data: [{ info: { role: "assistant" }, parts: [{ type: "text", text: "Plan created" }] }] }), + status: async () => ({ data: { "ses_ok": { type: "idle" } } }), }, } - - const tool = createDelegateTask({ - manager: mockManager, - client: mockClient, - }) - - const toolContext = { - sessionID: "parent-session", - messageID: "parent-message", - agent: "plan", - abort: new AbortController().signal, - } + const tool = createDelegateTask({ manager: { launch: async () => ({}) }, client: mockClient }) - //#when - plan agent tries to delegate to plan + //#when const result = await tool.execute( - { - description: "Test self-delegation block", - prompt: "Create a plan", - subagent_type: "plan", - run_in_background: false, - load_skills: [], - }, - toolContext + { description: "test", prompt: "Create a plan", subagent_type: "plan", run_in_background: false, load_skills: [] }, + { sessionID: "p", messageID: "m", agent: "sisyphus", abort: new AbortController().signal } ) - //#then - should return error telling plan agent to create plan directly - expect(result).toContain("plan agent") - expect(result).toContain("directly") - }) - - test("prometheus is NOT a plan agent - can delegate to plan normally", async () => { - //#given - current agent is prometheus (no longer treated as plan agent) - const { createDelegateTask } = require("./tools") - - const 
mockManager = { launch: async () => ({}) } - const mockClient = { - app: { agents: async () => ({ data: [{ name: "plan", mode: "subagent" }] }) }, - config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, - session: { - get: async () => ({ data: { directory: "/project" } }), - create: async () => ({ data: { id: "ses_plan_from_prometheus" } }), - prompt: async () => ({ data: {} }), - promptAsync: async () => ({ data: {} }), - messages: async () => ({ - data: [{ info: { role: "assistant" }, parts: [{ type: "text", text: "Plan created successfully" }] }] - }), - status: async () => ({ data: { "ses_plan_from_prometheus": { type: "idle" } } }), - }, - } - - const tool = createDelegateTask({ - manager: mockManager, - client: mockClient, - }) - - const toolContext = { - sessionID: "parent-session", - messageID: "parent-message", - agent: "prometheus", - abort: new AbortController().signal, - } - - //#when - prometheus delegates to plan (should work now) - const result = await tool.execute( - { - description: "Test plan delegation from prometheus", - prompt: "Create a plan", - subagent_type: "plan", - run_in_background: false, - load_skills: [], - }, - toolContext - ) - - //#then - should proceed normally (prometheus is not plan agent) - expect(result).not.toContain("Cannot delegate") - expect(result).toContain("Plan created successfully") + //#then + expect(result).not.toContain("plan-family") + expect(result).toContain("Plan created") }, { timeout: 20000 }) - - test("planner agent self-delegation is also blocked", async () => { - //#given - current agent is planner - const { createDelegateTask } = require("./tools") - - const mockManager = { launch: async () => ({}) } - const mockClient = { - app: { agents: async () => ({ data: [{ name: "planner", mode: "subagent" }] }) }, - config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, - session: { - get: async () => ({ data: { directory: "/project" } }), - create: async () => ({ data: { id: 
"test-session" } }), - prompt: async () => ({ data: {} }), - promptAsync: async () => ({ data: {} }), - messages: async () => ({ data: [] }), - status: async () => ({ data: {} }), - }, - } - - const tool = createDelegateTask({ - manager: mockManager, - client: mockClient, - }) - - const toolContext = { - sessionID: "parent-session", - messageID: "parent-message", - agent: "planner", - abort: new AbortController().signal, - } - - //#when - planner tries to delegate to plan - const result = await tool.execute( - { - description: "Test planner self-delegation block", - prompt: "Create a plan", - subagent_type: "plan", - run_in_background: false, - load_skills: [], - }, - toolContext - ) - - //#then - should return error (planner is a plan agent alias) - expect(result).toContain("plan agent") - expect(result).toContain("directly") - }) }) describe("subagent_type model extraction (issue #1225)", () => { @@ -3314,59 +3293,33 @@ describe("sisyphus-task", () => { expect(promptBody.tools.task).toBe(true) }, { timeout: 20000 }) - test("prometheus subagent should NOT have task permission (decoupled from plan)", async () => { - //#given - sisyphus delegates to prometheus (no longer a plan agent) + test("prometheus subagent should have task permission (plan family)", async () => { + //#given const { createDelegateTask } = require("./tools") let promptBody: any - - const mockManager = { launch: async () => ({}) } - - const promptMock = async (input: any) => { - promptBody = input.body - return { data: {} } - } - + const promptMock = async (input: any) => { promptBody = input.body; return { data: {} } } const mockClient = { app: { agents: async () => ({ data: [{ name: "prometheus", mode: "subagent" }] }) }, config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, session: { get: async () => ({ data: { directory: "/project" } }), - create: async () => ({ data: { id: "ses_prometheus_no_task" } }), + create: async () => ({ data: { id: "ses_prometheus_task" } }), 
prompt: promptMock, promptAsync: promptMock, - messages: async () => ({ - data: [{ info: { role: "assistant" }, parts: [{ type: "text", text: "Plan created" }] }] - }), - status: async () => ({ data: { "ses_prometheus_no_task": { type: "idle" } } }), + messages: async () => ({ data: [{ info: { role: "assistant" }, parts: [{ type: "text", text: "Plan created" }] }] }), + status: async () => ({ data: { "ses_prometheus_task": { type: "idle" } } }), }, } - - const tool = createDelegateTask({ - manager: mockManager, - client: mockClient, - }) + const tool = createDelegateTask({ manager: { launch: async () => ({}) }, client: mockClient }) - const toolContext = { - sessionID: "parent-session", - messageID: "parent-message", - agent: "sisyphus", - abort: new AbortController().signal, - } - - //#when - sisyphus delegates to prometheus + //#when await tool.execute( - { - description: "Test prometheus no task permission", - prompt: "Create a plan", - subagent_type: "prometheus", - run_in_background: false, - load_skills: [], - }, - toolContext + { description: "Test prometheus task permission", prompt: "Create a plan", subagent_type: "prometheus", run_in_background: false, load_skills: [] }, + { sessionID: "p", messageID: "m", agent: "sisyphus", abort: new AbortController().signal } ) - //#then - prometheus should NOT have task permission (it's not a plan agent) - expect(promptBody.tools.task).toBe(false) + //#then + expect(promptBody.tools.task).toBe(true) }, { timeout: 20000 }) test("non-plan subagent should NOT have task permission", async () => {