diff --git a/src/cli/__snapshots__/model-fallback.test.ts.snap b/src/cli/__snapshots__/model-fallback.test.ts.snap index 255c39bb5..8602b7377 100644 --- a/src/cli/__snapshots__/model-fallback.test.ts.snap +++ b/src/cli/__snapshots__/model-fallback.test.ts.snap @@ -31,6 +31,9 @@ exports[`generateModelConfig no providers available returns ULTIMATE_FALLBACK fo "prometheus": { "model": "opencode/glm-4.7-free", }, + "sisyphus-junior": { + "model": "opencode/glm-4.7-free", + }, }, "categories": { "artistry": { @@ -71,9 +74,6 @@ exports[`generateModelConfig single native provider uses Claude models when only "explore": { "model": "anthropic/claude-haiku-4-5", }, - "librarian": { - "model": "anthropic/claude-sonnet-4-5", - }, "metis": { "model": "anthropic/claude-opus-4-6", "variant": "max", @@ -97,6 +97,9 @@ exports[`generateModelConfig single native provider uses Claude models when only "model": "anthropic/claude-opus-4-6", "variant": "max", }, + "sisyphus-junior": { + "model": "anthropic/claude-sonnet-4-6", + }, }, "categories": { "quick": { @@ -133,9 +136,6 @@ exports[`generateModelConfig single native provider uses Claude models with isMa "explore": { "model": "anthropic/claude-haiku-4-5", }, - "librarian": { - "model": "anthropic/claude-sonnet-4-5", - }, "metis": { "model": "anthropic/claude-opus-4-6", "variant": "max", @@ -159,6 +159,9 @@ exports[`generateModelConfig single native provider uses Claude models with isMa "model": "anthropic/claude-opus-4-6", "variant": "max", }, + "sisyphus-junior": { + "model": "anthropic/claude-sonnet-4-6", + }, }, "categories": { "quick": { @@ -191,8 +194,7 @@ exports[`generateModelConfig single native provider uses OpenAI models when only "$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/dev/assets/oh-my-opencode.schema.json", "agents": { "atlas": { - "model": "openai/gpt-5.4", - "variant": "medium", + "model": "opencode/glm-4.7-free", }, "explore": { "model": "openai/gpt-5.4", @@ -207,8 +209,7 @@ exports[`generateModelConfig single native provider uses OpenAI models when only "variant": "medium", }, "metis": { - "model": "openai/gpt-5.4", - "variant": "high", + "model": "opencode/glm-4.7-free", }, "momus": { "model": "openai/gpt-5.4", @@ -230,6 +231,9 @@ exports[`generateModelConfig single native provider uses OpenAI models when only "model": "openai/gpt-5.4", "variant": "medium", }, + "sisyphus-junior": { + "model": "opencode/glm-4.7-free", + }, }, "categories": { "artistry": { @@ -273,8 +277,7 @@ exports[`generateModelConfig single native provider uses OpenAI models with isMa "$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/dev/assets/oh-my-opencode.schema.json", "agents": { "atlas": { - "model": "openai/gpt-5.4", - "variant": "medium", + "model": "opencode/glm-4.7-free", }, "explore": { "model": "openai/gpt-5.4", @@ -289,8 +292,7 @@ exports[`generateModelConfig single native provider uses OpenAI models with isMa "variant": "medium", }, "metis": { - "model": "openai/gpt-5.4", - "variant": "high", + "model": "opencode/glm-4.7-free", }, "momus": { "model": "openai/gpt-5.4", @@ -312,6 +314,9 @@ exports[`generateModelConfig single native provider uses OpenAI models with isMa "model": "openai/gpt-5.4", "variant": "medium", }, + "sisyphus-junior": { + "model": "opencode/glm-4.7-free", + }, }, "categories": { "artistry": { @@ -355,24 +360,20 @@ exports[`generateModelConfig single native provider uses Gemini models when only "$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/dev/assets/oh-my-opencode.schema.json", "agents": { "atlas": { - "model": "google/gemini-3.1-pro-preview", + "model": "opencode/glm-4.7-free", }, "explore": { "model": "opencode/gpt-5-nano", }, - "librarian": { - "model": "opencode/glm-4.7-free", - }, "metis": { - "model": "google/gemini-3.1-pro-preview", - "variant": "high", + "model": "opencode/glm-4.7-free", }, "momus": { "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "multimodal-looker": { - "model": "google/gemini-3-flash-preview", + "model": "opencode/glm-4.7-free", }, "oracle": { "model": "google/gemini-3.1-pro-preview", @@ -381,6 +382,9 @@ exports[`generateModelConfig single native provider uses Gemini models when only "prometheus": { "model": "google/gemini-3.1-pro-preview", }, + "sisyphus-junior": { + "model": "opencode/glm-4.7-free", + }, }, "categories": { "artistry": { @@ -416,24 +420,20 @@ exports[`generateModelConfig single native provider uses Gemini models with isMa "$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/dev/assets/oh-my-opencode.schema.json", "agents": { "atlas": { - "model": "google/gemini-3.1-pro-preview", + "model": "opencode/glm-4.7-free", }, "explore": { "model": "opencode/gpt-5-nano", }, - "librarian": { - "model": "opencode/glm-4.7-free", - }, "metis": { - "model": "google/gemini-3.1-pro-preview", - "variant": "high", + "model": "opencode/glm-4.7-free", }, "momus": { "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "multimodal-looker": { - "model": "google/gemini-3-flash-preview", + "model": "opencode/glm-4.7-free", }, "oracle": { "model": "google/gemini-3.1-pro-preview", @@ -442,6 +442,9 @@ exports[`generateModelConfig single native provider uses Gemini models with isMa "prometheus": { "model": "google/gemini-3.1-pro-preview", }, + "sisyphus-junior": { + "model": "opencode/glm-4.7-free", + }, }, "categories": { "artistry": { @@ -486,9 +489,6 @@ exports[`generateModelConfig all native providers uses preferred models from fal "model": "openai/gpt-5.3-codex", "variant": "medium", }, - "librarian": { - "model": "anthropic/claude-sonnet-4-5", - }, "metis": { "model": "anthropic/claude-opus-4-6", "variant": "max", @@ -513,6 +513,9 @@ exports[`generateModelConfig all native providers uses preferred models from fal "model": "anthropic/claude-opus-4-6", "variant": "max", }, + "sisyphus-junior": { + "model": "anthropic/claude-sonnet-4-6", + }, }, "categories": { "artistry": { @@ -561,9 +564,6 @@ exports[`generateModelConfig all native providers uses preferred models with isM "model": "openai/gpt-5.3-codex", "variant": "medium", }, - "librarian": { - "model": "anthropic/claude-sonnet-4-5", - }, "metis": { "model": "anthropic/claude-opus-4-6", "variant": "max", @@ -588,6 +588,9 @@ exports[`generateModelConfig all native providers uses preferred models with isM "model": "anthropic/claude-opus-4-6", "variant": "max", }, + "sisyphus-junior": { + "model": "anthropic/claude-sonnet-4-6", + }, }, "categories": { "artistry": { @@ -637,9 +640,6 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models when on "model": "opencode/gpt-5.3-codex", "variant": "medium", }, - "librarian": { - "model": "opencode/glm-4.7-free", - }, "metis": { "model": "opencode/claude-opus-4-6", "variant": "max", @@ -664,6 +664,9 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models when on "model": "opencode/claude-opus-4-6", "variant": "max", }, + "sisyphus-junior": { + "model": "opencode/claude-sonnet-4-6", + }, }, "categories": { "artistry": { @@ -712,9 +715,6 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models with is "model": "opencode/gpt-5.3-codex", "variant": "medium", }, - "librarian": { - "model": "opencode/glm-4.7-free", - }, "metis": { "model": "opencode/claude-opus-4-6", "variant": "max", @@ -739,6 +739,9 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models with is "model": "opencode/claude-opus-4-6", "variant": "max", }, + "sisyphus-junior": { + "model": "opencode/claude-sonnet-4-6", + }, }, "categories": { "artistry": { @@ -784,9 +787,6 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models when "explore": { "model": "github-copilot/gpt-5-mini", }, - "librarian": { - "model": "github-copilot/claude-sonnet-4.5", - }, "metis": { "model": "github-copilot/claude-opus-4.6", "variant": "max", @@ -796,7 +796,7 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models when "variant": "xhigh", }, "multimodal-looker": { - "model": "github-copilot/gemini-3-flash-preview", + "model": "opencode/glm-4.7-free", }, "oracle": { "model": "github-copilot/gpt-5.4", @@ -810,6 +810,9 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models when "model": "github-copilot/claude-opus-4.6", "variant": "max", }, + "sisyphus-junior": { + "model": "github-copilot/claude-sonnet-4.6", + }, }, "categories": { "artistry": { @@ -850,9 +853,6 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models with "explore": { "model": "github-copilot/gpt-5-mini", }, - "librarian": { - "model": "github-copilot/claude-sonnet-4.5", - }, "metis": { "model": "github-copilot/claude-opus-4.6", "variant": "max", @@ -862,7 +862,7 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models with "variant": "xhigh", }, "multimodal-looker": { - "model": "github-copilot/gemini-3-flash-preview", + "model": "opencode/glm-4.7-free", }, "oracle": { "model": "github-copilot/gpt-5.4", @@ -876,6 +876,9 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models with "model": "github-copilot/claude-opus-4.6", "variant": "max", }, + "sisyphus-junior": { + "model": "github-copilot/claude-sonnet-4.6", + }, }, "categories": { "artistry": { @@ -938,6 +941,9 @@ exports[`generateModelConfig fallback providers uses ZAI model for librarian whe "sisyphus": { "model": "zai-coding-plan/glm-5", }, + "sisyphus-junior": { + "model": "opencode/glm-4.7-free", + }, }, "categories": { "quick": { @@ -993,6 +999,9 @@ exports[`generateModelConfig fallback providers uses ZAI model for librarian wit "sisyphus": { "model": "zai-coding-plan/glm-5", }, + "sisyphus-junior": { + "model": "opencode/glm-4.7-free", + }, }, "categories": { "quick": { @@ -1031,9 +1040,6 @@ exports[`generateModelConfig mixed provider scenarios uses Claude + OpenCode Zen "model": "opencode/gpt-5.3-codex", "variant": "medium", }, - "librarian": { - "model": "opencode/glm-4.7-free", - }, "metis": { "model": "anthropic/claude-opus-4-6", "variant": "max", @@ -1058,6 +1064,9 @@ exports[`generateModelConfig mixed provider scenarios uses Claude + OpenCode Zen "model": "anthropic/claude-opus-4-6", "variant": "max", }, + "sisyphus-junior": { + "model": "anthropic/claude-sonnet-4-6", + }, }, "categories": { "artistry": { @@ -1106,9 +1115,6 @@ exports[`generateModelConfig mixed provider scenarios uses OpenAI + Copilot comb "model": "openai/gpt-5.3-codex", "variant": "medium", }, - "librarian": { - "model": "github-copilot/claude-sonnet-4.5", - }, "metis": { "model": "github-copilot/claude-opus-4.6", "variant": "max", @@ -1133,6 +1139,9 @@ exports[`generateModelConfig mixed provider scenarios uses OpenAI + Copilot comb "model": "github-copilot/claude-opus-4.6", "variant": "max", }, + "sisyphus-junior": { + "model": "github-copilot/claude-sonnet-4.6", + }, }, "categories": { "artistry": { @@ -1203,6 +1212,9 @@ exports[`generateModelConfig mixed provider scenarios uses Claude + ZAI combinat "model": "anthropic/claude-opus-4-6", "variant": "max", }, + "sisyphus-junior": { + "model": "anthropic/claude-sonnet-4-6", + }, }, "categories": { "quick": { @@ -1238,9 +1250,6 @@ exports[`generateModelConfig mixed provider scenarios uses Gemini + Claude combi "explore": { "model": "anthropic/claude-haiku-4-5", }, - "librarian": { - "model": "anthropic/claude-sonnet-4-5", - }, "metis": { "model": "anthropic/claude-opus-4-6", "variant": "max", @@ -1250,7 +1259,7 @@ exports[`generateModelConfig mixed provider scenarios uses Gemini + Claude combi "variant": "max", }, "multimodal-looker": { - "model": "google/gemini-3-flash-preview", + "model": "opencode/glm-4.7-free", }, "oracle": { "model": "google/gemini-3.1-pro-preview", @@ -1264,6 +1273,9 @@ exports[`generateModelConfig mixed provider scenarios uses Gemini + Claude combi "model": "anthropic/claude-opus-4-6", "variant": "max", }, + "sisyphus-junior": { + "model": "anthropic/claude-sonnet-4-6", + }, }, "categories": { "artistry": { @@ -1335,6 +1347,9 @@ exports[`generateModelConfig mixed provider scenarios uses all fallback provider "model": "github-copilot/claude-opus-4.6", "variant": "max", }, + "sisyphus-junior": { + "model": "github-copilot/claude-sonnet-4.6", + }, }, "categories": { "artistry": { @@ -1410,6 +1425,9 @@ exports[`generateModelConfig mixed provider scenarios uses all providers togethe "model": "anthropic/claude-opus-4-6", "variant": "max", }, + "sisyphus-junior": { + "model": "anthropic/claude-sonnet-4-6", + }, }, "categories": { "artistry": { @@ -1485,6 +1503,9 @@ exports[`generateModelConfig mixed provider scenarios uses all providers with is "model": "anthropic/claude-opus-4-6", "variant": "max", }, + "sisyphus-junior": { + "model": "anthropic/claude-sonnet-4-6", + }, }, "categories": { "artistry": { diff --git a/src/cli/model-fallback.test.ts b/src/cli/model-fallback.test.ts index 5abf4b4c0..bd3e38447 100644 --- a/src/cli/model-fallback.test.ts +++ b/src/cli/model-fallback.test.ts @@ -495,15 +495,15 @@ describe("generateModelConfig", () => { expect(result.agents?.librarian?.model).toBe("zai-coding-plan/glm-4.7") }) - test("librarian falls back to generic chain result when no librarian provider matches", () => { - // #given only Claude is available (no ZAI) + test("librarian is omitted when no librarian provider matches", () => { + // #given only Claude is available (no opencode-go or ZAI) const config = createConfig({ hasClaude: true }) // #when generateModelConfig is called const result = generateModelConfig(config) - // #then librarian should use generic chain result when chain providers are unavailable - expect(result.agents?.librarian?.model).toBe("anthropic/claude-sonnet-4-5") + // #then librarian should be omitted when its dedicated providers are unavailable + expect(result.agents?.librarian).toBeUndefined() }) }) diff --git a/src/features/background-agent/manager-shutdown-global-cleanup.test.ts b/src/features/background-agent/manager-shutdown-global-cleanup.test.ts new file mode 100644 index 000000000..d238b2dc0 --- /dev/null +++ b/src/features/background-agent/manager-shutdown-global-cleanup.test.ts @@ -0,0 +1,97 @@ +import { afterEach, beforeEach, describe, expect, test } from "bun:test" +import { tmpdir } from "node:os" + +import { _resetForTesting, subagentSessions } from "../claude-code-session-state" +import { SessionCategoryRegistry } from "../../shared/session-category-registry" +import { BackgroundManager } from "./manager" +import type { BackgroundTask } from "./types" + +function createTask(overrides: Partial & { id: string; sessionID: string }): BackgroundTask { + return { + parentSessionID: "parent-session", + parentMessageID: "parent-message", + description: "test task", + prompt: "test prompt", + agent: "explore", + status: "running", + startedAt: new Date(), + ...overrides, + } +} + +function createBackgroundManager(): BackgroundManager { + return new BackgroundManager({ + client: { + session: { + abort: async () => ({}), + prompt: async () => ({}), + promptAsync: async () => ({}), + }, + } as never, + project: {} as never, + directory: tmpdir(), + worktree: tmpdir(), + serverUrl: new URL("https://example.com"), + $: {} as never, + } as never) +} + +describe("BackgroundManager shutdown global cleanup", () => { + beforeEach(() => { + // given + _resetForTesting() + SessionCategoryRegistry.clear() + }) + + afterEach(() => { + // given + _resetForTesting() + SessionCategoryRegistry.clear() + }) + + test("removes tracked session IDs from subagentSessions and SessionCategoryRegistry on shutdown", async () => { + // given + const runningSessionID = "ses-running-shutdown-cleanup" + const completedSessionID = "ses-completed-shutdown-cleanup" + const unrelatedSessionID = "ses-unrelated-shutdown-cleanup" + const manager = createBackgroundManager() + const tasks = new Map([ + [ + "task-running-shutdown-cleanup", + createTask({ + id: "task-running-shutdown-cleanup", + sessionID: runningSessionID, + }), + ], + [ + "task-completed-shutdown-cleanup", + createTask({ + id: "task-completed-shutdown-cleanup", + sessionID: completedSessionID, + status: "completed", + completedAt: new Date(), + }), + ], + ]) + + Object.assign(manager, { tasks }) + + subagentSessions.add(runningSessionID) + subagentSessions.add(completedSessionID) + subagentSessions.add(unrelatedSessionID) + SessionCategoryRegistry.register(runningSessionID, "quick") + SessionCategoryRegistry.register(completedSessionID, "deep") + SessionCategoryRegistry.register(unrelatedSessionID, "test") + + // when + await manager.shutdown() + + // then + expect(subagentSessions.has(runningSessionID)).toBe(false) + expect(subagentSessions.has(completedSessionID)).toBe(false) + expect(subagentSessions.has(unrelatedSessionID)).toBe(true) + expect(SessionCategoryRegistry.has(runningSessionID)).toBe(false) + expect(SessionCategoryRegistry.has(completedSessionID)).toBe(false) + expect(SessionCategoryRegistry.has(unrelatedSessionID)).toBe(true) + }) +}) diff --git a/src/features/background-agent/manager.test.ts b/src/features/background-agent/manager.test.ts index 67ddd8b44..8fd3e2133 100644 --- a/src/features/background-agent/manager.test.ts +++ b/src/features/background-agent/manager.test.ts @@ -2111,6 +2111,254 @@ describe("BackgroundManager - Non-blocking Queue Integration", () => { // then await expect(result).rejects.toThrow("background_task.maxDescendants cannot be enforced safely") }) + + test("should release descendant quota when queued task is cancelled before session starts", async () => { + // given + manager.shutdown() + manager = new BackgroundManager( + { + client: createMockClientWithSessionChain({ + "session-root": { directory: "/test/dir" }, + }), + directory: tmpdir(), + } as unknown as PluginInput, + { defaultConcurrency: 1, maxDescendants: 2 }, + ) + + const input = { + description: "Test task", + prompt: "Do something", + agent: "test-agent", + parentSessionID: "session-root", + parentMessageID: "parent-message", + } + + await manager.launch(input) + const queuedTask = await manager.launch(input) + await new Promise(resolve => setTimeout(resolve, 50)) + expect(manager.getTask(queuedTask.id)?.status).toBe("pending") + + // when + const cancelled = manager.cancelPendingTask(queuedTask.id) + const replacementTask = await manager.launch(input) + + // then + expect(cancelled).toBe(true) + expect(replacementTask.status).toBe("pending") + }) + + test("should release descendant quota when session creation fails before session starts", async () => { + // given + let createAttempts = 0 + manager.shutdown() + manager = new BackgroundManager( + { + client: { + session: { + create: async () => { + createAttempts += 1 + if (createAttempts === 1) { + return { error: "session create failed", data: undefined } + } + + return { data: { id: `ses_${crypto.randomUUID()}` } } + }, + get: async () => ({ data: { directory: "/test/dir" } }), + prompt: async () => ({}), + promptAsync: async () => ({}), + messages: async () => ({ data: [] }), + todo: async () => ({ data: [] }), + status: async () => ({ data: {} }), + abort: async () => ({}), + }, + }, + directory: tmpdir(), + } as unknown as PluginInput, + { maxDescendants: 1 }, + ) + + const input = { + description: "Test task", + prompt: "Do something", + agent: "test-agent", + parentSessionID: "session-root", + parentMessageID: "parent-message", + } + + await manager.launch(input) + await new Promise(resolve => setTimeout(resolve, 50)) + expect(createAttempts).toBe(1) + + // when + const retryTask = await manager.launch(input) + + // then + expect(retryTask.status).toBe("pending") + }) + + test("should keep the next queued task when the first task is cancelled during session creation", async () => { + // given + const firstSessionID = "ses-first-cancelled-during-create" + const secondSessionID = "ses-second-survives-queue" + let createCallCount = 0 + let resolveFirstCreate: ((value: { data: { id: string } }) => void) | undefined + let resolveFirstCreateStarted: (() => void) | undefined + let resolveSecondPromptAsync: (() => void) | undefined + const firstCreateStarted = new Promise((resolve) => { + resolveFirstCreateStarted = resolve + }) + const secondPromptAsyncStarted = new Promise((resolve) => { + resolveSecondPromptAsync = resolve + }) + + manager.shutdown() + manager = new BackgroundManager( + { + client: { + session: { + create: async () => { + createCallCount += 1 + if (createCallCount === 1) { + resolveFirstCreateStarted?.() + return await new Promise<{ data: { id: string } }>((resolve) => { + resolveFirstCreate = resolve + }) + } + + return { data: { id: secondSessionID } } + }, + get: async () => ({ data: { directory: "/test/dir" } }), + prompt: async () => ({}), + promptAsync: async ({ path }: { path: { id: string } }) => { + if (path.id === secondSessionID) { + resolveSecondPromptAsync?.() + } + + return {} + }, + messages: async () => ({ data: [] }), + todo: async () => ({ data: [] }), + status: async () => ({ data: {} }), + abort: async () => ({}), + }, + }, + directory: tmpdir(), + } as unknown as PluginInput, + { defaultConcurrency: 1 } + ) + + const input = { + description: "Test task", + prompt: "Do something", + agent: "test-agent", + parentSessionID: "parent-session", + parentMessageID: "parent-message", + } + + const firstTask = await manager.launch(input) + const secondTask = await manager.launch(input) + await firstCreateStarted + + // when + const cancelled = await manager.cancelTask(firstTask.id, { + source: "test", + abortSession: false, + }) + resolveFirstCreate?.({ data: { id: firstSessionID } }) + + await Promise.race([ + secondPromptAsyncStarted, + new Promise((_, reject) => setTimeout(() => reject(new Error("timeout")), 100)), + ]) + + // then + expect(cancelled).toBe(true) + expect(createCallCount).toBe(2) + expect(manager.getTask(firstTask.id)?.status).toBe("cancelled") + expect(manager.getTask(secondTask.id)?.status).toBe("running") + expect(manager.getTask(secondTask.id)?.sessionID).toBe(secondSessionID) + }) + + test("should keep task cancelled and abort the session when cancellation wins during session creation", async () => { + // given + const createdSessionID = "ses-cancelled-during-create" + let resolveCreate: ((value: { data: { id: string } }) => void) | undefined + let resolveCreateStarted: (() => void) | undefined + let resolveAbortCalled: (() => void) | undefined + const createStarted = new Promise((resolve) => { + resolveCreateStarted = resolve + }) + const abortCalled = new Promise((resolve) => { + resolveAbortCalled = resolve + }) + const abortCalls: string[] = [] + const promptAsyncSessionIDs: string[] = [] + + manager.shutdown() + manager = new BackgroundManager( + { + client: { + session: { + create: async () => { + resolveCreateStarted?.() + return await new Promise<{ data: { id: string } }>((resolve) => { + resolveCreate = resolve + }) + }, + get: async () => ({ data: { directory: "/test/dir" } }), + prompt: async () => ({}), + promptAsync: async ({ path }: { path: { id: string } }) => { + promptAsyncSessionIDs.push(path.id) + return {} + }, + messages: async () => ({ data: [] }), + todo: async () => ({ data: [] }), + status: async () => ({ data: {} }), + abort: async ({ path }: { path: { id: string } }) => { + abortCalls.push(path.id) + resolveAbortCalled?.() + return {} + }, + }, + }, + directory: tmpdir(), + } as unknown as PluginInput, + { defaultConcurrency: 1 } + ) + + const input = { + description: "Test task", + prompt: "Do something", + agent: "test-agent", + parentSessionID: "parent-session", + parentMessageID: "parent-message", + } + + const task = await manager.launch(input) + await createStarted + + // when + const cancelled = await manager.cancelTask(task.id, { + source: "test", + abortSession: false, + }) + resolveCreate?.({ data: { id: createdSessionID } }) + + await Promise.race([ + abortCalled, + new Promise((_, reject) => setTimeout(() => reject(new Error("timeout")), 100)), + ]) + await Promise.resolve() + + // then + const updatedTask = manager.getTask(task.id) + expect(cancelled).toBe(true) + expect(updatedTask?.status).toBe("cancelled") + expect(updatedTask?.sessionID).toBeUndefined() + expect(promptAsyncSessionIDs).not.toContain(createdSessionID) + expect(abortCalls).toEqual([createdSessionID]) + expect(getConcurrencyManager(manager).getCount("test-agent")).toBe(0) + }) }) describe("pending task can be cancelled", () => { diff --git a/src/features/background-agent/manager.ts b/src/features/background-agent/manager.ts index d133f2d77..0947b2d17 100644 --- a/src/features/background-agent/manager.ts +++ b/src/features/background-agent/manager.ts @@ -125,6 +125,7 @@ export class BackgroundManager { private idleDeferralTimers: Map> = new Map() private notificationQueueByParent: Map> = new Map() private rootDescendantCounts: Map + private preStartDescendantReservations: Set private enableParentSessionNotifications: boolean readonly taskHistory = new TaskHistory() @@ -150,6 +151,7 @@ export class BackgroundManager { this.onSubagentSessionCreated = options?.onSubagentSessionCreated this.onShutdown = options?.onShutdown this.rootDescendantCounts = new Map() + this.preStartDescendantReservations = new Set() this.enableParentSessionNotifications = options?.enableParentSessionNotifications ?? true this.registerProcessCleanup() } @@ -220,6 +222,26 @@ export class BackgroundManager { this.rootDescendantCounts.set(rootSessionID, currentCount - 1) } + private markPreStartDescendantReservation(task: BackgroundTask): void { + this.preStartDescendantReservations.add(task.id) + } + + private settlePreStartDescendantReservation(task: BackgroundTask): void { + this.preStartDescendantReservations.delete(task.id) + } + + private rollbackPreStartDescendantReservation(task: BackgroundTask): void { + if (!this.preStartDescendantReservations.delete(task.id)) { + return + } + + if (!task.rootSessionID) { + return + } + + this.unregisterRootDescendant(task.rootSessionID) + } + async launch(input: LaunchInput): Promise { log("[background-agent] launch() called with:", { agent: input.agent, @@ -296,6 +318,7 @@ export class BackgroundManager { } spawnReservation.commit() + this.markPreStartDescendantReservation(task) // Trigger processing (fire-and-forget) this.processKey(key) @@ -317,13 +340,16 @@ export class BackgroundManager { try { const queue = this.queuesByKey.get(key) while (queue && queue.length > 0) { - const item = queue[0] + const item = queue.shift() + if (!item) { + continue + } await this.concurrencyManager.acquire(key) if (item.task.status === "cancelled" || item.task.status === "error" || item.task.status === "interrupt") { + this.rollbackPreStartDescendantReservation(item.task) this.concurrencyManager.release(key) - queue.shift() continue } @@ -331,6 +357,7 @@ export class BackgroundManager { await this.startTask(item) } catch (error) { log("[background-agent] Error starting task:", error) + this.rollbackPreStartDescendantReservation(item.task) if (item.task.concurrencyKey) { this.concurrencyManager.release(item.task.concurrencyKey) item.task.concurrencyKey = undefined @@ -338,8 +365,6 @@ export class BackgroundManager { this.concurrencyManager.release(key) } } - - queue.shift() } } finally { this.processingKeys.delete(key) @@ -386,6 +411,18 @@ export class BackgroundManager { } const sessionID = createResult.data.id + + if (task.status === "cancelled") { + await this.client.session.abort({ + path: { id: sessionID }, + }).catch((error) => { + log("[background-agent] Failed to abort cancelled pre-start session:", error) + }) + this.concurrencyManager.release(concurrencyKey) + return + } + + this.settlePreStartDescendantReservation(task) subagentSessions.add(sessionID) log("[background-agent] tmux callback check", { @@ -1204,6 +1241,7 @@ export class BackgroundManager { } } } + this.rollbackPreStartDescendantReservation(task) log("[background-agent] Cancelled pending task:", { taskId, key }) } @@ -1707,9 +1745,14 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea this.shutdownTriggered = true log("[background-agent] Shutting down BackgroundManager") this.stopPolling() + const trackedSessionIDs = new Set() // Abort all running sessions to prevent zombie processes (#1240) for (const task of this.tasks.values()) { + if (task.sessionID) { + trackedSessionIDs.add(task.sessionID) + } + if (task.status === "running" && task.sessionID) { this.client.session.abort({ path: { id: task.sessionID }, @@ -1744,6 +1787,11 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea } this.idleDeferralTimers.clear() + for (const sessionID of trackedSessionIDs) { + subagentSessions.delete(sessionID) + SessionCategoryRegistry.remove(sessionID) + } + this.concurrencyManager.clear() this.tasks.clear() this.notifications.clear() diff --git a/src/hooks/atlas/atlas-hook.ts b/src/hooks/atlas/atlas-hook.ts index 97d0842d7..855cdacb6 100644 --- a/src/hooks/atlas/atlas-hook.ts +++ b/src/hooks/atlas/atlas-hook.ts @@ -21,6 +21,6 @@ export function createAtlasHook(ctx: PluginInput, options?: AtlasHookOptions) { return { handler: createAtlasEventHandler({ ctx, options, sessions, getState }), "tool.execute.before": createToolExecuteBeforeHandler({ ctx, pendingFilePaths }), - "tool.execute.after": createToolExecuteAfterHandler({ ctx, pendingFilePaths, autoCommit }), + "tool.execute.after": createToolExecuteAfterHandler({ ctx, pendingFilePaths, autoCommit, getState }), } } diff --git a/src/hooks/atlas/boulder-session-lineage.ts b/src/hooks/atlas/boulder-session-lineage.ts new file mode 100644 index 000000000..0868f3bb8 --- /dev/null +++ b/src/hooks/atlas/boulder-session-lineage.ts @@ -0,0 +1,44 @@ +import type { PluginInput } from "@opencode-ai/plugin" +import { log } from "../../shared/logger" +import { HOOK_NAME } from "./hook-name" + +export async function isSessionInBoulderLineage(input: { + client: PluginInput["client"] + sessionID: string + boulderSessionIDs: string[] +}): Promise { + const visitedSessionIDs = new Set() + let currentSessionID = input.sessionID + + while (!visitedSessionIDs.has(currentSessionID)) { + visitedSessionIDs.add(currentSessionID) + + const sessionResult = await input.client.session + .get({ path: { id: currentSessionID } }) + .catch((error: unknown) => { + log(`[${HOOK_NAME}] Failed to resolve session lineage`, { + sessionID: input.sessionID, + currentSessionID, + error, + }) + return null + }) + + if (!sessionResult || sessionResult.error) { + return false + } + + const parentSessionID = sessionResult.data?.parentID + if (!parentSessionID) { + return false + } + + if (input.boulderSessionIDs.includes(parentSessionID)) { + return true + } + + currentSessionID = parentSessionID + } + + return false +} diff --git a/src/hooks/atlas/event-handler.ts b/src/hooks/atlas/event-handler.ts index e6084c596..95cdbe531 100644 --- a/src/hooks/atlas/event-handler.ts +++ b/src/hooks/atlas/event-handler.ts @@ -38,11 +38,15 @@ export function createAtlasEventHandler(input: { if (event.type === "message.updated") { const info = props?.info as Record | undefined const sessionID = info?.sessionID as string | undefined + const role = info?.role as string | undefined if (!sessionID) return const state = sessions.get(sessionID) if (state) { state.lastEventWasAbortError = false + if (role === "user") { + state.waitingForFinalWaveApproval = false + } } return } diff --git a/src/hooks/atlas/final-wave-approval-gate.test.ts b/src/hooks/atlas/final-wave-approval-gate.test.ts new file mode 100644 index 000000000..5812c4ba1 --- /dev/null +++ b/src/hooks/atlas/final-wave-approval-gate.test.ts @@ -0,0 +1,224 @@ +import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test" +import { randomUUID } from "node:crypto" +import { existsSync, mkdirSync, rmSync, writeFileSync } from "node:fs" +import { tmpdir } from "node:os" +import { join } from "node:path" +import { createOpencodeClient } from "@opencode-ai/sdk" +import type { AssistantMessage, Session } from "@opencode-ai/sdk" +import type { BoulderState } from "../../features/boulder-state" +import { clearBoulderState, writeBoulderState } from "../../features/boulder-state" + +const TEST_STORAGE_ROOT = join(tmpdir(), `atlas-final-wave-storage-${randomUUID()}`) +const TEST_MESSAGE_STORAGE = join(TEST_STORAGE_ROOT, "message") +const TEST_PART_STORAGE = join(TEST_STORAGE_ROOT, "part") + +mock.module("../../features/hook-message-injector/constants", () => ({ + OPENCODE_STORAGE: TEST_STORAGE_ROOT, + MESSAGE_STORAGE: TEST_MESSAGE_STORAGE, + PART_STORAGE: TEST_PART_STORAGE, +})) + +mock.module("../../shared/opencode-message-dir", () => ({ + getMessageDir: (sessionID: string) => { + const directoryPath = join(TEST_MESSAGE_STORAGE, sessionID) + return existsSync(directoryPath) ? directoryPath : null + }, +})) + +mock.module("../../shared/opencode-storage-detection", () => ({ + isSqliteBackend: () => false, +})) + +const { createAtlasHook } = await import("./index") +const { MESSAGE_STORAGE } = await import("../../features/hook-message-injector") + +type AtlasHookContext = Parameters[0] +type PromptMock = ReturnType + +describe("Atlas final verification approval gate", () => { + let testDirectory = "" + + function createMockPluginInput(): AtlasHookContext & { _promptMock: PromptMock } { + const client = createOpencodeClient({ baseUrl: "http://localhost" }) + const promptMock = mock((input: unknown) => input) + + Reflect.set(client.session, "prompt", async (input: unknown) => { + promptMock(input) + return { + data: { info: {} as AssistantMessage, parts: [] }, + request: new Request("http://localhost/session/prompt"), + response: new Response(), + } + }) + + Reflect.set(client.session, "promptAsync", async (input: unknown) => { + promptMock(input) + return { + data: undefined, + request: new Request("http://localhost/session/prompt_async"), + response: new Response(), + } + }) + + Reflect.set(client.session, "get", async () => { + return { + data: { parentID: "main-session-123" } as Session, + request: new Request("http://localhost/session/main-session-123"), + response: new Response(), + } + }) + + return { + directory: testDirectory, + project: {} as AtlasHookContext["project"], + worktree: testDirectory, + serverUrl: new URL("http://localhost"), + $: {} as AtlasHookContext["$"], + client, + _promptMock: promptMock, + } + } + + function setupMessageStorage(sessionID: string): void { + const messageDirectory = join(MESSAGE_STORAGE, sessionID) + if (!existsSync(messageDirectory)) { + mkdirSync(messageDirectory, { recursive: true }) + } + + writeFileSync( + join(messageDirectory, "msg_test001.json"), + JSON.stringify({ + agent: "atlas", + model: { providerID: "anthropic", modelID: "claude-opus-4-6" }, + }), + ) + } + + function cleanupMessageStorage(sessionID: string): void { + const messageDirectory = join(MESSAGE_STORAGE, sessionID) + if (existsSync(messageDirectory)) { + rmSync(messageDirectory, { recursive: true, force: true }) + } + } + + beforeEach(() => { + testDirectory = join(tmpdir(), `atlas-final-wave-test-${randomUUID()}`) + mkdirSync(join(testDirectory, ".sisyphus"), { recursive: true }) + clearBoulderState(testDirectory) + }) + + afterEach(() => { + clearBoulderState(testDirectory) + if (existsSync(testDirectory)) { + rmSync(testDirectory, { recursive: true, force: true }) + } + }) + + test("waits for explicit user approval after the last final-wave approval arrives", async () => { + // given + const sessionID = "atlas-final-wave-session" + setupMessageStorage(sessionID) + + const planPath = join(testDirectory, "final-wave-plan.md") + writeFileSync( + planPath, + `# Plan + +## TODOs +- [x] 1. Ship the implementation + +## Final Verification Wave (MANDATORY - after ALL implementation tasks) +- [x] F1. **Plan Compliance Audit** - \`oracle\` +- [x] F2. **Code Quality Review** - \`unspecified-high\` +- [x] F3. **Real Manual QA** - \`unspecified-high\` +- [ ] F4. **Scope Fidelity Check** - \`deep\` +`, + ) + + const state: BoulderState = { + active_plan: planPath, + started_at: "2026-01-02T10:00:00Z", + session_ids: [sessionID], + plan_name: "final-wave-plan", + agent: "atlas", + } + writeBoulderState(testDirectory, state) + + const mockInput = createMockPluginInput() + const hook = createAtlasHook(mockInput) + const toolOutput = { + title: "Sisyphus Task", + output: `Tasks [4/4 compliant] | Contamination [CLEAN] | Unaccounted [CLEAN] | VERDICT: APPROVE + + +session_id: ses_final_wave_review +`, + metadata: {}, + } + + // when + await hook["tool.execute.after"]({ tool: "task", sessionID }, toolOutput) + await hook.handler({ event: { type: "session.idle", properties: { sessionID } } }) + + // then + expect(toolOutput.output).toContain("FINAL WAVE APPROVAL GATE") + expect(toolOutput.output).toContain("explicit user approval") + expect(toolOutput.output).not.toContain("STEP 8: PROCEED TO NEXT TASK") + expect(mockInput._promptMock).not.toHaveBeenCalled() + + cleanupMessageStorage(sessionID) + }) + + test("keeps normal auto-continue instructions for non-final tasks", async () => { + // given + const sessionID = "atlas-non-final-session" + setupMessageStorage(sessionID) + + const planPath = join(testDirectory, "implementation-plan.md") + writeFileSync( + planPath, + `# Plan + +## TODOs +- [x] 1. Setup +- [ ] 2. Implement feature + +## Final Verification Wave (MANDATORY - after ALL implementation tasks) +- [ ] F1. **Plan Compliance Audit** - \`oracle\` +- [ ] F2. **Code Quality Review** - \`unspecified-high\` +- [ ] F3. **Real Manual QA** - \`unspecified-high\` +- [ ] F4. **Scope Fidelity Check** - \`deep\` +`, + ) + + const state: BoulderState = { + active_plan: planPath, + started_at: "2026-01-02T10:00:00Z", + session_ids: [sessionID], + plan_name: "implementation-plan", + agent: "atlas", + } + writeBoulderState(testDirectory, state) + + const hook = createAtlasHook(createMockPluginInput()) + const toolOutput = { + title: "Sisyphus Task", + output: `Implementation finished successfully + + +session_id: ses_feature_task +`, + metadata: {}, + } + + // when + await hook["tool.execute.after"]({ tool: "task", sessionID }, toolOutput) + + // then + expect(toolOutput.output).toContain("COMPLETION GATE") + expect(toolOutput.output).toContain("STEP 8: PROCEED TO NEXT TASK") + expect(toolOutput.output).not.toContain("FINAL WAVE APPROVAL GATE") + + cleanupMessageStorage(sessionID) + }) +}) diff --git a/src/hooks/atlas/final-wave-approval-gate.ts b/src/hooks/atlas/final-wave-approval-gate.ts new file mode 100644 index 000000000..9928bbe16 --- /dev/null +++ b/src/hooks/atlas/final-wave-approval-gate.ts @@ -0,0 +1,47 @@ +import { existsSync, readFileSync } from "node:fs" + +const APPROVE_VERDICT_PATTERN = /\bVERDICT:\s*APPROVE\b/i +const FINAL_VERIFICATION_HEADING_PATTERN = /^##\s+Final Verification Wave\b/i +const UNCHECKED_TASK_PATTERN = /^\s*[-*]\s*\[\s*\]\s*(.+)$/ +const FINAL_WAVE_TASK_PATTERN = /^F\d+\./i + +export function shouldPauseForFinalWaveApproval(input: { + planPath: string + taskOutput: string +}): boolean { + if (!APPROVE_VERDICT_PATTERN.test(input.taskOutput)) { + return false + } + + if (!existsSync(input.planPath)) { + return false + } + + try { + const content = readFileSync(input.planPath, "utf-8") + const lines = content.split(/\r?\n/) + let inFinalVerificationWave = false + let uncheckedTaskCount = 0 + let uncheckedFinalWaveTaskCount = 0 + + for (const line of lines) { + if (/^##\s+/.test(line)) { + inFinalVerificationWave = FINAL_VERIFICATION_HEADING_PATTERN.test(line) + } + + const uncheckedTaskMatch = line.match(UNCHECKED_TASK_PATTERN) + if (!uncheckedTaskMatch) { + continue + } + + uncheckedTaskCount += 1 + if (inFinalVerificationWave && FINAL_WAVE_TASK_PATTERN.test(uncheckedTaskMatch[1].trim())) { + uncheckedFinalWaveTaskCount += 1 + } + } + + return uncheckedTaskCount === 1 && uncheckedFinalWaveTaskCount === 1 + } catch { + return false + } +} diff --git a/src/hooks/atlas/idle-event-lineage.test.ts b/src/hooks/atlas/idle-event-lineage.test.ts new file mode 100644 index 000000000..e31e670d6 --- /dev/null +++ b/src/hooks/atlas/idle-event-lineage.test.ts @@ -0,0 +1,122 @@ +import { afterEach, beforeEach, describe, it } from "bun:test" +import assert from "node:assert/strict" +import { randomUUID } from "node:crypto" +import { existsSync, mkdirSync, rmSync, writeFileSync } from "node:fs" +import { tmpdir } from "node:os" +import { join } from "node:path" +import { clearBoulderState, readBoulderState, writeBoulderState } from "../../features/boulder-state" +import type { BoulderState } from "../../features/boulder-state" +import { _resetForTesting, subagentSessions } from "../../features/claude-code-session-state" + +const { createAtlasHook } = await import("./index") + +describe("atlas hook idle-event session lineage", () => { + const MAIN_SESSION_ID = "main-session-123" + + let testDirectory = "" + let promptCalls: Array = [] + + function writeIncompleteBoulder(): void { + const planPath = join(testDirectory, "test-plan.md") + writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2") + + const state: BoulderState = { + active_plan: planPath, + started_at: "2026-01-02T10:00:00Z", + session_ids: [MAIN_SESSION_ID], + plan_name: "test-plan", + } + + writeBoulderState(testDirectory, state) + } + + function createHook(parentSessionIDs?: Record) { + return createAtlasHook({ + directory: testDirectory, + client: { + session: { + get: async (input: { path: { id: string } }) => ({ + data: { + parentID: parentSessionIDs?.[input.path.id], + }, + }), + messages: async () => ({ data: [] }), + prompt: async (input: unknown) => { + promptCalls.push(input) + return { data: {} } + }, + promptAsync: async (input: unknown) => { + promptCalls.push(input) + return { data: {} } + }, + }, + }, + } as unknown as Parameters[0]) + } + + beforeEach(() => { + testDirectory = join(tmpdir(), `atlas-idle-lineage-${randomUUID()}`) + if (!existsSync(testDirectory)) { + mkdirSync(testDirectory, { recursive: true }) + } + + promptCalls = [] + clearBoulderState(testDirectory) + _resetForTesting() + subagentSessions.clear() + }) + + afterEach(() => { + clearBoulderState(testDirectory) + if (existsSync(testDirectory)) { + rmSync(testDirectory, { recursive: true, force: true }) + } + + _resetForTesting() + }) + + it("does not append unrelated subagent sessions during idle", async () => { + const unrelatedSubagentSessionID = "subagent-session-unrelated" + const unrelatedParentSessionID = "unrelated-parent-session" + + writeIncompleteBoulder() + subagentSessions.add(unrelatedSubagentSessionID) + + const hook = createHook({ + [unrelatedSubagentSessionID]: unrelatedParentSessionID, + }) + + await hook.handler({ + event: { + type: "session.idle", + properties: { sessionID: unrelatedSubagentSessionID }, + }, + }) + + assert.equal(readBoulderState(testDirectory)?.session_ids.includes(unrelatedSubagentSessionID), false) + assert.equal(promptCalls.length, 0) + }) + + it("appends boulder-owned subagent sessions during idle when lineage reaches tracked session", async () => { + const subagentSessionID = "subagent-session-456" + const intermediateParentSessionID = "subagent-parent-789" + + writeIncompleteBoulder() + subagentSessions.add(subagentSessionID) + + const hook = createHook({ + [subagentSessionID]: intermediateParentSessionID, + [intermediateParentSessionID]: MAIN_SESSION_ID, + }) + + await hook.handler({ + event: { + type: "session.idle", + properties: { sessionID: subagentSessionID }, + }, + }) + + assert.equal(readBoulderState(testDirectory)?.session_ids.includes(subagentSessionID), true) + assert.equal(promptCalls.length, 1) + }) +}) diff --git a/src/hooks/atlas/idle-event.ts b/src/hooks/atlas/idle-event.ts index 50fd532b1..1f5cfeb2c 100644 --- a/src/hooks/atlas/idle-event.ts +++ b/src/hooks/atlas/idle-event.ts @@ -1,10 +1,9 @@ import type { PluginInput } from "@opencode-ai/plugin" -import { appendSessionId, getPlanProgress, readBoulderState } from "../../features/boulder-state" -import type { BoulderState, PlanProgress } from "../../features/boulder-state" -import { subagentSessions } from "../../features/claude-code-session-state" +import { getPlanProgress, readBoulderState } from "../../features/boulder-state" import { log } from "../../shared/logger" import { injectBoulderContinuation } from "./boulder-continuation-injector" import { HOOK_NAME } from "./hook-name" +import { resolveActiveBoulderSession } from "./resolve-active-boulder-session" import type { AtlasHookOptions, SessionState } from "./types" const CONTINUATION_COOLDOWN_MS = 5000 @@ -18,44 +17,6 @@ function hasRunningBackgroundTasks(sessionID: string, options?: AtlasHookOptions : false } -function resolveActiveBoulderSession(input: { - directory: string - sessionID: string -}): { - boulderState: BoulderState - progress: PlanProgress - appendedSession: boolean -} | null { - const boulderState = readBoulderState(input.directory) - if (!boulderState) { - return null - } - - const progress = getPlanProgress(boulderState.active_plan) - if (progress.isComplete) { - return { boulderState, progress, appendedSession: false } - } - - if (boulderState.session_ids.includes(input.sessionID)) { - return { boulderState, progress, appendedSession: false } - } - - if (!subagentSessions.has(input.sessionID)) { - return null - } - - const updatedBoulderState = appendSessionId(input.directory, input.sessionID) - if (!updatedBoulderState?.session_ids.includes(input.sessionID)) { - return null - } - - return { - boulderState: updatedBoulderState, - progress, - appendedSession: true, - } -} - async function injectContinuation(input: { ctx: PluginInput sessionID: string @@ -102,6 +63,7 @@ function scheduleRetry(input: { sessionState.pendingRetryTimer = undefined if (sessionState.promptFailureCount >= 2) return + if (sessionState.waitingForFinalWaveApproval) return const currentBoulder = readBoulderState(ctx.directory) if (!currentBoulder) return @@ -136,7 +98,8 @@ export async function handleAtlasSessionIdle(input: { log(`[${HOOK_NAME}] session.idle`, { sessionID }) - const activeBoulderSession = resolveActiveBoulderSession({ + const activeBoulderSession = await resolveActiveBoulderSession({ + client: ctx.client, directory: ctx.directory, sessionID, }) @@ -161,6 +124,11 @@ export async function handleAtlasSessionIdle(input: { const sessionState = getState(sessionID) const now = Date.now() + if (sessionState.waitingForFinalWaveApproval) { + log(`[${HOOK_NAME}] Skipped: waiting for explicit final-wave approval`, { sessionID }) + return + } + if (sessionState.lastEventWasAbortError) { sessionState.lastEventWasAbortError = false log(`[${HOOK_NAME}] Skipped: abort error immediately before idle`, { sessionID }) diff --git a/src/hooks/atlas/index.test.ts b/src/hooks/atlas/index.test.ts index 22ca44c42..269d7928b 100644 --- a/src/hooks/atlas/index.test.ts +++ b/src/hooks/atlas/index.test.ts @@ -45,6 +45,7 @@ describe("atlas hook", () => { directory: TEST_DIR, client: { session: { + get: async () => ({ data: { parentID: "main-session-123" } }), prompt: promptMock, promptAsync: promptMock, }, diff --git a/src/hooks/atlas/resolve-active-boulder-session.ts b/src/hooks/atlas/resolve-active-boulder-session.ts new file mode 100644 index 000000000..81e28ef66 --- /dev/null +++ b/src/hooks/atlas/resolve-active-boulder-session.ts @@ -0,0 +1,53 @@ +import type { PluginInput } from "@opencode-ai/plugin" +import { appendSessionId, getPlanProgress, readBoulderState } from "../../features/boulder-state" +import type { BoulderState, PlanProgress } from "../../features/boulder-state" +import { subagentSessions } from "../../features/claude-code-session-state" +import { isSessionInBoulderLineage } from "./boulder-session-lineage" + +export async function resolveActiveBoulderSession(input: { + client: PluginInput["client"] + directory: string + sessionID: string +}): Promise<{ + boulderState: BoulderState + progress: PlanProgress + appendedSession: boolean +} | null> { + const boulderState = readBoulderState(input.directory) + if (!boulderState) { + return null + } + + const progress = getPlanProgress(boulderState.active_plan) + if (progress.isComplete) { + return { boulderState, progress, appendedSession: false } + } + + if (boulderState.session_ids.includes(input.sessionID)) { + return { boulderState, progress, appendedSession: false } + } + + if (!subagentSessions.has(input.sessionID)) { + return null + } + + const belongsToActiveBoulder = await isSessionInBoulderLineage({ + client: input.client, + sessionID: input.sessionID, + boulderSessionIDs: boulderState.session_ids, + }) + if (!belongsToActiveBoulder) { + return null + } + + const updatedBoulderState = appendSessionId(input.directory, input.sessionID) + if (!updatedBoulderState?.session_ids.includes(input.sessionID)) { + return null + } + + return { + boulderState: updatedBoulderState, + progress, + appendedSession: true, + } +} diff --git a/src/hooks/atlas/tool-execute-after.ts b/src/hooks/atlas/tool-execute-after.ts index fd8c1824c..9c4e82eae 100644 --- a/src/hooks/atlas/tool-execute-after.ts +++ b/src/hooks/atlas/tool-execute-after.ts @@ -3,20 +3,28 @@ import { appendSessionId, getPlanProgress, readBoulderState } from "../../featur import { log } from "../../shared/logger" import { isCallerOrchestrator } from "../../shared/session-utils" import { collectGitDiffStats, formatFileChanges } from "../../shared/git-worktree" +import { shouldPauseForFinalWaveApproval } from "./final-wave-approval-gate" import { HOOK_NAME } from "./hook-name" import { DIRECT_WORK_REMINDER } from "./system-reminder-templates" import { isSisyphusPath } from "./sisyphus-path" import { extractSessionIdFromOutput } from "./subagent-session-id" -import { buildCompletionGate, buildOrchestratorReminder, buildStandaloneVerificationReminder } from "./verification-reminders" +import { + buildCompletionGate, + buildFinalWaveApprovalReminder, + buildOrchestratorReminder, + buildStandaloneVerificationReminder, +} from "./verification-reminders" import { isWriteOrEditToolName } from "./write-edit-tool-policy" +import type { SessionState } from "./types" import type { ToolExecuteAfterInput, ToolExecuteAfterOutput } from "./types" export function createToolExecuteAfterHandler(input: { ctx: PluginInput pendingFilePaths: Map autoCommit: boolean - }): (toolInput: ToolExecuteAfterInput, toolOutput: ToolExecuteAfterOutput) => Promise { - const { ctx, pendingFilePaths, autoCommit } = input + getState: (sessionID: string) => SessionState +}): (toolInput: ToolExecuteAfterInput, toolOutput: ToolExecuteAfterOutput) => Promise { + const { ctx, pendingFilePaths, autoCommit, getState } = input return async (toolInput, toolOutput): Promise => { // Guard against undefined output (e.g., from /review command - see issue #1035) if (!toolOutput) { @@ -75,10 +83,31 @@ export function createToolExecuteAfterHandler(input: { // Preserve original subagent response - critical for debugging failed tasks const originalResponse = toolOutput.output + const shouldPauseForApproval = shouldPauseForFinalWaveApproval({ + planPath: boulderState.active_plan, + taskOutput: originalResponse, + }) + + if (toolInput.sessionID) { + const sessionState = getState(toolInput.sessionID) + sessionState.waitingForFinalWaveApproval = shouldPauseForApproval + + if (shouldPauseForApproval && sessionState.pendingRetryTimer) { + clearTimeout(sessionState.pendingRetryTimer) + sessionState.pendingRetryTimer = undefined + } + } + + const leadReminder = shouldPauseForApproval + ? buildFinalWaveApprovalReminder(boulderState.plan_name, progress, subagentSessionId) + : buildCompletionGate(boulderState.plan_name, subagentSessionId) + const followupReminder = shouldPauseForApproval + ? null + : buildOrchestratorReminder(boulderState.plan_name, progress, subagentSessionId, autoCommit, false) toolOutput.output = ` -${buildCompletionGate(boulderState.plan_name, subagentSessionId)} +${leadReminder} ## SUBAGENT WORK COMPLETED @@ -91,13 +120,16 @@ ${fileChanges} ${originalResponse} - -${buildOrchestratorReminder(boulderState.plan_name, progress, subagentSessionId, autoCommit, false)} -` +${ + followupReminder === null + ? "" + : `\n${followupReminder}\n` +}` log(`[${HOOK_NAME}] Output transformed for orchestrator mode (boulder)`, { plan: boulderState.plan_name, progress: `${progress.completed}/${progress.total}`, fileCount: gitStats.length, + waitingForFinalWaveApproval: shouldPauseForApproval, }) } else { toolOutput.output += `\n\n${buildStandaloneVerificationReminder(subagentSessionId)}\n` diff --git a/src/hooks/atlas/tsconfig.json b/src/hooks/atlas/tsconfig.json new file mode 100644 index 000000000..f68402a71 --- /dev/null +++ b/src/hooks/atlas/tsconfig.json @@ -0,0 +1,9 @@ +{ + "extends": "../../../tsconfig.json", + "compilerOptions": { + "noEmit": true, + "types": ["bun-types"] + }, + "include": ["./**/*.ts", "./**/*.d.ts"], + "exclude": [] +} diff --git a/src/hooks/atlas/types.ts b/src/hooks/atlas/types.ts index 54e45051d..bbc83a149 100644 --- a/src/hooks/atlas/types.ts +++ b/src/hooks/atlas/types.ts @@ -31,4 +31,5 @@ export interface SessionState { promptFailureCount: number lastFailureAt?: number pendingRetryTimer?: ReturnType + waitingForFinalWaveApproval?: boolean } diff --git a/src/hooks/atlas/verification-reminders.ts b/src/hooks/atlas/verification-reminders.ts index 019e5e869..33d7a47fe 100644 --- a/src/hooks/atlas/verification-reminders.ts +++ b/src/hooks/atlas/verification-reminders.ts @@ -108,6 +108,45 @@ ${commitStep} **${remaining} tasks remain. Keep bouldering.**` } +export function buildFinalWaveApprovalReminder( + planName: string, + progress: { total: number; completed: number }, + sessionId: string +): string { + const remaining = progress.total - progress.completed + + return ` +--- + +**BOULDER STATE:** Plan: \ +\`${planName}\` | ${progress.completed}/${progress.total} done | ${remaining} remaining + +--- + +${buildVerificationReminder(sessionId)} + +**FINAL WAVE APPROVAL GATE** + +The last Final Verification Wave result just passed. +This is the ONLY point where approval-style user interaction is required. + +1. Read \ +\`.sisyphus/plans/${planName}.md\` again and confirm the remaining unchecked item is the last final-wave task. +2. Consolidate the F1-F4 verdicts into a short summary for the user. +3. Tell the user all final reviewers approved. +4. Ask for explicit user approval before editing the last final-wave checkbox or marking the plan complete. +5. Wait for the user's explicit approval. Do NOT auto-continue. Do NOT call \ +\`task()\` again unless the user rejects and requests fixes. + +If the user rejects or requests changes: +- delegate the required fix +- re-run the affected final-wave reviewer +- present the updated results again +- wait again for explicit user approval + +**DO NOT mark the final-wave checkbox complete until the user explicitly says okay.**` +} + export function buildStandaloneVerificationReminder(sessionId: string): string { return ` --- diff --git a/src/hooks/auto-slash-command/executor.ts b/src/hooks/auto-slash-command/executor.ts index cd06ee78d..8a7536ad9 100644 --- a/src/hooks/auto-slash-command/executor.ts +++ b/src/hooks/auto-slash-command/executor.ts @@ -1,86 +1,25 @@ -import { existsSync, readdirSync, readFileSync } from "fs" -import { join, basename, dirname } from "path" +import { dirname } from "path" import { - parseFrontmatter, resolveCommandsInText, resolveFileReferencesInText, - sanitizeModelField, - getClaudeConfigDir, - getOpenCodeConfigDir, - discoverPluginCommandDefinitions, } from "../../shared" -import { loadBuiltinCommands } from "../../features/builtin-commands" -import type { CommandFrontmatter } from "../../features/claude-code-command-loader/types" -import { isMarkdownFile } from "../../shared/file-utils" import { discoverAllSkills, type LoadedSkill, type LazyContentLoader } from "../../features/opencode-skill-loader" +import { discoverCommandsSync } from "../../tools/slashcommand" +import type { CommandInfo as DiscoveredCommandInfo, CommandMetadata } from "../../tools/slashcommand/types" import type { ParsedSlashCommand } from "./types" -interface CommandScope { - type: "user" | "project" | "opencode" | "opencode-project" | "skill" | "builtin" | "plugin" -} - -interface CommandMetadata { - name: string - description: string - argumentHint?: string - model?: string - agent?: string - subtask?: boolean -} - -interface CommandInfo { +interface SkillCommandInfo { name: string path?: string metadata: CommandMetadata content?: string - scope: CommandScope["type"] + scope: "skill" lazyContentLoader?: LazyContentLoader } -function discoverCommandsFromDir(commandsDir: string, scope: CommandScope["type"]): CommandInfo[] { - if (!existsSync(commandsDir)) { - return [] - } +type CommandInfo = DiscoveredCommandInfo | SkillCommandInfo - const entries = readdirSync(commandsDir, { withFileTypes: true }) - const commands: CommandInfo[] = [] - - for (const entry of entries) { - if (!isMarkdownFile(entry)) continue - - const commandPath = join(commandsDir, entry.name) - const commandName = basename(entry.name, ".md") - - try { - const content = readFileSync(commandPath, "utf-8") - const { data, body } = parseFrontmatter(content) - - const isOpencodeSource = scope === "opencode" || scope === "opencode-project" - const metadata: CommandMetadata = { - name: commandName, - description: data.description || "", - argumentHint: data["argument-hint"], - model: sanitizeModelField(data.model, isOpencodeSource ? "opencode" : "claude-code"), - agent: data.agent, - subtask: Boolean(data.subtask), - } - - commands.push({ - name: commandName, - path: commandPath, - metadata, - content: body, - scope, - }) - } catch { - continue - } - } - - return commands -} - -function skillToCommandInfo(skill: LoadedSkill): CommandInfo { +function skillToCommandInfo(skill: LoadedSkill): SkillCommandInfo { return { name: skill.name, path: skill.path, @@ -104,60 +43,30 @@ export interface ExecutorOptions { enabledPluginsOverride?: Record } -function discoverPluginCommands(options?: ExecutorOptions): CommandInfo[] { - const pluginDefinitions = discoverPluginCommandDefinitions(options) - - return Object.entries(pluginDefinitions).map(([name, definition]) => ({ - name, - metadata: { - name, - description: definition.description || "", - model: definition.model, - agent: definition.agent, - subtask: definition.subtask, - }, - content: definition.template, - scope: "plugin", - })) +function filterDiscoveredCommandsByScope( + commands: DiscoveredCommandInfo[], + scope: DiscoveredCommandInfo["scope"], +): DiscoveredCommandInfo[] { + return commands.filter(command => command.scope === scope) } async function discoverAllCommands(options?: ExecutorOptions): Promise { - const configDir = getOpenCodeConfigDir({ binary: "opencode" }) - const userCommandsDir = join(getClaudeConfigDir(), "commands") - const projectCommandsDir = join(process.cwd(), ".claude", "commands") - const opencodeGlobalDir = join(configDir, "command") - const opencodeProjectDir = join(process.cwd(), ".opencode", "command") - - const userCommands = discoverCommandsFromDir(userCommandsDir, "user") - const opencodeGlobalCommands = discoverCommandsFromDir(opencodeGlobalDir, "opencode") - const projectCommands = discoverCommandsFromDir(projectCommandsDir, "project") - const opencodeProjectCommands = discoverCommandsFromDir(opencodeProjectDir, "opencode-project") - const builtinCommandsMap = loadBuiltinCommands() - const builtinCommands: CommandInfo[] = Object.values(builtinCommandsMap).map(cmd => ({ - name: cmd.name, - metadata: { - name: cmd.name, - description: cmd.description || "", - model: cmd.model, - agent: cmd.agent, - subtask: cmd.subtask, - }, - content: cmd.template, - scope: "builtin", - })) + const discoveredCommands = discoverCommandsSync(process.cwd(), { + pluginsEnabled: options?.pluginsEnabled, + enabledPluginsOverride: options?.enabledPluginsOverride, + }) const skills = options?.skills ?? await discoverAllSkills() const skillCommands = skills.map(skillToCommandInfo) - const pluginCommands = discoverPluginCommands(options) return [ - ...builtinCommands, - ...opencodeProjectCommands, - ...projectCommands, - ...opencodeGlobalCommands, - ...userCommands, + ...filterDiscoveredCommandsByScope(discoveredCommands, "builtin"), + ...filterDiscoveredCommandsByScope(discoveredCommands, "opencode-project"), + ...filterDiscoveredCommandsByScope(discoveredCommands, "project"), + ...filterDiscoveredCommandsByScope(discoveredCommands, "opencode"), + ...filterDiscoveredCommandsByScope(discoveredCommands, "user"), ...skillCommands, - ...pluginCommands, + ...filterDiscoveredCommandsByScope(discoveredCommands, "plugin"), ] } diff --git a/src/hooks/gpt-permission-continuation/constants.ts b/src/hooks/gpt-permission-continuation/constants.ts index 690a0f0bd..04eda9a72 100644 --- a/src/hooks/gpt-permission-continuation/constants.ts +++ b/src/hooks/gpt-permission-continuation/constants.ts @@ -1,5 +1,6 @@ export const HOOK_NAME = "gpt-permission-continuation" export const CONTINUATION_PROMPT = "continue" +export const MAX_CONSECUTIVE_AUTO_CONTINUES = 3 export const DEFAULT_STALL_PATTERNS = [ "if you want", diff --git a/src/hooks/gpt-permission-continuation/gpt-permission-continuation.test.ts b/src/hooks/gpt-permission-continuation/gpt-permission-continuation.test.ts index dfb8b4591..98e2e2ae8 100644 --- a/src/hooks/gpt-permission-continuation/gpt-permission-continuation.test.ts +++ b/src/hooks/gpt-permission-continuation/gpt-permission-continuation.test.ts @@ -1,4 +1,7 @@ -import { describe, expect, test } from "bun:test" +/// + +import { createOpencodeClient } from "@opencode-ai/sdk" +import { describe, expect, it as test } from "bun:test" import { createGptPermissionContinuationHook } from "." @@ -15,29 +18,97 @@ type SessionMessage = { parts?: Array<{ type: string; text?: string }> } -function createMockPluginInput(messages: SessionMessage[]) { - const promptCalls: string[] = [] +type GptPermissionContext = Parameters[0] - const ctx = { - directory: "/tmp/test", - client: { - session: { - messages: async () => ({ data: messages }), - prompt: async (input: { body: { parts: Array<{ text: string }> } }) => { - promptCalls.push(input.body.parts[0]?.text ?? "") - return {} - }, - promptAsync: async (input: { body: { parts: Array<{ text: string }> } }) => { - promptCalls.push(input.body.parts[0]?.text ?? "") - return {} - }, +function isRecord(value: unknown): value is Record { + return typeof value === "object" && value !== null +} + +function extractPromptText(input: unknown): string { + if (!isRecord(input)) return "" + + const body = input.body + if (!isRecord(body)) return "" + + const parts = body.parts + if (!Array.isArray(parts)) return "" + + const firstPart = parts[0] + if (!isRecord(firstPart)) return "" + + return typeof firstPart.text === "string" ? firstPart.text : "" +} + +function createMockPluginInput(messages: SessionMessage[]): { + ctx: GptPermissionContext + promptCalls: string[] +} { + const promptCalls: string[] = [] + const client = createOpencodeClient({ directory: "/tmp/test" }) + const shell = Object.assign( + () => { + throw new Error("$ is not used in this test") + }, + { + braces: () => [], + escape: (input: string) => input, + env() { + return shell + }, + cwd() { + return shell + }, + nothrow() { + return shell + }, + throws() { + return shell }, }, - } as any + ) + const request = new Request("http://localhost") + const response = new Response() + + Reflect.set(client.session, "messages", async () => ({ data: messages, error: undefined, request, response })) + Reflect.set(client.session, "prompt", async (input: unknown) => { + promptCalls.push(extractPromptText(input)) + return { data: undefined, error: undefined, request, response } + }) + Reflect.set(client.session, "promptAsync", async (input: unknown) => { + promptCalls.push(extractPromptText(input)) + return { data: undefined, error: undefined, request, response } + }) + + const ctx: GptPermissionContext = { + client, + project: { + id: "test-project", + worktree: "/tmp/test", + time: { created: Date.now() }, + }, + directory: "/tmp/test", + worktree: "/tmp/test", + serverUrl: new URL("http://localhost"), + $: shell, + } return { ctx, promptCalls } } +function createAssistantMessage(id: string, text: string): SessionMessage { + return { + info: { id, role: "assistant", modelID: "gpt-5.4" }, + parts: [{ type: "text", text }], + } +} + +function createUserMessage(id: string, text: string): SessionMessage { + return { + info: { id, role: "user" }, + parts: [{ type: "text", text }], + } +} + describe("gpt-permission-continuation", () => { test("injects continue when the last GPT assistant reply asks for permission", async () => { // given @@ -147,4 +218,117 @@ describe("gpt-permission-continuation", () => { // then expect(promptCalls).toEqual(["continue"]) }) + + describe("#given repeated GPT permission tails in the same session", () => { + describe("#when the permission phrases keep changing", () => { + test("stops injecting after three consecutive auto-continues", async () => { + // given + const messages: SessionMessage[] = [ + createUserMessage("msg-0", "Please continue the fix."), + createAssistantMessage("msg-1", "If you want, I can apply the patch next."), + ] + const { ctx, promptCalls } = createMockPluginInput(messages) + const hook = createGptPermissionContinuationHook(ctx) + + // when + await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } }) + messages.push(createUserMessage("msg-2", "continue")) + messages.push(createAssistantMessage("msg-3", "Would you like me to continue with the tests?")) + await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } }) + messages.push(createUserMessage("msg-4", "continue")) + messages.push(createAssistantMessage("msg-5", "Do you want me to wire the remaining cleanup?")) + await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } }) + messages.push(createUserMessage("msg-6", "continue")) + messages.push(createAssistantMessage("msg-7", "Shall I finish the remaining updates?")) + await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } }) + + // then + expect(promptCalls).toEqual(["continue", "continue", "continue"]) + }) + }) + + describe("#when a real user message arrives between auto-continues", () => { + test("resets the consecutive auto-continue counter", async () => { + // given + const messages: SessionMessage[] = [ + createUserMessage("msg-0", "Please continue the fix."), + createAssistantMessage("msg-1", "If you want, I can apply the patch next."), + ] + const { ctx, promptCalls } = createMockPluginInput(messages) + const hook = createGptPermissionContinuationHook(ctx) + + // when + await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } }) + messages.push(createUserMessage("msg-2", "continue")) + messages.push(createAssistantMessage("msg-3", "Would you like me to continue with the tests?")) + await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } }) + messages.push(createUserMessage("msg-4", "Please keep going and finish the cleanup.")) + messages.push(createAssistantMessage("msg-5", "Do you want me to wire the remaining cleanup?")) + await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } }) + messages.push(createUserMessage("msg-6", "continue")) + messages.push(createAssistantMessage("msg-7", "Shall I finish the remaining updates?")) + await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } }) + messages.push(createUserMessage("msg-8", "continue")) + messages.push(createAssistantMessage("msg-9", "If you want, I can apply the final polish.")) + await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } }) + messages.push(createUserMessage("msg-10", "continue")) + messages.push(createAssistantMessage("msg-11", "Would you like me to ship the final verification?")) + await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } }) + + // then + expect(promptCalls).toEqual(["continue", "continue", "continue", "continue", "continue"]) + }) + }) + + describe("#when the same permission phrase repeats after an auto-continue", () => { + test("stops immediately on stagnation", async () => { + // given + const messages: SessionMessage[] = [ + createUserMessage("msg-0", "Please continue the fix."), + createAssistantMessage("msg-1", "If you want, I can apply the patch next."), + ] + const { ctx, promptCalls } = createMockPluginInput(messages) + const hook = createGptPermissionContinuationHook(ctx) + + // when + await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } }) + messages.push(createUserMessage("msg-2", "continue")) + messages.push(createAssistantMessage("msg-3", "If you want, I can apply the patch next.")) + await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } }) + + // then + expect(promptCalls).toEqual(["continue"]) + }) + }) + + describe("#when a user manually types continue after the cap is reached", () => { + test("resets the cap and allows another auto-continue", async () => { + // given + const messages: SessionMessage[] = [ + createUserMessage("msg-0", "Please continue the fix."), + createAssistantMessage("msg-1", "If you want, I can apply the patch next."), + ] + const { ctx, promptCalls } = createMockPluginInput(messages) + const hook = createGptPermissionContinuationHook(ctx) + + // when + await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } }) + messages.push(createUserMessage("msg-2", "continue")) + messages.push(createAssistantMessage("msg-3", "Would you like me to continue with the tests?")) + await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } }) + messages.push(createUserMessage("msg-4", "continue")) + messages.push(createAssistantMessage("msg-5", "Do you want me to wire the remaining cleanup?")) + await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } }) + messages.push(createUserMessage("msg-6", "continue")) + messages.push(createAssistantMessage("msg-7", "Shall I finish the remaining updates?")) + await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } }) + messages.push(createUserMessage("msg-8", "continue")) + messages.push(createAssistantMessage("msg-9", "If you want, I can apply the final polish.")) + await hook.handler({ event: { type: "session.idle", properties: { sessionID: "ses-1" } } }) + + // then + expect(promptCalls).toEqual(["continue", "continue", "continue", "continue"]) + }) + }) + }) }) diff --git a/src/hooks/gpt-permission-continuation/handler.ts b/src/hooks/gpt-permission-continuation/handler.ts index 0db3142dc..27f28530f 100644 --- a/src/hooks/gpt-permission-continuation/handler.ts +++ b/src/hooks/gpt-permission-continuation/handler.ts @@ -9,10 +9,16 @@ import { isGptAssistantMessage, type SessionMessage, } from "./assistant-message" -import { CONTINUATION_PROMPT, HOOK_NAME } from "./constants" +import { + CONTINUATION_PROMPT, + HOOK_NAME, + MAX_CONSECUTIVE_AUTO_CONTINUES, +} from "./constants" import { detectStallPattern } from "./detector" import type { SessionStateStore } from "./session-state" +type SessionState = ReturnType + async function promptContinuation( ctx: PluginInput, sessionID: string, @@ -33,6 +39,38 @@ async function promptContinuation( await ctx.client.session.prompt(payload) } +function getLastUserMessageBefore( + messages: SessionMessage[], + lastAssistantIndex: number, +): SessionMessage | null { + for (let index = lastAssistantIndex - 1; index >= 0; index--) { + if (messages[index].info?.role === "user") { + return messages[index] + } + } + + return null +} + +function isAutoContinuationUserMessage(message: SessionMessage): boolean { + return extractAssistantText(message).trim().toLowerCase() === CONTINUATION_PROMPT +} + +function extractPermissionPhrase(text: string): string | null { + const tail = text.slice(-800) + const lines = tail.split("\n").map((line) => line.trim()).filter(Boolean) + const hotZone = lines.slice(-3).join(" ") + const sentenceParts = hotZone.trim().replace(/\s+/g, " ").split(/(?<=[.!?])\s+/) + const trailingSegment = sentenceParts[sentenceParts.length - 1]?.trim().toLowerCase() ?? "" + return trailingSegment || null +} + +function resetAutoContinuationState(state: SessionState): void { + state.consecutiveAutoContinueCount = 0 + state.awaitingAutoContinuationResponse = false + state.lastAutoContinuePermissionPhrase = undefined +} + export function createGptPermissionContinuationHandler(args: { ctx: PluginInput sessionStateStore: SessionStateStore @@ -78,6 +116,21 @@ export function createGptPermissionContinuationHandler(args: { const lastAssistantMessage = getLastAssistantMessage(messages) if (!lastAssistantMessage) return + const lastAssistantIndex = messages.lastIndexOf(lastAssistantMessage) + const previousUserMessage = getLastUserMessageBefore(messages, lastAssistantIndex) + const previousUserMessageWasAutoContinuation = + previousUserMessage !== null + && state.awaitingAutoContinuationResponse + && isAutoContinuationUserMessage(previousUserMessage) + + if (previousUserMessageWasAutoContinuation) { + state.awaitingAutoContinuationResponse = false + } else if (previousUserMessage) { + resetAutoContinuationState(state) + } else { + state.awaitingAutoContinuationResponse = false + } + const messageID = lastAssistantMessage.info?.id if (messageID && state.lastHandledMessageID === messageID) { log(`[${HOOK_NAME}] Skipped: already handled assistant message`, { sessionID, messageID }) @@ -99,9 +152,40 @@ export function createGptPermissionContinuationHandler(args: { return } + const permissionPhrase = extractPermissionPhrase(assistantText) + if (!permissionPhrase) { + return + } + + if (state.consecutiveAutoContinueCount >= MAX_CONSECUTIVE_AUTO_CONTINUES) { + state.lastHandledMessageID = messageID + log(`[${HOOK_NAME}] Skipped: reached max consecutive auto-continues`, { + sessionID, + messageID, + consecutiveAutoContinueCount: state.consecutiveAutoContinueCount, + }) + return + } + + if ( + state.consecutiveAutoContinueCount >= 1 + && state.lastAutoContinuePermissionPhrase === permissionPhrase + ) { + state.lastHandledMessageID = messageID + log(`[${HOOK_NAME}] Skipped: repeated permission phrase after auto-continue`, { + sessionID, + messageID, + permissionPhrase, + }) + return + } + state.inFlight = true await promptContinuation(ctx, sessionID) state.lastHandledMessageID = messageID + state.consecutiveAutoContinueCount += 1 + state.awaitingAutoContinuationResponse = true + state.lastAutoContinuePermissionPhrase = permissionPhrase state.lastInjectedAt = Date.now() log(`[${HOOK_NAME}] Injected continuation prompt`, { sessionID, messageID }) } catch (error) { diff --git a/src/hooks/gpt-permission-continuation/session-state.ts b/src/hooks/gpt-permission-continuation/session-state.ts index 132675086..9414692e4 100644 --- a/src/hooks/gpt-permission-continuation/session-state.ts +++ b/src/hooks/gpt-permission-continuation/session-state.ts @@ -1,6 +1,9 @@ type SessionState = { inFlight: boolean + consecutiveAutoContinueCount: number + awaitingAutoContinuationResponse: boolean lastHandledMessageID?: string + lastAutoContinuePermissionPhrase?: string lastInjectedAt?: number } @@ -15,6 +18,8 @@ export function createSessionStateStore() { const created: SessionState = { inFlight: false, + consecutiveAutoContinueCount: 0, + awaitingAutoContinuationResponse: false, } states.set(sessionID, created) return created diff --git a/src/hooks/model-fallback/hook.test.ts b/src/hooks/model-fallback/hook.test.ts index bcd720c56..04cfcf659 100644 --- a/src/hooks/model-fallback/hook.test.ts +++ b/src/hooks/model-fallback/hook.test.ts @@ -134,8 +134,8 @@ describe("model fallback hook", () => { //#then - chain should progress to entry[1], not repeat entry[0] expect(secondOutput.message["model"]).toEqual({ - providerID: "kimi-for-coding", - modelID: "k2p5", + providerID: "opencode-go", + modelID: "kimi-k2.5", }) expect(secondOutput.message["variant"]).toBeUndefined() }) diff --git a/src/hooks/preemptive-compaction.aws-bedrock.test.ts b/src/hooks/preemptive-compaction.aws-bedrock.test.ts new file mode 100644 index 000000000..9ce47ac8c --- /dev/null +++ b/src/hooks/preemptive-compaction.aws-bedrock.test.ts @@ -0,0 +1,64 @@ +/// + +import { describe, expect, it, mock } from "bun:test" + +import { OhMyOpenCodeConfigSchema } from "../config" + +const { createPreemptiveCompactionHook } = await import("./preemptive-compaction") + +type HookContext = Parameters[0] + +function createMockContext(): HookContext { + return { + client: { + session: { + messages: mock(() => Promise.resolve({ data: [] })), + summarize: mock(() => Promise.resolve({})), + }, + tui: { + showToast: mock(() => Promise.resolve()), + }, + }, + directory: "/tmp/test", + } +} + +describe("preemptive-compaction aws-bedrock-anthropic", () => { + it("triggers compaction for aws-bedrock-anthropic provider when usage exceeds threshold", async () => { + // given + const ctx = createMockContext() + const pluginConfig = OhMyOpenCodeConfigSchema.parse({}) + const hook = createPreemptiveCompactionHook(ctx, pluginConfig) + const sessionID = "ses_aws_bedrock_anthropic_high" + + await hook.event({ + event: { + type: "message.updated", + properties: { + info: { + role: "assistant", + sessionID, + providerID: "aws-bedrock-anthropic", + modelID: "claude-sonnet-4-6", + finish: true, + tokens: { + input: 170000, + output: 1000, + reasoning: 0, + cache: { read: 10000, write: 0 }, + }, + }, + }, + }, + }) + + // when + await hook["tool.execute.after"]( + { tool: "bash", sessionID, callID: "call_aws_bedrock_1" }, + { title: "", output: "test", metadata: null }, + ) + + // then + expect(ctx.client.session.summarize).toHaveBeenCalledTimes(1) + }) +}) diff --git a/src/hooks/preemptive-compaction.ts b/src/hooks/preemptive-compaction.ts index b1b6c3bbf..b75679e40 100644 --- a/src/hooks/preemptive-compaction.ts +++ b/src/hooks/preemptive-compaction.ts @@ -1,23 +1,13 @@ import { log } from "../shared/logger" import type { OhMyOpenCodeConfig } from "../config" +import { + resolveActualContextLimit, + type ContextLimitModelCacheState, +} from "../shared/context-limit-resolver" import { resolveCompactionModel } from "./shared/compaction-model-resolver" -const DEFAULT_ACTUAL_LIMIT = 200_000 const PREEMPTIVE_COMPACTION_TIMEOUT_MS = 120_000 -type ModelCacheStateLike = { - anthropicContext1MEnabled: boolean - modelContextLimitsCache?: Map -} - -function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number { - return (modelCacheState?.anthropicContext1MEnabled ?? false) || - process.env.ANTHROPIC_1M_CONTEXT === "true" || - process.env.VERTEX_ANTHROPIC_1M_CONTEXT === "true" - ? 1_000_000 - : DEFAULT_ACTUAL_LIMIT -} - const PREEMPTIVE_COMPACTION_THRESHOLD = 0.78 interface TokenInfo { @@ -33,7 +23,7 @@ interface CachedCompactionState { tokens: TokenInfo } -function withTimeout( +async function withTimeout( promise: Promise, timeoutMs: number, errorMessage: string, @@ -46,17 +36,13 @@ function withTimeout( }, timeoutMs) }) - return Promise.race([promise, timeoutPromise]).finally(() => { + return await Promise.race([promise, timeoutPromise]).finally(() => { if (timeoutID !== undefined) { clearTimeout(timeoutID) } }) } -function isAnthropicProvider(providerID: string): boolean { - return providerID === "anthropic" || providerID === "google-vertex-anthropic" -} - type PluginInput = { client: { session: { @@ -76,7 +62,7 @@ type PluginInput = { export function createPreemptiveCompactionHook( ctx: PluginInput, pluginConfig: OhMyOpenCodeConfig, - modelCacheState?: ModelCacheStateLike, + modelCacheState?: ContextLimitModelCacheState, ) { const compactionInProgress = new Set() const compactedSessions = new Set() @@ -92,24 +78,18 @@ export function createPreemptiveCompactionHook( const cached = tokenCache.get(sessionID) if (!cached) return - const isAnthropic = isAnthropicProvider(cached.providerID) - const modelSpecificLimit = !isAnthropic - ? modelCacheState?.modelContextLimitsCache?.get(`${cached.providerID}/${cached.modelID}`) - : undefined + const actualLimit = resolveActualContextLimit( + cached.providerID, + cached.modelID, + modelCacheState, + ) - let actualLimit: number - if (isAnthropic) { - actualLimit = getAnthropicActualLimit(modelCacheState) - } else { - if (modelSpecificLimit === undefined) { - log("[preemptive-compaction] Skipping preemptive compaction: unknown context limit for model", { - providerID: cached.providerID, - modelID: cached.modelID, - }) - return - } - - actualLimit = modelSpecificLimit + if (actualLimit === null) { + log("[preemptive-compaction] Skipping preemptive compaction: unknown context limit for model", { + providerID: cached.providerID, + modelID: cached.modelID, + }) + return } const lastTokens = cached.tokens diff --git a/src/hooks/runtime-fallback/event-handler.test.ts b/src/hooks/runtime-fallback/event-handler.test.ts new file mode 100644 index 000000000..3bad84bef --- /dev/null +++ b/src/hooks/runtime-fallback/event-handler.test.ts @@ -0,0 +1,107 @@ +import { describe, expect, it } from "bun:test" +import type { HookDeps, RuntimeFallbackPluginInput } from "./types" +import type { AutoRetryHelpers } from "./auto-retry" +import { createFallbackState } from "./fallback-state" +import { createEventHandler } from "./event-handler" + +function createContext(): RuntimeFallbackPluginInput { + return { + client: { + session: { + abort: async () => ({}), + messages: async () => ({ data: [] }), + promptAsync: async () => ({}), + }, + tui: { + showToast: async () => ({}), + }, + }, + directory: "/test/dir", + } +} + +function createDeps(): HookDeps { + return { + ctx: createContext(), + config: { + enabled: true, + retry_on_errors: [429, 503, 529], + max_fallback_attempts: 3, + cooldown_seconds: 60, + timeout_seconds: 30, + notify_on_fallback: false, + }, + options: undefined, + pluginConfig: {}, + sessionStates: new Map(), + sessionLastAccess: new Map(), + sessionRetryInFlight: new Set(), + sessionAwaitingFallbackResult: new Set(), + sessionFallbackTimeouts: new Map(), + sessionStatusRetryKeys: new Map(), + } +} + +function createHelpers(deps: HookDeps, abortCalls: string[], clearCalls: string[]): AutoRetryHelpers { + return { + abortSessionRequest: async (sessionID: string) => { + abortCalls.push(sessionID) + }, + clearSessionFallbackTimeout: (sessionID: string) => { + clearCalls.push(sessionID) + deps.sessionFallbackTimeouts.delete(sessionID) + }, + scheduleSessionFallbackTimeout: () => {}, + autoRetryWithFallback: async () => {}, + resolveAgentForSessionFromContext: async () => undefined, + cleanupStaleSessions: () => {}, + } +} + +describe("createEventHandler", () => { + it("#given a session retry dedupe key #when session.stop fires #then the retry dedupe key is cleared", async () => { + // given + const sessionID = "session-stop" + const deps = createDeps() + const abortCalls: string[] = [] + const clearCalls: string[] = [] + const state = createFallbackState("google/gemini-2.5-pro") + state.pendingFallbackModel = "openai/gpt-5.4" + deps.sessionStates.set(sessionID, state) + deps.sessionRetryInFlight.add(sessionID) + deps.sessionStatusRetryKeys.set(sessionID, "retry:1") + const handler = createEventHandler(deps, createHelpers(deps, abortCalls, clearCalls)) + + // when + await handler({ event: { type: "session.stop", properties: { sessionID } } }) + + // then + expect(deps.sessionStatusRetryKeys.has(sessionID)).toBe(false) + expect(clearCalls).toEqual([sessionID]) + expect(abortCalls).toEqual([sessionID]) + }) + + it("#given a session retry dedupe key without a pending fallback result #when session.idle fires #then the retry dedupe key is cleared", async () => { + // given + const sessionID = "session-idle" + const deps = createDeps() + const abortCalls: string[] = [] + const clearCalls: string[] = [] + const state = createFallbackState("google/gemini-2.5-pro") + state.pendingFallbackModel = "openai/gpt-5.4" + deps.sessionStates.set(sessionID, state) + deps.sessionRetryInFlight.add(sessionID) + deps.sessionFallbackTimeouts.set(sessionID, 1) + deps.sessionStatusRetryKeys.set(sessionID, "retry:1") + const handler = createEventHandler(deps, createHelpers(deps, abortCalls, clearCalls)) + + // when + await handler({ event: { type: "session.idle", properties: { sessionID } } }) + + // then + expect(deps.sessionStatusRetryKeys.has(sessionID)).toBe(false) + expect(clearCalls).toEqual([sessionID]) + expect(abortCalls).toEqual([]) + expect(state.pendingFallbackModel).toBe(undefined) + }) +}) diff --git a/src/hooks/runtime-fallback/event-handler.ts b/src/hooks/runtime-fallback/event-handler.ts index 71bd1af06..09175ddaa 100644 --- a/src/hooks/runtime-fallback/event-handler.ts +++ b/src/hooks/runtime-fallback/event-handler.ts @@ -54,6 +54,7 @@ export function createEventHandler(deps: HookDeps, helpers: AutoRetryHelpers) { sessionRetryInFlight.delete(sessionID) sessionAwaitingFallbackResult.delete(sessionID) + sessionStatusRetryKeys.delete(sessionID) const state = sessionStates.get(sessionID) if (state?.pendingFallbackModel) { @@ -75,6 +76,7 @@ export function createEventHandler(deps: HookDeps, helpers: AutoRetryHelpers) { const hadTimeout = sessionFallbackTimeouts.has(sessionID) helpers.clearSessionFallbackTimeout(sessionID) sessionRetryInFlight.delete(sessionID) + sessionStatusRetryKeys.delete(sessionID) const state = sessionStates.get(sessionID) if (state?.pendingFallbackModel) { diff --git a/src/hooks/runtime-fallback/hook-dispose-cleanup.test.ts b/src/hooks/runtime-fallback/hook-dispose-cleanup.test.ts new file mode 100644 index 000000000..912100011 --- /dev/null +++ b/src/hooks/runtime-fallback/hook-dispose-cleanup.test.ts @@ -0,0 +1,88 @@ +import { describe, expect, it } from "bun:test" +import type { RuntimeFallbackPluginInput } from "./types" +import { createRuntimeFallbackHook } from "./hook" +import { SessionCategoryRegistry } from "../../shared/session-category-registry" + +function createContext(promptCalls: unknown[]): RuntimeFallbackPluginInput { + return { + client: { + session: { + abort: async () => ({}), + messages: async () => ({ + data: [{ info: { role: "user" }, parts: [{ type: "text", text: "retry this" }] }], + }), + promptAsync: async (args: unknown) => { + promptCalls.push(args) + return {} + }, + }, + tui: { + showToast: async () => ({}), + }, + }, + directory: "/test/dir", + } +} + +describe("createRuntimeFallbackHook dispose retry-key cleanup", () => { + it("#given a session.status retry key #when dispose() is called #then the same retry event is not deduplicated afterward", async () => { + // given + const promptCalls: unknown[] = [] + const sessionID = "session-dispose-retry-key" + const hook = createRuntimeFallbackHook(createContext(promptCalls), { + config: { + enabled: true, + retry_on_errors: [429, 503, 529], + max_fallback_attempts: 3, + cooldown_seconds: 60, + timeout_seconds: 30, + notify_on_fallback: false, + }, + pluginConfig: { + categories: { + test: { + fallback_models: ["openai/gpt-5.2"], + }, + }, + }, + }) + SessionCategoryRegistry.register(sessionID, "test") + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "quotio/claude-opus-4-6" } }, + }, + }) + + const retryEvent = { + event: { + type: "session.status", + properties: { + sessionID, + status: { + type: "retry", + attempt: 1, + message: "All credentials for model claude-opus-4-6 are cooling down [retrying in 7m 56s attempt #1]", + }, + }, + }, + } + + await hook.event(retryEvent) + expect(promptCalls).toHaveLength(1) + + // when + hook.dispose?.() + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "quotio/claude-opus-4-6" } }, + }, + }) + await hook.event(retryEvent) + + // then + expect(promptCalls).toHaveLength(2) + }) +}) diff --git a/src/hooks/runtime-fallback/hook.ts b/src/hooks/runtime-fallback/hook.ts index bbf8d439b..2a13d507e 100644 --- a/src/hooks/runtime-fallback/hook.ts +++ b/src/hooks/runtime-fallback/hook.ts @@ -76,6 +76,7 @@ export function createRuntimeFallbackHook( deps.sessionRetryInFlight.clear() deps.sessionAwaitingFallbackResult.clear() deps.sessionFallbackTimeouts.clear() + deps.sessionStatusRetryKeys.clear() } return { diff --git a/src/hooks/runtime-fallback/message-update-handler.test.ts b/src/hooks/runtime-fallback/message-update-handler.test.ts new file mode 100644 index 000000000..7a3142e68 --- /dev/null +++ b/src/hooks/runtime-fallback/message-update-handler.test.ts @@ -0,0 +1,56 @@ +import { describe, expect, it } from "bun:test" +import type { RuntimeFallbackPluginInput } from "./types" +import { hasVisibleAssistantResponse } from "./visible-assistant-response" + +function createContext(messagesResponse: unknown): RuntimeFallbackPluginInput { + return { + client: { + session: { + abort: async () => ({}), + messages: async () => messagesResponse, + promptAsync: async () => ({}), + }, + tui: { + showToast: async () => ({}), + }, + }, + directory: "/test/dir", + } +} + +describe("hasVisibleAssistantResponse", () => { + it("#given only an old assistant reply before the latest user turn #when visibility is checked #then the stale reply is ignored", async () => { + // given + const checkVisibleResponse = hasVisibleAssistantResponse(() => undefined) + const ctx = createContext({ + data: [ + { info: { role: "user" }, parts: [{ type: "text", text: "older question" }] }, + { info: { role: "assistant" }, parts: [{ type: "text", text: "older answer" }] }, + { info: { role: "user" }, parts: [{ type: "text", text: "latest question" }] }, + ], + }) + + // when + const result = await checkVisibleResponse(ctx, "session-old-assistant", undefined) + + // then + expect(result).toBe(false) + }) + + it("#given an assistant reply after the latest user turn #when visibility is checked #then the current reply is treated as visible", async () => { + // given + const checkVisibleResponse = hasVisibleAssistantResponse(() => undefined) + const ctx = createContext({ + data: [ + { info: { role: "user" }, parts: [{ type: "text", text: "latest question" }] }, + { info: { role: "assistant" }, parts: [{ type: "text", text: "visible answer" }] }, + ], + }) + + // when + const result = await checkVisibleResponse(ctx, "session-visible-assistant", undefined) + + // then + expect(result).toBe(true) + }) +}) diff --git a/src/hooks/runtime-fallback/message-update-handler.ts b/src/hooks/runtime-fallback/message-update-handler.ts index 488ffc17d..0c07405a0 100644 --- a/src/hooks/runtime-fallback/message-update-handler.ts +++ b/src/hooks/runtime-fallback/message-update-handler.ts @@ -7,49 +7,12 @@ import { createFallbackState } from "./fallback-state" import { getFallbackModelsForSession } from "./fallback-models" import { resolveFallbackBootstrapModel } from "./fallback-bootstrap-model" import { dispatchFallbackRetry } from "./fallback-retry-dispatcher" -import { extractSessionMessages } from "./session-messages" +import { hasVisibleAssistantResponse } from "./visible-assistant-response" -export function hasVisibleAssistantResponse(extractAutoRetrySignalFn: typeof extractAutoRetrySignal) { - return async ( - ctx: HookDeps["ctx"], - sessionID: string, - _info: Record | undefined, - ): Promise => { - try { - const messagesResp = await ctx.client.session.messages({ - path: { id: sessionID }, - query: { directory: ctx.directory }, - }) - - const msgs = extractSessionMessages(messagesResp) - - if (!msgs || msgs.length === 0) return false - - const lastAssistant = [...msgs].reverse().find((m) => m.info?.role === "assistant") - if (!lastAssistant) return false - if (lastAssistant.info?.error) return false - - const parts = lastAssistant.parts ?? - (lastAssistant.info?.parts as Array<{ type?: string; text?: string }> | undefined) - - const textFromParts = (parts ?? []) - .filter((p) => p.type === "text" && typeof p.text === "string") - .map((p) => p.text!.trim()) - .filter((text) => text.length > 0) - .join("\n") - - if (!textFromParts) return false - if (extractAutoRetrySignalFn({ message: textFromParts })) return false - - return true - } catch { - return false - } - } -} +export { hasVisibleAssistantResponse } from "./visible-assistant-response" export function createMessageUpdateHandler(deps: HookDeps, helpers: AutoRetryHelpers) { - const { ctx, config, pluginConfig, sessionStates, sessionLastAccess, sessionRetryInFlight, sessionAwaitingFallbackResult } = deps + const { ctx, config, pluginConfig, sessionStates, sessionLastAccess, sessionRetryInFlight, sessionAwaitingFallbackResult, sessionStatusRetryKeys } = deps const checkVisibleResponse = hasVisibleAssistantResponse(extractAutoRetrySignal) return async (props: Record | undefined) => { @@ -91,6 +54,7 @@ export function createMessageUpdateHandler(deps: HookDeps, helpers: AutoRetryHel } sessionAwaitingFallbackResult.delete(sessionID) + sessionStatusRetryKeys.delete(sessionID) helpers.clearSessionFallbackTimeout(sessionID) const state = sessionStates.get(sessionID) if (state?.pendingFallbackModel) { diff --git a/src/hooks/runtime-fallback/success-retry-key-cleanup.test.ts b/src/hooks/runtime-fallback/success-retry-key-cleanup.test.ts new file mode 100644 index 000000000..828c67b72 --- /dev/null +++ b/src/hooks/runtime-fallback/success-retry-key-cleanup.test.ts @@ -0,0 +1,97 @@ +import { describe, expect, it } from "bun:test" +import type { HookDeps, RuntimeFallbackPluginInput } from "./types" +import type { AutoRetryHelpers } from "./auto-retry" +import { createFallbackState } from "./fallback-state" + +type MessageUpdateHandlerModule = typeof import("./message-update-handler") + +async function importFreshMessageUpdateHandlerModule(): Promise { + return import(`./message-update-handler?success-retry-key-${Date.now()}-${Math.random()}`) +} + +function createContext(messagesResponse: unknown): RuntimeFallbackPluginInput { + return { + client: { + session: { + abort: async () => ({}), + messages: async () => messagesResponse, + promptAsync: async () => ({}), + }, + tui: { + showToast: async () => ({}), + }, + }, + directory: "/test/dir", + } +} + +function createDeps(messagesResponse: unknown): HookDeps { + return { + ctx: createContext(messagesResponse), + config: { + enabled: true, + retry_on_errors: [429, 503, 529], + max_fallback_attempts: 3, + cooldown_seconds: 60, + timeout_seconds: 30, + notify_on_fallback: false, + }, + options: undefined, + pluginConfig: {}, + sessionStates: new Map(), + sessionLastAccess: new Map(), + sessionRetryInFlight: new Set(), + sessionAwaitingFallbackResult: new Set(), + sessionFallbackTimeouts: new Map(), + sessionStatusRetryKeys: new Map(), + } +} + +function createHelpers(clearCalls: string[]): AutoRetryHelpers { + return { + abortSessionRequest: async () => {}, + clearSessionFallbackTimeout: (sessionID: string) => { + clearCalls.push(sessionID) + }, + scheduleSessionFallbackTimeout: () => {}, + autoRetryWithFallback: async () => {}, + resolveAgentForSessionFromContext: async () => undefined, + cleanupStaleSessions: () => {}, + } +} + +describe("createMessageUpdateHandler retry-key cleanup", () => { + it("#given a visible assistant reply after the latest user turn #when a non-error assistant update arrives #then the retry dedupe key is cleared with the fallback watchdog", async () => { + // given + const { createMessageUpdateHandler } = await importFreshMessageUpdateHandlerModule() + const sessionID = "session-visible-assistant" + const clearCalls: string[] = [] + const deps = createDeps({ + data: [ + { info: { role: "user" }, parts: [{ type: "text", text: "latest question" }] }, + { info: { role: "assistant" }, parts: [{ type: "text", text: "visible answer" }] }, + ], + }) + const state = createFallbackState("google/gemini-2.5-pro") + state.pendingFallbackModel = "openai/gpt-5.4" + deps.sessionStates.set(sessionID, state) + deps.sessionAwaitingFallbackResult.add(sessionID) + deps.sessionStatusRetryKeys.set(sessionID, "retry:1") + const handler = createMessageUpdateHandler(deps, createHelpers(clearCalls)) + + // when + await handler({ + info: { + sessionID, + role: "assistant", + model: "openai/gpt-5.4", + }, + }) + + // then + expect(deps.sessionAwaitingFallbackResult.has(sessionID)).toBe(false) + expect(deps.sessionStatusRetryKeys.has(sessionID)).toBe(false) + expect(state.pendingFallbackModel).toBe(undefined) + expect(clearCalls).toEqual([sessionID]) + }) +}) diff --git a/src/hooks/runtime-fallback/visible-assistant-response.ts b/src/hooks/runtime-fallback/visible-assistant-response.ts new file mode 100644 index 000000000..fef9809b1 --- /dev/null +++ b/src/hooks/runtime-fallback/visible-assistant-response.ts @@ -0,0 +1,80 @@ +import type { HookDeps } from "./types" +import type { SessionMessage, SessionMessagePart } from "./session-messages" +import { extractSessionMessages } from "./session-messages" +import { extractAutoRetrySignal } from "./error-classifier" + +function getLastUserMessageIndex(messages: SessionMessage[]): number { + for (let index = messages.length - 1; index >= 0; index--) { + if (messages[index]?.info?.role === "user") { + return index + } + } + + return -1 +} + +function getAssistantText(parts: SessionMessagePart[] | undefined): string { + return (parts ?? []) + .flatMap((part) => { + if (part.type !== "text") { + return [] + } + + const text = typeof part.text === "string" ? part.text.trim() : "" + return text.length > 0 ? [text] : [] + }) + .join("\n") +} + +export function hasVisibleAssistantResponse(extractAutoRetrySignalFn: typeof extractAutoRetrySignal) { + return async ( + ctx: HookDeps["ctx"], + sessionID: string, + _info: Record | undefined, + ): Promise => { + try { + const messagesResponse = await ctx.client.session.messages({ + path: { id: sessionID }, + query: { directory: ctx.directory }, + }) + const messages = extractSessionMessages(messagesResponse) + if (!messages || messages.length === 0) return false + + const lastUserMessageIndex = getLastUserMessageIndex(messages) + if (lastUserMessageIndex === -1) return false + + for (let index = lastUserMessageIndex + 1; index < messages.length; index++) { + const message = messages[index] + if (message?.info?.role !== "assistant") { + continue + } + + if (message.info?.error) { + continue + } + + const infoParts = message.info?.parts + const infoMessageParts = Array.isArray(infoParts) + ? infoParts.filter((part): part is SessionMessagePart => typeof part === "object" && part !== null) + : undefined + const parts = message.parts && message.parts.length > 0 + ? message.parts + : infoMessageParts + const assistantText = getAssistantText(parts) + if (!assistantText) { + continue + } + + if (extractAutoRetrySignalFn({ message: assistantText })) { + continue + } + + return true + } + + return false + } catch { + return false + } + } +} diff --git a/src/plugin-handlers/agent-config-handler.test.ts b/src/plugin-handlers/agent-config-handler.test.ts new file mode 100644 index 000000000..064e86116 --- /dev/null +++ b/src/plugin-handlers/agent-config-handler.test.ts @@ -0,0 +1,258 @@ +/// + +import type { AgentConfig } from "@opencode-ai/sdk" +import { afterEach, beforeEach, describe, expect, spyOn, test } from "bun:test" +import * as agents from "../agents" +import * as shared from "../shared" +import * as sisyphusJunior from "../agents/sisyphus-junior" +import type { OhMyOpenCodeConfig } from "../config" +import * as agentLoader from "../features/claude-code-agent-loader" +import * as skillLoader from "../features/opencode-skill-loader" +import { getAgentDisplayName } from "../shared/agent-display-names" +import { applyAgentConfig } from "./agent-config-handler" +import type { PluginComponents } from "./plugin-components-loader" + +const BUILTIN_SISYPHUS_DISPLAY_NAME = getAgentDisplayName("sisyphus") +const BUILTIN_SISYPHUS_JUNIOR_DISPLAY_NAME = getAgentDisplayName("sisyphus-junior") +const BUILTIN_MULTIMODAL_LOOKER_DISPLAY_NAME = getAgentDisplayName("multimodal-looker") + +function createPluginComponents(): PluginComponents { + return { + commands: {}, + skills: {}, + agents: {}, + mcpServers: {}, + hooksConfigs: [], + plugins: [], + errors: [], + } +} + +function createBaseConfig(): Record { + return { + model: "anthropic/claude-opus-4-6", + agent: {}, + } +} + +function createPluginConfig(): OhMyOpenCodeConfig { + return { + sisyphus_agent: { + planner_enabled: false, + }, + } +} + +describe("applyAgentConfig builtin override protection", () => { + let createBuiltinAgentsSpy: ReturnType + let createSisyphusJuniorAgentSpy: ReturnType + let discoverConfigSourceSkillsSpy: ReturnType + let discoverUserClaudeSkillsSpy: ReturnType + let discoverProjectClaudeSkillsSpy: ReturnType + let discoverOpencodeGlobalSkillsSpy: ReturnType + let discoverOpencodeProjectSkillsSpy: ReturnType + let loadUserAgentsSpy: ReturnType + let loadProjectAgentsSpy: ReturnType + let migrateAgentConfigSpy: ReturnType + let logSpy: ReturnType + + const builtinSisyphusConfig: AgentConfig = { + name: "Builtin Sisyphus", + prompt: "builtin prompt", + mode: "primary", + } + + const builtinOracleConfig: AgentConfig = { + name: "oracle", + prompt: "oracle prompt", + mode: "subagent", + } + + const builtinMultimodalLookerConfig: AgentConfig = { + name: "multimodal-looker", + prompt: "multimodal prompt", + mode: "subagent", + } + + const sisyphusJuniorConfig: AgentConfig = { + name: "Sisyphus-Junior", + prompt: "junior prompt", + mode: "all", + } + + beforeEach(() => { + createBuiltinAgentsSpy = spyOn(agents, "createBuiltinAgents").mockResolvedValue({ + sisyphus: builtinSisyphusConfig, + oracle: builtinOracleConfig, + "multimodal-looker": builtinMultimodalLookerConfig, + }) + + createSisyphusJuniorAgentSpy = spyOn( + sisyphusJunior, + "createSisyphusJuniorAgentWithOverrides", + ).mockReturnValue(sisyphusJuniorConfig) + + discoverConfigSourceSkillsSpy = spyOn( + skillLoader, + "discoverConfigSourceSkills", + ).mockResolvedValue([]) + discoverUserClaudeSkillsSpy = spyOn( + skillLoader, + "discoverUserClaudeSkills", + ).mockResolvedValue([]) + discoverProjectClaudeSkillsSpy = spyOn( + skillLoader, + "discoverProjectClaudeSkills", + ).mockResolvedValue([]) + discoverOpencodeGlobalSkillsSpy = spyOn( + skillLoader, + "discoverOpencodeGlobalSkills", + ).mockResolvedValue([]) + discoverOpencodeProjectSkillsSpy = spyOn( + skillLoader, + "discoverOpencodeProjectSkills", + ).mockResolvedValue([]) + + loadUserAgentsSpy = spyOn(agentLoader, "loadUserAgents").mockReturnValue({}) + loadProjectAgentsSpy = spyOn(agentLoader, "loadProjectAgents").mockReturnValue({}) + + migrateAgentConfigSpy = spyOn(shared, "migrateAgentConfig").mockImplementation( + (config: Record) => config, + ) + logSpy = spyOn(shared, "log").mockImplementation(() => {}) + }) + + afterEach(() => { + createBuiltinAgentsSpy.mockRestore() + createSisyphusJuniorAgentSpy.mockRestore() + discoverConfigSourceSkillsSpy.mockRestore() + discoverUserClaudeSkillsSpy.mockRestore() + discoverProjectClaudeSkillsSpy.mockRestore() + discoverOpencodeGlobalSkillsSpy.mockRestore() + discoverOpencodeProjectSkillsSpy.mockRestore() + loadUserAgentsSpy.mockRestore() + loadProjectAgentsSpy.mockRestore() + migrateAgentConfigSpy.mockRestore() + logSpy.mockRestore() + }) + + test("filters user agents whose key matches the builtin display-name alias", async () => { + // given + loadUserAgentsSpy.mockReturnValue({ + [BUILTIN_SISYPHUS_DISPLAY_NAME]: { + name: BUILTIN_SISYPHUS_DISPLAY_NAME, + prompt: "user alias prompt", + mode: "subagent", + }, + }) + + // when + const result = await applyAgentConfig({ + config: createBaseConfig(), + pluginConfig: createPluginConfig(), + ctx: { directory: "/tmp" }, + pluginComponents: createPluginComponents(), + }) + + // then + expect(result[BUILTIN_SISYPHUS_DISPLAY_NAME]).toEqual(builtinSisyphusConfig) + }) + + test("filters user agents whose key differs from a builtin key only by case", async () => { + // given + loadUserAgentsSpy.mockReturnValue({ + SiSyPhUs: { + name: "SiSyPhUs", + prompt: "mixed-case prompt", + mode: "subagent", + }, + }) + + // when + const result = await applyAgentConfig({ + config: createBaseConfig(), + pluginConfig: createPluginConfig(), + ctx: { directory: "/tmp" }, + pluginComponents: createPluginComponents(), + }) + + // then + expect(result[BUILTIN_SISYPHUS_DISPLAY_NAME]).toEqual(builtinSisyphusConfig) + expect(result.SiSyPhUs).toBeUndefined() + }) + + test("filters plugin agents whose key matches the builtin display-name alias", async () => { + // given + const pluginComponents = createPluginComponents() + pluginComponents.agents = { + [BUILTIN_SISYPHUS_DISPLAY_NAME]: { + name: BUILTIN_SISYPHUS_DISPLAY_NAME, + prompt: "plugin alias prompt", + mode: "subagent", + }, + } + + // when + const result = await applyAgentConfig({ + config: createBaseConfig(), + pluginConfig: createPluginConfig(), + ctx: { directory: "/tmp" }, + pluginComponents, + }) + + // then + expect(result[BUILTIN_SISYPHUS_DISPLAY_NAME]).toEqual(builtinSisyphusConfig) + }) + + describe("#given protected builtin agents use hyphenated names", () => { + describe("#when a user agent uses the underscored multimodal looker alias", () => { + test("filters the override", async () => { + // given + loadUserAgentsSpy.mockReturnValue({ + multimodal_looker: { + name: "multimodal_looker", + prompt: "user multimodal alias prompt", + mode: "subagent", + }, + }) + + // when + const result = await applyAgentConfig({ + config: createBaseConfig(), + pluginConfig: createPluginConfig(), + ctx: { directory: "/tmp" }, + pluginComponents: createPluginComponents(), + }) + + // then + expect(result[BUILTIN_MULTIMODAL_LOOKER_DISPLAY_NAME]).toEqual(builtinMultimodalLookerConfig) + expect(result.multimodal_looker).toBeUndefined() + }) + }) + + describe("#when a user agent uses the underscored sisyphus junior alias", () => { + test("filters the override", async () => { + // given + loadUserAgentsSpy.mockReturnValue({ + sisyphus_junior: { + name: "sisyphus_junior", + prompt: "user junior alias prompt", + mode: "subagent", + }, + }) + + // when + const result = await applyAgentConfig({ + config: createBaseConfig(), + pluginConfig: createPluginConfig(), + ctx: { directory: "/tmp" }, + pluginComponents: createPluginComponents(), + }) + + // then + expect(result[BUILTIN_SISYPHUS_JUNIOR_DISPLAY_NAME]).toEqual(sisyphusJuniorConfig) + expect(result.sisyphus_junior).toBeUndefined() + }) + }) + }) +}) diff --git a/src/plugin-handlers/agent-config-handler.ts b/src/plugin-handlers/agent-config-handler.ts index ac4c00bcc..4f5ad9eaf 100644 --- a/src/plugin-handlers/agent-config-handler.ts +++ b/src/plugin-handlers/agent-config-handler.ts @@ -15,6 +15,10 @@ import { loadProjectAgents, loadUserAgents } from "../features/claude-code-agent import type { PluginComponents } from "./plugin-components-loader"; import { reorderAgentsByPriority } from "./agent-priority-order"; import { remapAgentKeysToDisplayNames } from "./agent-key-remapper"; +import { + createProtectedAgentNameSet, + filterProtectedAgentOverrides, +} from "./agent-override-protection"; import { buildPrometheusAgentConfig } from "./prometheus-agent-config-builder"; import { buildPlanDemoteConfig } from "./plan-model-inheritance"; @@ -209,19 +213,21 @@ export async function applyAgentConfig(params: { ) : undefined; - // Collect all builtin agent names to prevent user/project .md files from overriding them - const builtinAgentNames = new Set([ + const protectedBuiltinAgentNames = createProtectedAgentNameSet([ ...Object.keys(agentConfig), ...Object.keys(builtinAgents), ]); - - // Filter user/project agents that duplicate builtin agents (they have mode: "subagent" hardcoded - // in loadAgentsFromDir which would incorrectly override the builtin mode: "primary") - const filteredUserAgents = Object.fromEntries( - Object.entries(userAgents).filter(([key]) => !builtinAgentNames.has(key)), + const filteredUserAgents = filterProtectedAgentOverrides( + userAgents, + protectedBuiltinAgentNames, ); - const filteredProjectAgents = Object.fromEntries( - Object.entries(projectAgents).filter(([key]) => !builtinAgentNames.has(key)), + const filteredProjectAgents = filterProtectedAgentOverrides( + projectAgents, + protectedBuiltinAgentNames, + ); + const filteredPluginAgents = filterProtectedAgentOverrides( + pluginAgents, + protectedBuiltinAgentNames, ); params.config.agent = { @@ -231,26 +237,33 @@ export async function applyAgentConfig(params: { ), ...filterDisabledAgents(filteredUserAgents), ...filterDisabledAgents(filteredProjectAgents), - ...filterDisabledAgents(pluginAgents), + ...filterDisabledAgents(filteredPluginAgents), ...filteredConfigAgents, build: { ...migratedBuild, mode: "subagent", hidden: true }, ...(planDemoteConfig ? { plan: planDemoteConfig } : {}), }; } else { - // Filter user/project agents that duplicate builtin agents - const builtinAgentNames = new Set(Object.keys(builtinAgents)); - const filteredUserAgents = Object.fromEntries( - Object.entries(userAgents).filter(([key]) => !builtinAgentNames.has(key)), + const protectedBuiltinAgentNames = createProtectedAgentNameSet( + Object.keys(builtinAgents), ); - const filteredProjectAgents = Object.fromEntries( - Object.entries(projectAgents).filter(([key]) => !builtinAgentNames.has(key)), + const filteredUserAgents = filterProtectedAgentOverrides( + userAgents, + protectedBuiltinAgentNames, + ); + const filteredProjectAgents = filterProtectedAgentOverrides( + projectAgents, + protectedBuiltinAgentNames, + ); + const filteredPluginAgents = filterProtectedAgentOverrides( + pluginAgents, + protectedBuiltinAgentNames, ); params.config.agent = { ...builtinAgents, ...filterDisabledAgents(filteredUserAgents), ...filterDisabledAgents(filteredProjectAgents), - ...filterDisabledAgents(pluginAgents), + ...filterDisabledAgents(filteredPluginAgents), ...configAgent, }; } diff --git a/src/plugin-handlers/agent-override-protection.ts b/src/plugin-handlers/agent-override-protection.ts new file mode 100644 index 000000000..1954b6529 --- /dev/null +++ b/src/plugin-handlers/agent-override-protection.ts @@ -0,0 +1,34 @@ +const PARENTHETICAL_SUFFIX_PATTERN = /\s*(\([^)]*\)\s*)+$/u + +export function normalizeProtectedAgentName(agentName: string): string { + return agentName + .trim() + .toLowerCase() + .replace(PARENTHETICAL_SUFFIX_PATTERN, "") + .replace(/[-_]/g, "") + .trim() +} + +export function createProtectedAgentNameSet(agentNames: Iterable): Set { + const protectedAgentNames = new Set() + + for (const agentName of agentNames) { + const normalizedAgentName = normalizeProtectedAgentName(agentName) + if (normalizedAgentName.length === 0) continue + + protectedAgentNames.add(normalizedAgentName) + } + + return protectedAgentNames +} + +export function filterProtectedAgentOverrides( + agents: Record, + protectedAgentNames: ReadonlySet, +): Record { + return Object.fromEntries( + Object.entries(agents).filter(([agentName]) => { + return !protectedAgentNames.has(normalizeProtectedAgentName(agentName)) + }), + ) +} diff --git a/src/plugin/event.model-fallback.test.ts b/src/plugin/event.model-fallback.test.ts index f8a92b6f1..b6a1f6966 100644 --- a/src/plugin/event.model-fallback.test.ts +++ b/src/plugin/event.model-fallback.test.ts @@ -212,8 +212,8 @@ describe("createEventHandler - model fallback", () => { expect(abortCalls).toEqual([sessionID]) expect(promptCalls).toEqual([sessionID]) expect(output.message["model"]).toMatchObject({ - providerID: "kimi-for-coding", - modelID: "k2p5", + providerID: "opencode-go", + modelID: "kimi-k2.5", }) expect(output.message["variant"]).toBeUndefined() }) @@ -540,19 +540,19 @@ describe("createEventHandler - model fallback", () => { //#then - first fallback entry applied (no-op skip: claude-opus-4-6 matches current model after normalization) expect(first.message["model"]).toMatchObject({ - providerID: "kimi-for-coding", - modelID: "k2p5", + providerID: "opencode-go", + modelID: "kimi-k2.5", }) expect(first.message["variant"]).toBeUndefined() //#when - second retry cycle const second = await triggerRetryCycle() - //#then - second fallback entry applied (chain advanced past k2p5) + //#then - second fallback entry applied (chain advanced past opencode-go/kimi-k2.5) expect(second.message["model"]).toMatchObject({ - modelID: "kimi-k2.5", + providerID: "kimi-for-coding", + modelID: "k2p5", }) - expect((second.message["model"] as { providerID?: string })?.providerID).toBeTruthy() expect(second.message["variant"]).toBeUndefined() expect(abortCalls).toEqual([sessionID, sessionID]) expect(promptCalls).toEqual([sessionID, sessionID]) diff --git a/src/plugin/event.ts b/src/plugin/event.ts index 0246c45c4..d18ec3691 100644 --- a/src/plugin/event.ts +++ b/src/plugin/event.ts @@ -319,6 +319,7 @@ export function createEventHandler(args: { } if (sessionInfo?.id) { + const wasSyncSubagentSession = syncSubagentSessions.has(sessionInfo.id); clearSessionAgent(sessionInfo.id); lastHandledModelErrorMessageID.delete(sessionInfo.id); lastHandledRetryStatusKey.delete(sessionInfo.id); @@ -329,6 +330,9 @@ export function createEventHandler(args: { firstMessageVariantGate.clear(sessionInfo.id); clearSessionModel(sessionInfo.id); syncSubagentSessions.delete(sessionInfo.id); + if (wasSyncSubagentSession) { + subagentSessions.delete(sessionInfo.id); + } deleteSessionTools(sessionInfo.id); await managers.skillMcpManager.disconnectSession(sessionInfo.id); await lspManager.cleanupTempDirectoryClients(); diff --git a/src/tools/call-omo-agent/reused-sync-session-delete-cleanup.test.ts b/src/tools/call-omo-agent/reused-sync-session-delete-cleanup.test.ts new file mode 100644 index 000000000..cfaf9f497 --- /dev/null +++ b/src/tools/call-omo-agent/reused-sync-session-delete-cleanup.test.ts @@ -0,0 +1,87 @@ +import { afterEach, describe, expect, it } from "bun:test" + +import { + _resetForTesting, + subagentSessions, + syncSubagentSessions, +} from "../../features/claude-code-session-state" +import { createEventHandler } from "../../plugin/event" + +function createMinimalEventHandler() { + return createEventHandler({ + ctx: {} as never, + pluginConfig: {} as never, + firstMessageVariantGate: { + markSessionCreated: () => {}, + clear: () => {}, + }, + managers: { + tmuxSessionManager: { + onSessionCreated: async () => {}, + onSessionDeleted: async () => {}, + }, + skillMcpManager: { + disconnectSession: async () => {}, + }, + } as never, + hooks: { + autoUpdateChecker: { event: async () => {} }, + claudeCodeHooks: { event: async () => {} }, + backgroundNotificationHook: { event: async () => {} }, + sessionNotification: async () => {}, + todoContinuationEnforcer: { handler: async () => {} }, + unstableAgentBabysitter: { event: async () => {} }, + contextWindowMonitor: { event: async () => {} }, + directoryAgentsInjector: { event: async () => {} }, + directoryReadmeInjector: { event: async () => {} }, + rulesInjector: { event: async () => {} }, + thinkMode: { event: async () => {} }, + anthropicContextWindowLimitRecovery: { event: async () => {} }, + runtimeFallback: undefined, + modelFallback: undefined, + agentUsageReminder: { event: async () => {} }, + categorySkillReminder: { event: async () => {} }, + interactiveBashSession: { event: async () => {} }, + ralphLoop: { event: async () => {} }, + stopContinuationGuard: { event: async () => {}, isStopped: () => false }, + compactionTodoPreserver: { event: async () => {} }, + writeExistingFileGuard: { event: async () => {} }, + atlasHook: { handler: async () => {} }, + } as never, + }) +} + +describe("reused sync session delete cleanup", () => { + afterEach(() => { + _resetForTesting() + }) + + it("removes reused sync sessions from subagentSessions when session.deleted fires", async () => { + // given + const syncSessionID = "ses-reused-sync-delete-cleanup" + const unrelatedSubagentSessionID = "ses-unrelated-subagent-delete-cleanup" + const eventHandler = createMinimalEventHandler() + const input = { + event: { + type: "session.deleted", + properties: { + info: { + id: syncSessionID, + }, + }, + }, + } as Parameters>[0] + + subagentSessions.add(syncSessionID) + syncSubagentSessions.add(syncSessionID) + subagentSessions.add(unrelatedSubagentSessionID) + + // when + await eventHandler(input) + + // then + expect(syncSubagentSessions.has(syncSessionID)).toBe(false) + expect(subagentSessions.has(syncSessionID)).toBe(false) + expect(subagentSessions.has(unrelatedSubagentSessionID)).toBe(true) + }) +}) diff --git a/src/tools/delegate-task/cancel-unstable-agent-task.ts b/src/tools/delegate-task/cancel-unstable-agent-task.ts new file mode 100644 index 000000000..22deed878 --- /dev/null +++ b/src/tools/delegate-task/cancel-unstable-agent-task.ts @@ -0,0 +1,19 @@ +import type { ExecutorContext } from "./executor-types" + +export async function cancelUnstableAgentTask( + manager: ExecutorContext["manager"], + taskID: string | undefined, + reason: string +): Promise { + if (!taskID || typeof manager.cancelTask !== "function") { + return + } + + await Promise.allSettled([ + manager.cancelTask(taskID, { + source: "unstable-agent-task", + reason, + skipNotification: true, + }), + ]) +} diff --git a/src/tools/delegate-task/unstable-agent-cleanup.test.ts b/src/tools/delegate-task/unstable-agent-cleanup.test.ts new file mode 100644 index 000000000..3647351e0 --- /dev/null +++ b/src/tools/delegate-task/unstable-agent-cleanup.test.ts @@ -0,0 +1,176 @@ +declare const require: (name: string) => any +const { describe, test, expect, beforeEach, afterEach } = require("bun:test") + +import { __resetTimingConfig, __setTimingConfig } from "./timing" + +function createArgs() { + return { + description: "cleanup case", + prompt: "run", + category: "unspecified-low", + run_in_background: false, + load_skills: [], + command: undefined, + } +} + +function createToolContext(aborted = false) { + const controller = new AbortController() + if (aborted) { + controller.abort() + } + + return { + sessionID: "parent-session", + messageID: "parent-message", + agent: "test-agent", + abort: controller.signal, + metadata: () => Promise.resolve(), + } +} + +function createParentContext() { + return { + sessionID: "parent-session", + messageID: "parent-message", + model: "gpt-test", + agent: "test-agent", + } +} + +describe("executeUnstableAgentTask cleanup", () => { + beforeEach(() => { + __setTimingConfig({ + POLL_INTERVAL_MS: 10, + MIN_STABILITY_TIME_MS: 0, + STABILITY_POLLS_REQUIRED: 1, + WAIT_FOR_SESSION_TIMEOUT_MS: 100, + WAIT_FOR_SESSION_INTERVAL_MS: 10, + }) + }) + + afterEach(() => { + __resetTimingConfig() + }) + + test("cancels launched task when parent aborts during monitoring", async () => { + // given + const { executeUnstableAgentTask } = require("./unstable-agent-task") + const cancelCalls: Array<{ taskId: string; options?: Record }> = [] + + const mockManager = { + launch: async () => ({ id: "bg_abort_monitoring", sessionID: "ses_abort_monitoring", status: "running" }), + getTask: () => ({ id: "bg_abort_monitoring", sessionID: "ses_abort_monitoring", status: "running" }), + cancelTask: async (taskId: string, options?: Record) => { + cancelCalls.push({ taskId, options }) + return true + }, + } + + // when + const result = await executeUnstableAgentTask( + createArgs(), + createToolContext(true), + { + manager: mockManager, + client: { + session: { + status: async () => ({ data: {} }), + messages: async () => ({ data: [] }), + }, + }, + }, + createParentContext(), + "test-agent", + undefined, + undefined, + "gpt-test" + ) + + // then + expect(result).toContain("Task aborted (was running in background mode).") + expect(cancelCalls).toHaveLength(1) + expect(cancelCalls[0]?.taskId).toBe("bg_abort_monitoring") + }) + + test("cancels launched task when monitored timeout budget is exhausted", async () => { + // given + const { executeUnstableAgentTask } = require("./unstable-agent-task") + const cancelCalls: Array<{ taskId: string; options?: Record }> = [] + + const mockManager = { + launch: async () => ({ id: "bg_timeout_cleanup", sessionID: "ses_timeout_cleanup", status: "running" }), + getTask: () => ({ id: "bg_timeout_cleanup", sessionID: "ses_timeout_cleanup", status: "running" }), + cancelTask: async (taskId: string, options?: Record) => { + cancelCalls.push({ taskId, options }) + return true + }, + } + + // when + const result = await executeUnstableAgentTask( + createArgs(), + createToolContext(), + { + manager: mockManager, + client: { + session: { + status: async () => ({ data: { ses_timeout_cleanup: { type: "busy" } } }), + messages: async () => ({ data: [] }), + }, + }, + syncPollTimeoutMs: 0, + }, + createParentContext(), + "test-agent", + undefined, + undefined, + "gpt-test" + ) + + // then + expect(result).toContain("SUPERVISED TASK TIMED OUT") + expect(cancelCalls).toHaveLength(1) + expect(cancelCalls[0]?.taskId).toBe("bg_timeout_cleanup") + }) + + test("cancels launched task when parent aborts while waiting for session start", async () => { + // given + const { executeUnstableAgentTask } = require("./unstable-agent-task") + const cancelCalls: Array<{ taskId: string; options?: Record }> = [] + + const mockManager = { + launch: async () => ({ id: "bg_wait_abort", status: "pending" }), + getTask: () => ({ id: "bg_wait_abort", status: "pending" }), + cancelTask: async (taskId: string, options?: Record) => { + cancelCalls.push({ taskId, options }) + return true + }, + } + + // when + const result = await executeUnstableAgentTask( + createArgs(), + createToolContext(true), + { + manager: mockManager, + client: { + session: { + status: async () => ({ data: {} }), + messages: async () => ({ data: [] }), + }, + }, + }, + createParentContext(), + "test-agent", + undefined, + undefined, + "gpt-test" + ) + + // then + expect(result).toContain("Task aborted while waiting for session to start.") + expect(cancelCalls).toHaveLength(1) + expect(cancelCalls[0]?.taskId).toBe("bg_wait_abort") + }) +}) diff --git a/src/tools/delegate-task/unstable-agent-task.ts b/src/tools/delegate-task/unstable-agent-task.ts index 5b92955bd..8aa2dce81 100644 --- a/src/tools/delegate-task/unstable-agent-task.ts +++ b/src/tools/delegate-task/unstable-agent-task.ts @@ -2,6 +2,7 @@ import type { DelegateTaskArgs, ToolContextWithMetadata } from "./types" import type { ExecutorContext, ParentContext, SessionMessage } from "./executor-types" import { DEFAULT_SYNC_POLL_TIMEOUT_MS, getTimingConfig } from "./timing" import { buildTaskPrompt } from "./prompt-builder" +import { cancelUnstableAgentTask } from "./cancel-unstable-agent-task" import { storeToolMetadata } from "../../features/tool-metadata-store" import { formatDuration } from "./time-formatter" import { formatDetailedError } from "./error-formatting" @@ -20,6 +21,8 @@ export async function executeUnstableAgentTask( actualModel: string | undefined ): Promise { const { manager, client, syncPollTimeoutMs } = executorCtx + let cleanupReason: string | undefined + let launchedTaskID: string | undefined try { const effectivePrompt = buildTaskPrompt(args.prompt, agentToUse) @@ -38,12 +41,14 @@ export async function executeUnstableAgentTask( category: args.category, sessionPermission: QUESTION_DENIED_SESSION_PERMISSION, }) + launchedTaskID = task.id const timing = getTimingConfig() const waitStart = Date.now() let sessionID = task.sessionID while (!sessionID && Date.now() - waitStart < timing.WAIT_FOR_SESSION_TIMEOUT_MS) { if (ctx.abort?.aborted) { + cleanupReason = "Parent aborted while waiting for unstable task session start" return `Task aborted while waiting for session to start.\n\nTask ID: ${task.id}` } await new Promise(resolve => setTimeout(resolve, timing.WAIT_FOR_SESSION_INTERVAL_MS)) @@ -51,6 +56,7 @@ export async function executeUnstableAgentTask( sessionID = updated?.sessionID } if (!sessionID) { + cleanupReason = "Unstable task session start timed out before session became available" return formatDetailedError(new Error(`Task failed to start within timeout (30s). Task ID: ${task.id}, Status: ${task.status}`), { operation: "Launch monitored background task", args, @@ -88,6 +94,7 @@ export async function executeUnstableAgentTask( while (Date.now() - pollStart < (syncPollTimeoutMs ?? DEFAULT_SYNC_POLL_TIMEOUT_MS)) { if (ctx.abort?.aborted) { + cleanupReason = "Parent aborted while monitoring unstable background task" return `Task aborted (was running in background mode).\n\nSession ID: ${sessionID}` } @@ -148,6 +155,7 @@ session_id: ${sessionID} } if (!completedDuringMonitoring) { + cleanupReason = "Monitored unstable background task exceeded timeout budget" const duration = formatDuration(startTime) const timeoutBudgetMs = syncPollTimeoutMs ?? DEFAULT_SYNC_POLL_TIMEOUT_MS return `SUPERVISED TASK TIMED OUT @@ -215,11 +223,18 @@ ${textContent || "(No text output)"} session_id: ${sessionID} ` } catch (error) { + if (!cleanupReason) { + cleanupReason = "exception" + } return formatDetailedError(error, { operation: "Launch monitored background task", args, agent: agentToUse, category: args.category, }) + } finally { + if (cleanupReason) { + await cancelUnstableAgentTask(manager, launchedTaskID, cleanupReason) + } } } diff --git a/src/tools/slashcommand/execution-compatibility.test.ts b/src/tools/slashcommand/execution-compatibility.test.ts new file mode 100644 index 000000000..92ef26216 --- /dev/null +++ b/src/tools/slashcommand/execution-compatibility.test.ts @@ -0,0 +1,63 @@ +import { afterEach, beforeEach, describe, expect, it } from "bun:test" +import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from "node:fs" +import { tmpdir } from "node:os" +import { join } from "node:path" +import { executeSlashCommand } from "../../hooks/auto-slash-command/executor" +import { discoverCommandsSync } from "./command-discovery" + +describe("slashcommand discovery and execution compatibility", () => { + let tempDir = "" + let originalWorkingDirectory = "" + let originalOpencodeConfigDir: string | undefined + + beforeEach(() => { + tempDir = mkdtempSync(join(tmpdir(), "omo-slashcommand-compat-test-")) + originalWorkingDirectory = process.cwd() + originalOpencodeConfigDir = process.env.OPENCODE_CONFIG_DIR + }) + + afterEach(() => { + process.chdir(originalWorkingDirectory) + + if (originalOpencodeConfigDir === undefined) { + delete process.env.OPENCODE_CONFIG_DIR + } else { + process.env.OPENCODE_CONFIG_DIR = originalOpencodeConfigDir + } + + rmSync(tempDir, { recursive: true, force: true }) + }) + + it("executes commands discovered from a parent opencode config dir", async () => { + // given + const projectDir = join(tempDir, "project") + const opencodeRootDir = join(tempDir, "opencode-root") + const profileConfigDir = join(opencodeRootDir, "profiles", "codex") + const parentCommandDir = join(opencodeRootDir, "command") + const commandName = "parent-only-command" + + mkdirSync(projectDir, { recursive: true }) + mkdirSync(profileConfigDir, { recursive: true }) + mkdirSync(parentCommandDir, { recursive: true }) + writeFileSync( + join(parentCommandDir, `${commandName}.md`), + `---\ndescription: Parent config command\n---\nExecute from parent config.\n`, + ) + process.env.OPENCODE_CONFIG_DIR = profileConfigDir + process.chdir(projectDir) + + expect(discoverCommandsSync(projectDir).some(command => command.name === commandName)).toBe(true) + + // when + const result = await executeSlashCommand({ + command: commandName, + args: "", + raw: `/${commandName}`, + }, { skills: [] }) + + // then + expect(result.success).toBe(true) + expect(result.replacementText).toContain("Execute from parent config.") + expect(result.replacementText).toContain("**Scope**: opencode") + }) +})