From b0e2630db1f4afc93316dc56d726bc179d9d8529 Mon Sep 17 00:00:00 2001 From: ismeth Date: Tue, 17 Feb 2026 14:52:22 +0100 Subject: [PATCH] =?UTF-8?q?fix(athena):=20make=20council=20tool=20blocking?= =?UTF-8?q?=20=E2=80=94=20collect=20results=20directly=20instead=20of=20po?= =?UTF-8?q?lling?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The athena_council tool now waits for all council members to complete and returns their collected results as markdown, eliminating the need for Athena to repeatedly call background_output per member (which created excessive UI noise). - Add result-collector.ts that polls task status and fetches session content - Update tool to accept BackgroundOutputClient and return formatted markdown - Update Athena prompt to remove background_output polling steps - Rewrite tests for new blocking behavior and markdown output format --- src/agents/athena/agent.ts | 16 +-- src/plugin/tool-registry.ts | 1 + src/tools/athena-council/result-collector.ts | 116 +++++++++++++++ src/tools/athena-council/tools.test.ts | 142 +++++++++---------- src/tools/athena-council/tools.ts | 68 +++++---- 5 files changed, 235 insertions(+), 108 deletions(-) create mode 100644 src/tools/athena-council/result-collector.ts diff --git a/src/agents/athena/agent.ts b/src/agents/athena/agent.ts index c79c33f4b..06fa7fdf5 100644 --- a/src/agents/athena/agent.ts +++ b/src/agents/athena/agent.ts @@ -67,20 +67,14 @@ You are an ORCHESTRATOR, not an analyst. Your council members do the analysis. Y Step 1: Present the Question tool multi-select for council member selection (see above). Once the user responds, call athena_council with the user's question. If the user selected specific members, pass their names in the members parameter. If the user selected "All Members", omit the members parameter. -Step 2: Call athena_council with the question and selected members. It returns immediately with task IDs for launched council members. +Step 2: Call athena_council with the question and selected members. The tool launches all council members in parallel, waits for them to complete, and returns ALL of their responses in a single result. This may take a few minutes — that is expected. -Step 3: After calling athena_council, use background_output(task_id="...") for each returned task ID to retrieve each member's full response. -- The system will notify you when tasks complete. -- Call background_output for each completed task. -- Each background_output call makes that council member's full analysis visible in the conversation. -- Do NOT synthesize until ALL launched members have responded. - -Step 4: After collecting ALL council member responses via background_output, synthesize findings: +Step 3: Synthesize the findings returned by athena_council: - Group findings by agreement level: unanimous, majority, minority, solo - Solo findings are potential false positives — flag the risk explicitly - Add your own assessment and rationale to each finding -Step 5: Present synthesized findings to the user grouped by agreement level (unanimous first, then majority, minority, solo). Then use the Question tool to ask which action to take: +Step 4: Present synthesized findings to the user grouped by agreement level (unanimous first, then majority, minority, solo). Then use the Question tool to ask which action to take: Question({ questions: [{ @@ -95,7 +89,7 @@ Question({ }] }) -Step 6: After the user selects an action: +Step 5: After the user selects an action: - **"Fix now (Atlas)"** → Call switch_agent with agent="atlas" and context containing the confirmed findings summary, the original question, and instruction to implement the fixes. - **"Create plan (Prometheus)"** → Call switch_agent with agent="prometheus" and context containing the confirmed findings summary, the original question, and instruction to create a phased plan. - **"No action"** → Acknowledge and end. Do not delegate. @@ -105,7 +99,7 @@ The switch_agent tool switches the active agent. After you call it, end your res ## Constraints - Use the Question tool for member selection BEFORE calling athena_council (unless user pre-specified). - Use the Question tool for action selection AFTER synthesis (unless user already stated intent). -- After athena_council, use background_output for each returned task ID before synthesizing. +- Do NOT use background_output — athena_council returns all member responses directly. - Do NOT write or edit files directly. - Do NOT delegate without explicit user confirmation via Question tool. - Do NOT ignore solo finding false-positive warnings. diff --git a/src/plugin/tool-registry.ts b/src/plugin/tool-registry.ts index e55d94a17..787afaf3a 100644 --- a/src/plugin/tool-registry.ts +++ b/src/plugin/tool-registry.ts @@ -55,6 +55,7 @@ export function createToolRegistry(args: { const athenaCouncilTool = createAthenaCouncilTool({ backgroundManager: managers.backgroundManager, councilConfig: athenaCouncilConfig, + client: ctx.client, }) const isMultimodalLookerEnabled = !(pluginConfig.disabled_agents ?? []).some( diff --git a/src/tools/athena-council/result-collector.ts b/src/tools/athena-council/result-collector.ts new file mode 100644 index 000000000..bcfe25ef3 --- /dev/null +++ b/src/tools/athena-council/result-collector.ts @@ -0,0 +1,116 @@ +import type { BackgroundManager } from "../../features/background-agent" +import type { CouncilLaunchedMember } from "../../agents/athena/types" +import type { BackgroundOutputClient, BackgroundOutputMessagesResult } from "../background-task/clients" +import { extractMessages, getErrorMessage } from "../background-task/session-messages" + +const POLL_INTERVAL_MS = 2_000 +const DEFAULT_TIMEOUT_MS = 5 * 60 * 1_000 + +export interface CollectedMemberResult { + name: string + model: string + taskId: string + status: "completed" | "error" | "cancelled" | "timeout" + content: string +} + +export interface CollectedCouncilResults { + results: CollectedMemberResult[] + allCompleted: boolean +} + +/** + * Waits for all launched council members to complete, then fetches their + * session messages and returns extracted text content. + * + * This replaces the previous flow where Athena had to manually poll + * background_output for each member, which created excessive UI noise. + */ +export async function collectCouncilResults( + launched: CouncilLaunchedMember[], + manager: BackgroundManager, + client: BackgroundOutputClient, + abort?: AbortSignal, + timeoutMs = DEFAULT_TIMEOUT_MS +): Promise { + const pendingIds = new Set(launched.map((m) => m.taskId)) + const completedMap = new Map() + const deadline = Date.now() + timeoutMs + + while (pendingIds.size > 0 && Date.now() < deadline) { + if (abort?.aborted) break + + for (const taskId of pendingIds) { + const task = manager.getTask(taskId) + if (!task) { + completedMap.set(taskId, "error") + pendingIds.delete(taskId) + continue + } + if (task.status === "completed") { + completedMap.set(taskId, "completed") + pendingIds.delete(taskId) + } else if (task.status === "error" || task.status === "cancelled" || task.status === "interrupt") { + completedMap.set(taskId, task.status === "interrupt" ? "cancelled" : task.status) + pendingIds.delete(taskId) + } + } + + if (pendingIds.size > 0) { + await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS)) + } + } + + const results: CollectedMemberResult[] = [] + + for (const entry of launched) { + const memberName = entry.member.name ?? entry.member.model + const status = completedMap.get(entry.taskId) ?? "timeout" + + if (status !== "completed") { + results.push({ name: memberName, model: entry.member.model, taskId: entry.taskId, status, content: "" }) + continue + } + + const content = await fetchMemberContent(entry.taskId, manager, client) + results.push({ name: memberName, model: entry.member.model, taskId: entry.taskId, status, content }) + } + + return { + results, + allCompleted: pendingIds.size === 0, + } +} + +async function fetchMemberContent( + taskId: string, + manager: BackgroundManager, + client: BackgroundOutputClient +): Promise { + const task = manager.getTask(taskId) + if (!task?.sessionID) return "(No session available)" + + const messagesResult: BackgroundOutputMessagesResult = await client.session.messages({ + path: { id: task.sessionID }, + }) + + const errorMsg = getErrorMessage(messagesResult) + if (errorMsg) return `(Error fetching results: ${errorMsg})` + + const messages = extractMessages(messagesResult) + if (!Array.isArray(messages) || messages.length === 0) return "(No messages found)" + + const assistantMessages = messages.filter((m) => m.info?.role === "assistant") + if (assistantMessages.length === 0) return "(No assistant response found)" + + const textParts: string[] = [] + for (const message of assistantMessages) { + for (const part of message.parts ?? []) { + if ((part.type === "text" || part.type === "reasoning") && part.text) { + textParts.push(part.text) + } + } + } + + return textParts.join("\n\n") || "(No text content)" +} diff --git a/src/tools/athena-council/tools.test.ts b/src/tools/athena-council/tools.test.ts index 8c8b56c47..1d8890224 100644 --- a/src/tools/athena-council/tools.test.ts +++ b/src/tools/athena-council/tools.test.ts @@ -3,8 +3,21 @@ import { describe, expect, test } from "bun:test" import type { BackgroundManager } from "../../features/background-agent" import type { BackgroundTask } from "../../features/background-agent/types" +import type { BackgroundOutputClient } from "../background-task/clients" import { createAthenaCouncilTool, filterCouncilMembers } from "./tools" +const mockClient = { + session: { + messages: async () => ({ + data: [{ + id: "msg-1", + info: { role: "assistant" }, + parts: [{ type: "text", text: "Test analysis result" }], + }], + }), + }, +} as unknown as BackgroundOutputClient + const mockManager = { getTask: () => undefined, launch: async () => { @@ -25,27 +38,19 @@ const configuredMembers = [ { model: "google/gemini-3-pro" }, ] -function createRunningTask(id: string): BackgroundTask { +function createCompletedTask(id: string): BackgroundTask { return { id, parentSessionID: "session-1", parentMessageID: "message-1", description: `Council member task ${id}`, prompt: "prompt", - agent: "athena", - status: "running", + agent: "council-member", + status: "completed", + sessionID: `ses-${id}`, } } -function parseLaunchResult(result: unknown): { - launched: number - members: Array<{ task_id: string; name: string; model: string; status: string }> - failed: Array<{ name: string; model: string; error: string }> -} { - expect(typeof result).toBe("string") - return JSON.parse(result as string) -} - describe("filterCouncilMembers", () => { test("returns all members when selection is undefined", () => { // #given @@ -142,6 +147,7 @@ describe("createAthenaCouncilTool", () => { const athenaCouncilTool = createAthenaCouncilTool({ backgroundManager: mockManager, councilConfig: undefined, + client: mockClient, }) // #when @@ -156,6 +162,7 @@ describe("createAthenaCouncilTool", () => { const athenaCouncilTool = createAthenaCouncilTool({ backgroundManager: mockManager, councilConfig: { members: [] }, + client: mockClient, }) // #when @@ -170,6 +177,7 @@ describe("createAthenaCouncilTool", () => { const athenaCouncilTool = createAthenaCouncilTool({ backgroundManager: mockManager, councilConfig: { members: [{ model: "openai/gpt-5.3-codex" }] }, + client: mockClient, }) // #then - description should be dynamic and include the member model @@ -184,6 +192,7 @@ describe("createAthenaCouncilTool", () => { const athenaCouncilTool = createAthenaCouncilTool({ backgroundManager: mockManager, councilConfig: { members: configuredMembers }, + client: mockClient, }) const toolArgs = { question: "Who should investigate this?", @@ -197,63 +206,54 @@ describe("createAthenaCouncilTool", () => { expect(result).toBe("Unknown council members: unknown-model. Available members: Claude, GPT, google/gemini-3-pro.") }) - test("returns launched task_ids and member mapping for configured council", async () => { + test("returns collected markdown results for all configured council members", async () => { // #given let launchCount = 0 + const taskStore = new Map() const launchManager = { launch: async () => { launchCount += 1 - return createRunningTask(`bg-${launchCount}`) + const task = createCompletedTask(`bg-${launchCount}`) + taskStore.set(task.id, task) + return task }, - getTask: () => undefined, + getTask: (id: string) => taskStore.get(id), } as unknown as BackgroundManager const athenaCouncilTool = createAthenaCouncilTool({ backgroundManager: launchManager, councilConfig: { members: configuredMembers }, + client: mockClient, }) // #when const result = await athenaCouncilTool.execute({ question: "How should we proceed?" }, mockToolContext) - const parsed = parseLaunchResult(result) - // #then - expect(parsed.launched).toBe(3) - expect(parsed.failed).toEqual([]) - expect(parsed.members).toEqual([ - { - task_id: "bg-1", - name: "Claude", - model: "anthropic/claude-sonnet-4-5", - status: "running", - }, - { - task_id: "bg-2", - name: "GPT", - model: "openai/gpt-5.3-codex", - status: "running", - }, - { - task_id: "bg-3", - name: "google/gemini-3-pro", - model: "google/gemini-3-pro", - status: "running", - }, - ]) + // #then - returns markdown with council results, one section per member + expect(result).toContain("## Council Results") + expect(result).toContain("How should we proceed?") + expect(result).toContain("### Claude (anthropic/claude-sonnet-4-5)") + expect(result).toContain("### GPT (openai/gpt-5.3-codex)") + expect(result).toContain("### google/gemini-3-pro (google/gemini-3-pro)") + expect(result).toContain("Test analysis result") }) - test("returns task_ids length matching selected members", async () => { + test("returns collected results only for selected members", async () => { // #given let launchCount = 0 + const taskStore = new Map() const launchManager = { launch: async () => { launchCount += 1 - return createRunningTask(`bg-${launchCount}`) + const task = createCompletedTask(`bg-${launchCount}`) + taskStore.set(task.id, task) + return task }, - getTask: () => undefined, + getTask: (id: string) => taskStore.get(id), } as unknown as BackgroundManager const athenaCouncilTool = createAthenaCouncilTool({ backgroundManager: launchManager, councilConfig: { members: configuredMembers }, + client: mockClient, }) // #when @@ -264,54 +264,50 @@ describe("createAthenaCouncilTool", () => { }, mockToolContext ) - const parsed = parseLaunchResult(result) - // #then - expect(parsed.launched).toBe(2) - expect(parsed.members).toHaveLength(2) - expect(parsed.members.map((member) => member.name)).toEqual(["GPT", "google/gemini-3-pro"]) + // #then - only selected members appear in output + expect(result).toContain("### GPT (openai/gpt-5.3-codex)") + expect(result).toContain("### google/gemini-3-pro (google/gemini-3-pro)") + expect(result).not.toContain("### Claude") + expect(launchCount).toBe(2) }) - test("returns failed launches inline while keeping successful task mappings", async () => { + test("includes launch failures alongside successful member results", async () => { // #given let launchCount = 0 + const taskStore = new Map() const launchManager = { launch: async () => { launchCount += 1 if (launchCount === 2) { throw new Error("provider outage") } - - return createRunningTask(`bg-${launchCount}`) + const task = createCompletedTask(`bg-${launchCount}`) + taskStore.set(task.id, task) + return task }, - getTask: () => undefined, + getTask: (id: string) => taskStore.get(id), } as unknown as BackgroundManager const athenaCouncilTool = createAthenaCouncilTool({ backgroundManager: launchManager, councilConfig: { members: configuredMembers }, + client: mockClient, }) // #when const result = await athenaCouncilTool.execute({ question: "Any concerns?" }, mockToolContext) - const parsed = parseLaunchResult(result) - // #then - expect(parsed.launched).toBe(2) - expect(parsed.members).toHaveLength(2) - expect(parsed.failed).toHaveLength(1) - expect(parsed.failed[0]).toEqual({ - name: "GPT", - model: "openai/gpt-5.3-codex", - error: "Launch failed: Error: provider outage", - }) + // #then - successful members have results, failed member listed in failures section + expect(result).toContain("### Claude (anthropic/claude-sonnet-4-5)") + expect(result).toContain("### google/gemini-3-pro (google/gemini-3-pro)") + expect(result).toContain("### Launch Failures") + expect(result).toContain("**GPT**") + expect(result).toContain("provider outage") }) test("returns dedup error when council is already running in same session", async () => { - // #given - let resolveLaunch: ((task: BackgroundTask) => void) | undefined - const pendingLaunch = new Promise((resolve) => { - resolveLaunch = resolve - }) + // #given - use a never-resolving launch to keep the first execution in-flight + const pendingLaunch = new Promise(() => {}) const launchManager = { launch: async () => pendingLaunch, getTask: () => undefined, @@ -319,17 +315,19 @@ describe("createAthenaCouncilTool", () => { const athenaCouncilTool = createAthenaCouncilTool({ backgroundManager: launchManager, councilConfig: { members: [{ model: "openai/gpt-5.3-codex" }] }, + client: mockClient, }) - // #when - const firstExecution = athenaCouncilTool.execute({ question: "First run" }, mockToolContext) - const secondExecution = await athenaCouncilTool.execute({ question: "Second run" }, mockToolContext) + // #when - first call starts but never resolves (stuck in launch) + // second call should be rejected by session guard + const _firstExecution = athenaCouncilTool.execute({ question: "First run" }, mockToolContext) - resolveLaunch?.(createRunningTask("bg-dedup")) - const firstResult = parseLaunchResult(await firstExecution) + // Allow microtask queue to process so markCouncilRunning is called + await new Promise((resolve) => setTimeout(resolve, 0)) + + const secondExecution = await athenaCouncilTool.execute({ question: "Second run" }, mockToolContext) // #then expect(secondExecution).toBe("Council is already running for this session. Wait for the current council execution to complete.") - expect(firstResult.launched).toBe(1) }) }) diff --git a/src/tools/athena-council/tools.ts b/src/tools/athena-council/tools.ts index d18389d73..ed74b6c89 100644 --- a/src/tools/athena-council/tools.ts +++ b/src/tools/athena-council/tools.ts @@ -2,11 +2,13 @@ import { tool, type ToolDefinition } from "@opencode-ai/plugin" import { executeCouncil } from "../../agents/athena/council-orchestrator" import type { CouncilConfig, CouncilMemberConfig } from "../../agents/athena/types" import type { BackgroundManager } from "../../features/background-agent" +import type { BackgroundOutputClient } from "../background-task/clients" import { ATHENA_COUNCIL_TOOL_DESCRIPTION_TEMPLATE } from "./constants" import { createCouncilLauncher } from "./council-launcher" import { isCouncilRunning, markCouncilDone, markCouncilRunning } from "./session-guard" import { waitForCouncilSessions } from "./session-waiter" -import type { AthenaCouncilLaunchResult, AthenaCouncilToolArgs } from "./types" +import { collectCouncilResults } from "./result-collector" +import type { AthenaCouncilToolArgs } from "./types" function isCouncilConfigured(councilConfig: CouncilConfig | undefined): councilConfig is CouncilConfig { return Boolean(councilConfig && councilConfig.members.length > 0) @@ -75,8 +77,9 @@ function buildToolDescription(councilConfig: CouncilConfig | undefined): string export function createAthenaCouncilTool(args: { backgroundManager: BackgroundManager councilConfig: CouncilConfig | undefined + client: BackgroundOutputClient }): ToolDefinition { - const { backgroundManager, councilConfig } = args + const { backgroundManager, councilConfig, client } = args const description = buildToolDescription(councilConfig) return tool({ @@ -113,17 +116,13 @@ export function createAthenaCouncilTool(args: { parentAgent: toolContext.agent, }) - // Wait for sessions to be created so we can register metadata for UI visibility. - // This makes council member tasks clickable in the OpenCode TUI, matching the - // behavior of the task tool (delegate-task/background-task.ts). + // Register metadata for UI visibility (makes sessions clickable in TUI). const metadataFn = (toolContext as Record).metadata as | ((input: { title?: string; metadata?: Record }) => Promise) | undefined if (metadataFn && execution.launched.length > 0) { const sessions = await waitForCouncilSessions( - execution.launched, - backgroundManager, - toolContext.abort + execution.launched, backgroundManager, toolContext.abort ) for (const session of sessions) { await metadataFn({ @@ -138,27 +137,46 @@ export function createAthenaCouncilTool(args: { } } - const launchResult: AthenaCouncilLaunchResult = { - launched: execution.launched.length, - members: execution.launched.map((entry) => ({ - task_id: entry.taskId, - name: entry.member.name ?? entry.member.model, - model: entry.member.model, - status: "running", - })), - failed: execution.failures.map((entry) => ({ - name: entry.member.name ?? entry.member.model, - model: entry.member.model, - error: entry.error, - })), - } + // Wait for all members to complete and collect their actual results. + // This eliminates the need for Athena to poll background_output repeatedly. + const collected = await collectCouncilResults( + execution.launched, backgroundManager, client, toolContext.abort + ) - markCouncilDone(toolContext.sessionID) - return JSON.stringify(launchResult) + return formatCouncilOutput(toolArgs.question, collected.results, execution.failures) } catch (error) { - markCouncilDone(toolContext.sessionID) throw error + } finally { + markCouncilDone(toolContext.sessionID) } }, }) } + +function formatCouncilOutput( + question: string, + results: Array<{ name: string; model: string; taskId: string; status: string; content: string }>, + failures: Array<{ member: { name?: string; model: string }; error: string }> +): string { + const sections: string[] = [] + + sections.push(`## Council Results\n\n**Question:** ${question}\n`) + + for (const result of results) { + const header = `### ${result.name} (${result.model})` + if (result.status !== "completed") { + sections.push(`${header}\n\n*Status: ${result.status}*`) + continue + } + sections.push(`${header}\n\n${result.content}`) + } + + if (failures.length > 0) { + const failureLines = failures + .map((f) => `- **${f.member.name ?? f.member.model}**: ${f.error}`) + .join("\n") + sections.push(`### Launch Failures\n\n${failureLines}`) + } + + return sections.join("\n\n---\n\n") +}