From b0e2630db1f4afc93316dc56d726bc179d9d8529 Mon Sep 17 00:00:00 2001
From: ismeth <ufukaltinok@gmail.com>
Date: Tue, 17 Feb 2026 14:52:22 +0100
Subject: [PATCH] =?UTF-8?q?fix(athena):=20make=20council=20tool=20blocking?=
 =?UTF-8?q?=20=E2=80=94=20collect=20results=20directly=20instead=20of=20po?=
 =?UTF-8?q?lling?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The athena_council tool now waits for all council members to complete and
returns their collected results as markdown, eliminating the need for
Athena to repeatedly call background_output per member (which created
excessive UI noise).

- Add result-collector.ts that polls task status and fetches session content
- Update tool to accept BackgroundOutputClient and return formatted markdown
- Update Athena prompt to remove background_output polling steps
- Rewrite tests for new blocking behavior and markdown output format
---
 src/agents/athena/agent.ts                   |  16 +--
 src/plugin/tool-registry.ts                  |   1 +
 src/tools/athena-council/result-collector.ts | 116 +++++++++++++++
 src/tools/athena-council/tools.test.ts       | 142 +++++++++----------
 src/tools/athena-council/tools.ts            |  68 +++++----
 5 files changed, 235 insertions(+), 108 deletions(-)
 create mode 100644 src/tools/athena-council/result-collector.ts

diff --git a/src/agents/athena/agent.ts b/src/agents/athena/agent.ts
index c79c33f4b..06fa7fdf5 100644
--- a/src/agents/athena/agent.ts
+++ b/src/agents/athena/agent.ts
@@ -67,20 +67,14 @@ You are an ORCHESTRATOR, not an analyst. Your council members do the analysis. Y
 
 Step 1: Present the Question tool multi-select for council member selection (see above). Once the user responds, call athena_council with the user's question. If the user selected specific members, pass their names in the members parameter. If the user selected "All Members", omit the members parameter.
 
-Step 2: Call athena_council with the question and selected members. It returns immediately with task IDs for launched council members.
+Step 2: Call athena_council with the question and selected members. The tool launches all council members in parallel, waits for them to complete, and returns ALL of their responses in a single result. This may take a few minutes — that is expected.
 
-Step 3: After calling athena_council, use background_output(task_id="...") for each returned task ID to retrieve each member's full response.
-- The system will notify you when tasks complete.
-- Call background_output for each completed task.
-- Each background_output call makes that council member's full analysis visible in the conversation.
-- Do NOT synthesize until ALL launched members have responded.
-
-Step 4: After collecting ALL council member responses via background_output, synthesize findings:
+Step 3: Synthesize the findings returned by athena_council:
 - Group findings by agreement level: unanimous, majority, minority, solo
 - Solo findings are potential false positives — flag the risk explicitly
 - Add your own assessment and rationale to each finding
 
-Step 5: Present synthesized findings to the user grouped by agreement level (unanimous first, then majority, minority, solo). Then use the Question tool to ask which action to take:
+Step 4: Present synthesized findings to the user grouped by agreement level (unanimous first, then majority, minority, solo). Then use the Question tool to ask which action to take:
 
 Question({
   questions: [{
@@ -95,7 +89,7 @@ Question({
   }]
 })
 
-Step 6: After the user selects an action:
+Step 5: After the user selects an action:
 - **"Fix now (Atlas)"** → Call switch_agent with agent="atlas" and context containing the confirmed findings summary, the original question, and instruction to implement the fixes.
 - **"Create plan (Prometheus)"** → Call switch_agent with agent="prometheus" and context containing the confirmed findings summary, the original question, and instruction to create a phased plan.
 - **"No action"** → Acknowledge and end. Do not delegate.
@@ -105,7 +99,7 @@ The switch_agent tool switches the active agent. After you call it, end your res
 ## Constraints
 - Use the Question tool for member selection BEFORE calling athena_council (unless user pre-specified).
 - Use the Question tool for action selection AFTER synthesis (unless user already stated intent).
-- After athena_council, use background_output for each returned task ID before synthesizing.
+- Do NOT use background_output — athena_council returns all member responses directly.
 - Do NOT write or edit files directly.
 - Do NOT delegate without explicit user confirmation via Question tool.
 - Do NOT ignore solo finding false-positive warnings.
diff --git a/src/plugin/tool-registry.ts b/src/plugin/tool-registry.ts
index e55d94a17..787afaf3a 100644
--- a/src/plugin/tool-registry.ts
+++ b/src/plugin/tool-registry.ts
@@ -55,6 +55,7 @@ export function createToolRegistry(args: {
   const athenaCouncilTool = createAthenaCouncilTool({
     backgroundManager: managers.backgroundManager,
     councilConfig: athenaCouncilConfig,
+    client: ctx.client,
   })
 
   const isMultimodalLookerEnabled = !(pluginConfig.disabled_agents ?? []).some(
diff --git a/src/tools/athena-council/result-collector.ts b/src/tools/athena-council/result-collector.ts
new file mode 100644
index 000000000..bcfe25ef3
--- /dev/null
+++ b/src/tools/athena-council/result-collector.ts
@@ -0,0 +1,116 @@
+import type { BackgroundManager } from "../../features/background-agent"
+import type { CouncilLaunchedMember } from "../../agents/athena/types"
+import type { BackgroundOutputClient, BackgroundOutputMessagesResult } from "../background-task/clients"
+import { extractMessages, getErrorMessage } from "../background-task/session-messages"
+
+const POLL_INTERVAL_MS = 2_000
+const DEFAULT_TIMEOUT_MS = 5 * 60 * 1_000
+
+export interface CollectedMemberResult {
+  name: string
+  model: string
+  taskId: string
+  status: "completed" | "error" | "cancelled" | "timeout"
+  content: string
+}
+
+export interface CollectedCouncilResults {
+  results: CollectedMemberResult[]
+  allCompleted: boolean
+}
+
+/**
+ * Waits for all launched council members to complete, then fetches their
+ * session messages and returns extracted text content.
+ *
+ * This replaces the previous flow where Athena had to manually poll
+ * background_output for each member, which created excessive UI noise.
+ */
+export async function collectCouncilResults(
+  launched: CouncilLaunchedMember[],
+  manager: BackgroundManager,
+  client: BackgroundOutputClient,
+  abort?: AbortSignal,
+  timeoutMs = DEFAULT_TIMEOUT_MS
+): Promise<CollectedCouncilResults> {
+  const pendingIds = new Set(launched.map((m) => m.taskId))
+  const completedMap = new Map<string, "completed" | "error" | "cancelled">()
+  const deadline = Date.now() + timeoutMs
+
+  while (pendingIds.size > 0 && Date.now() < deadline) {
+    if (abort?.aborted) break
+
+    for (const taskId of pendingIds) {
+      const task = manager.getTask(taskId)
+      if (!task) {
+        completedMap.set(taskId, "error")
+        pendingIds.delete(taskId)
+        continue
+      }
+      if (task.status === "completed") {
+        completedMap.set(taskId, "completed")
+        pendingIds.delete(taskId)
+      } else if (task.status === "error" || task.status === "cancelled" || task.status === "interrupt") {
+        completedMap.set(taskId, task.status === "interrupt" ? "cancelled" : task.status)
+        pendingIds.delete(taskId)
+      }
+    }
+
+    if (pendingIds.size > 0) {
+      await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS))
+    }
+  }
+
+  const results: CollectedMemberResult[] = []
+
+  for (const entry of launched) {
+    const memberName = entry.member.name ?? entry.member.model
+    const status = completedMap.get(entry.taskId) ?? "timeout"
+
+    if (status !== "completed") {
+      results.push({ name: memberName, model: entry.member.model, taskId: entry.taskId, status, content: "" })
+      continue
+    }
+
+    const content = await fetchMemberContent(entry.taskId, manager, client)
+    results.push({ name: memberName, model: entry.member.model, taskId: entry.taskId, status, content })
+  }
+
+  return {
+    results,
+    allCompleted: pendingIds.size === 0,
+  }
+}
+
+async function fetchMemberContent(
+  taskId: string,
+  manager: BackgroundManager,
+  client: BackgroundOutputClient
+): Promise<string> {
+  const task = manager.getTask(taskId)
+  if (!task?.sessionID) return "(No session available)"
+
+  const messagesResult: BackgroundOutputMessagesResult = await client.session.messages({
+    path: { id: task.sessionID },
+  })
+
+  const errorMsg = getErrorMessage(messagesResult)
+  if (errorMsg) return `(Error fetching results: ${errorMsg})`
+
+  const messages = extractMessages(messagesResult)
+  if (!Array.isArray(messages) || messages.length === 0) return "(No messages found)"
+
+  const assistantMessages = messages.filter((m) => m.info?.role === "assistant")
+  if (assistantMessages.length === 0) return "(No assistant response found)"
+
+  const textParts: string[] = []
+  for (const message of assistantMessages) {
+    for (const part of message.parts ?? []) {
+      if ((part.type === "text" || part.type === "reasoning") && part.text) {
+        textParts.push(part.text)
+      }
+    }
+  }
+
+  return textParts.join("\n\n") || "(No text content)"
+}
diff --git a/src/tools/athena-council/tools.test.ts b/src/tools/athena-council/tools.test.ts
index 8c8b56c47..1d8890224 100644
--- a/src/tools/athena-council/tools.test.ts
+++ b/src/tools/athena-council/tools.test.ts
@@ -3,8 +3,21 @@
 import { describe, expect, test } from "bun:test"
 import type { BackgroundManager } from "../../features/background-agent"
 import type { BackgroundTask } from "../../features/background-agent/types"
+import type { BackgroundOutputClient } from "../background-task/clients"
 import { createAthenaCouncilTool, filterCouncilMembers } from "./tools"
 
+const mockClient = {
+  session: {
+    messages: async () => ({
+      data: [{
+        id: "msg-1",
+        info: { role: "assistant" },
+        parts: [{ type: "text", text: "Test analysis result" }],
+      }],
+    }),
+  },
+} as unknown as BackgroundOutputClient
+
 const mockManager = {
   getTask: () => undefined,
   launch: async () => {
@@ -25,27 +38,19 @@ const configuredMembers = [
   { model: "google/gemini-3-pro" },
 ]
 
-function createRunningTask(id: string): BackgroundTask {
+function createCompletedTask(id: string): BackgroundTask {
   return {
     id,
     parentSessionID: "session-1",
     parentMessageID: "message-1",
     description: `Council member task ${id}`,
     prompt: "prompt",
-    agent: "athena",
-    status: "running",
+    agent: "council-member",
+    status: "completed",
+    sessionID: `ses-${id}`,
   }
 }
 
-function parseLaunchResult(result: unknown): {
-  launched: number
-  members: Array<{ task_id: string; name: string; model: string; status: string }>
-  failed: Array<{ name: string; model: string; error: string }>
-} {
-  expect(typeof result).toBe("string")
-  return JSON.parse(result as string)
-}
-
 describe("filterCouncilMembers", () => {
   test("returns all members when selection is undefined", () => {
     // #given
@@ -142,6 +147,7 @@ describe("createAthenaCouncilTool", () => {
     const athenaCouncilTool = createAthenaCouncilTool({
       backgroundManager: mockManager,
       councilConfig: undefined,
+      client: mockClient,
     })
 
     // #when
@@ -156,6 +162,7 @@ describe("createAthenaCouncilTool", () => {
     const athenaCouncilTool = createAthenaCouncilTool({
       backgroundManager: mockManager,
       councilConfig: { members: [] },
+      client: mockClient,
     })
 
     // #when
@@ -170,6 +177,7 @@ describe("createAthenaCouncilTool", () => {
     const athenaCouncilTool = createAthenaCouncilTool({
       backgroundManager: mockManager,
       councilConfig: { members: [{ model: "openai/gpt-5.3-codex" }] },
+      client: mockClient,
     })
 
     // #then - description should be dynamic and include the member model
@@ -184,6 +192,7 @@ describe("createAthenaCouncilTool", () => {
     const athenaCouncilTool = createAthenaCouncilTool({
       backgroundManager: mockManager,
       councilConfig: { members: configuredMembers },
+      client: mockClient,
     })
     const toolArgs = {
       question: "Who should investigate this?",
@@ -197,63 +206,54 @@ describe("createAthenaCouncilTool", () => {
     expect(result).toBe("Unknown council members: unknown-model. Available members: Claude, GPT, google/gemini-3-pro.")
   })
 
-  test("returns launched task_ids and member mapping for configured council", async () => {
+  test("returns collected markdown results for all configured council members", async () => {
     // #given
     let launchCount = 0
+    const taskStore = new Map<string, BackgroundTask>()
     const launchManager = {
       launch: async () => {
         launchCount += 1
-        return createRunningTask(`bg-${launchCount}`)
+        const task = createCompletedTask(`bg-${launchCount}`)
+        taskStore.set(task.id, task)
+        return task
       },
-      getTask: () => undefined,
+      getTask: (id: string) => taskStore.get(id),
     } as unknown as BackgroundManager
     const athenaCouncilTool = createAthenaCouncilTool({
       backgroundManager: launchManager,
       councilConfig: { members: configuredMembers },
+      client: mockClient,
     })
 
     // #when
     const result = await athenaCouncilTool.execute({ question: "How should we proceed?" }, mockToolContext)
-    const parsed = parseLaunchResult(result)
 
-    // #then
-    expect(parsed.launched).toBe(3)
-    expect(parsed.failed).toEqual([])
-    expect(parsed.members).toEqual([
-      {
-        task_id: "bg-1",
-        name: "Claude",
-        model: "anthropic/claude-sonnet-4-5",
-        status: "running",
-      },
-      {
-        task_id: "bg-2",
-        name: "GPT",
-        model: "openai/gpt-5.3-codex",
-        status: "running",
-      },
-      {
-        task_id: "bg-3",
-        name: "google/gemini-3-pro",
-        model: "google/gemini-3-pro",
-        status: "running",
-      },
-    ])
+    // #then - returns markdown with council results, one section per member
+    expect(result).toContain("## Council Results")
+    expect(result).toContain("How should we proceed?")
+    expect(result).toContain("### Claude (anthropic/claude-sonnet-4-5)")
+    expect(result).toContain("### GPT (openai/gpt-5.3-codex)")
+    expect(result).toContain("### google/gemini-3-pro (google/gemini-3-pro)")
+    expect(result).toContain("Test analysis result")
   })
 
-  test("returns task_ids length matching selected members", async () => {
+  test("returns collected results only for selected members", async () => {
     // #given
     let launchCount = 0
+    const taskStore = new Map<string, BackgroundTask>()
     const launchManager = {
       launch: async () => {
         launchCount += 1
-        return createRunningTask(`bg-${launchCount}`)
+        const task = createCompletedTask(`bg-${launchCount}`)
+        taskStore.set(task.id, task)
+        return task
       },
-      getTask: () => undefined,
+      getTask: (id: string) => taskStore.get(id),
     } as unknown as BackgroundManager
     const athenaCouncilTool = createAthenaCouncilTool({
       backgroundManager: launchManager,
       councilConfig: { members: configuredMembers },
+      client: mockClient,
     })
 
     // #when
@@ -264,54 +264,50 @@ describe("createAthenaCouncilTool", () => {
       },
       mockToolContext
     )
-    const parsed = parseLaunchResult(result)
 
-    // #then
-    expect(parsed.launched).toBe(2)
-    expect(parsed.members).toHaveLength(2)
-    expect(parsed.members.map((member) => member.name)).toEqual(["GPT", "google/gemini-3-pro"])
+    // #then - only selected members appear in output
+    expect(result).toContain("### GPT (openai/gpt-5.3-codex)")
+    expect(result).toContain("### google/gemini-3-pro (google/gemini-3-pro)")
+    expect(result).not.toContain("### Claude")
+    expect(launchCount).toBe(2)
   })
 
-  test("returns failed launches inline while keeping successful task mappings", async () => {
+  test("includes launch failures alongside successful member results", async () => {
     // #given
     let launchCount = 0
+    const taskStore = new Map<string, BackgroundTask>()
     const launchManager = {
       launch: async () => {
         launchCount += 1
         if (launchCount === 2) {
           throw new Error("provider outage")
         }
-
-        return createRunningTask(`bg-${launchCount}`)
+        const task = createCompletedTask(`bg-${launchCount}`)
+        taskStore.set(task.id, task)
+        return task
       },
-      getTask: () => undefined,
+      getTask: (id: string) => taskStore.get(id),
     } as unknown as BackgroundManager
     const athenaCouncilTool = createAthenaCouncilTool({
       backgroundManager: launchManager,
       councilConfig: { members: configuredMembers },
+      client: mockClient,
     })
 
     // #when
     const result = await athenaCouncilTool.execute({ question: "Any concerns?" }, mockToolContext)
-    const parsed = parseLaunchResult(result)
 
-    // #then
-    expect(parsed.launched).toBe(2)
-    expect(parsed.members).toHaveLength(2)
-    expect(parsed.failed).toHaveLength(1)
-    expect(parsed.failed[0]).toEqual({
-      name: "GPT",
-      model: "openai/gpt-5.3-codex",
-      error: "Launch failed: Error: provider outage",
-    })
+    // #then - successful members have results, failed member listed in failures section
+    expect(result).toContain("### Claude (anthropic/claude-sonnet-4-5)")
+    expect(result).toContain("### google/gemini-3-pro (google/gemini-3-pro)")
+    expect(result).toContain("### Launch Failures")
+    expect(result).toContain("**GPT**")
+    expect(result).toContain("provider outage")
   })
 
   test("returns dedup error when council is already running in same session", async () => {
-    // #given
-    let resolveLaunch: ((task: BackgroundTask) => void) | undefined
-    const pendingLaunch = new Promise<BackgroundTask>((resolve) => {
-      resolveLaunch = resolve
-    })
+    // #given - use a never-resolving launch to keep the first execution in-flight
+    const pendingLaunch = new Promise<BackgroundTask>(() => {})
     const launchManager = {
       launch: async () => pendingLaunch,
       getTask: () => undefined,
@@ -319,17 +315,19 @@ describe("createAthenaCouncilTool", () => {
     const athenaCouncilTool = createAthenaCouncilTool({
       backgroundManager: launchManager,
       councilConfig: { members: [{ model: "openai/gpt-5.3-codex" }] },
+      client: mockClient,
     })
 
-    // #when
-    const firstExecution = athenaCouncilTool.execute({ question: "First run" }, mockToolContext)
-    const secondExecution = await athenaCouncilTool.execute({ question: "Second run" }, mockToolContext)
+    // #when - first call starts but never resolves (stuck in launch)
+    // second call should be rejected by session guard
+    const _firstExecution = athenaCouncilTool.execute({ question: "First run" }, mockToolContext)
 
-    resolveLaunch?.(createRunningTask("bg-dedup"))
-    const firstResult = parseLaunchResult(await firstExecution)
+    // Allow microtask queue to process so markCouncilRunning is called
+    await new Promise((resolve) => setTimeout(resolve, 0))
+
+    const secondExecution = await athenaCouncilTool.execute({ question: "Second run" }, mockToolContext)
 
     // #then
     expect(secondExecution).toBe("Council is already running for this session. Wait for the current council execution to complete.")
-    expect(firstResult.launched).toBe(1)
   })
 })
diff --git a/src/tools/athena-council/tools.ts b/src/tools/athena-council/tools.ts
index d18389d73..ed74b6c89 100644
--- a/src/tools/athena-council/tools.ts
+++ b/src/tools/athena-council/tools.ts
@@ -2,11 +2,13 @@ import { tool, type ToolDefinition } from "@opencode-ai/plugin"
 import { executeCouncil } from "../../agents/athena/council-orchestrator"
 import type { CouncilConfig, CouncilMemberConfig } from "../../agents/athena/types"
 import type { BackgroundManager } from "../../features/background-agent"
+import type { BackgroundOutputClient } from "../background-task/clients"
 import { ATHENA_COUNCIL_TOOL_DESCRIPTION_TEMPLATE } from "./constants"
 import { createCouncilLauncher } from "./council-launcher"
 import { isCouncilRunning, markCouncilDone, markCouncilRunning } from "./session-guard"
 import { waitForCouncilSessions } from "./session-waiter"
-import type { AthenaCouncilLaunchResult, AthenaCouncilToolArgs } from "./types"
+import { collectCouncilResults } from "./result-collector"
+import type { AthenaCouncilToolArgs } from "./types"
 
 function isCouncilConfigured(councilConfig: CouncilConfig | undefined): councilConfig is CouncilConfig {
   return Boolean(councilConfig && councilConfig.members.length > 0)
@@ -75,8 +77,9 @@ function buildToolDescription(councilConfig: CouncilConfig | undefined): string
 export function createAthenaCouncilTool(args: {
   backgroundManager: BackgroundManager
   councilConfig: CouncilConfig | undefined
+  client: BackgroundOutputClient
 }): ToolDefinition {
-  const { backgroundManager, councilConfig } = args
+  const { backgroundManager, councilConfig, client } = args
   const description = buildToolDescription(councilConfig)
 
   return tool({
@@ -113,17 +116,13 @@ export function createAthenaCouncilTool(args: {
           parentAgent: toolContext.agent,
         })
 
-        // Wait for sessions to be created so we can register metadata for UI visibility.
-        // This makes council member tasks clickable in the OpenCode TUI, matching the
-        // behavior of the task tool (delegate-task/background-task.ts).
+        // Register metadata for UI visibility (makes sessions clickable in TUI).
         const metadataFn = (toolContext as Record<string, unknown>).metadata as
           | ((input: { title?: string; metadata?: Record<string, unknown> }) => Promise<void>)
           | undefined
         if (metadataFn && execution.launched.length > 0) {
           const sessions = await waitForCouncilSessions(
-            execution.launched,
-            backgroundManager,
-            toolContext.abort
+            execution.launched, backgroundManager, toolContext.abort
           )
           for (const session of sessions) {
             await metadataFn({
@@ -138,27 +137,46 @@ export function createAthenaCouncilTool(args: {
           }
         }
 
-        const launchResult: AthenaCouncilLaunchResult = {
-          launched: execution.launched.length,
-          members: execution.launched.map((entry) => ({
-            task_id: entry.taskId,
-            name: entry.member.name ?? entry.member.model,
-            model: entry.member.model,
-            status: "running",
-          })),
-          failed: execution.failures.map((entry) => ({
-            name: entry.member.name ?? entry.member.model,
-            model: entry.member.model,
-            error: entry.error,
-          })),
-        }
+        // Wait for all members to complete and collect their actual results.
+        // This eliminates the need for Athena to poll background_output repeatedly.
+        const collected = await collectCouncilResults(
+          execution.launched, backgroundManager, client, toolContext.abort
+        )
 
-        markCouncilDone(toolContext.sessionID)
-        return JSON.stringify(launchResult)
+        return formatCouncilOutput(toolArgs.question, collected.results, execution.failures)
       } catch (error) {
-        markCouncilDone(toolContext.sessionID)
         throw error
+      } finally {
+        markCouncilDone(toolContext.sessionID)
       }
     },
   })
 }
+
+function formatCouncilOutput(
+  question: string,
+  results: Array<{ name: string; model: string; taskId: string; status: string; content: string }>,
+  failures: Array<{ member: { name?: string; model: string }; error: string }>
+): string {
+  const sections: string[] = []
+
+  sections.push(`## Council Results\n\n**Question:** ${question}\n`)
+
+  for (const result of results) {
+    const header = `### ${result.name} (${result.model})`
+    if (result.status !== "completed") {
+      sections.push(`${header}\n\n*Status: ${result.status}*`)
+      continue
+    }
+    sections.push(`${header}\n\n${result.content}`)
+  }
+
+  if (failures.length > 0) {
+    const failureLines = failures
+      .map((f) => `- **${f.member.name ?? f.member.model}**: ${f.error}`)
+      .join("\n")
+    sections.push(`### Launch Failures\n\n${failureLines}`)
+  }
+
+  return sections.join("\n\n---\n\n")
+}