fix(athena): make council tool blocking — collect results directly instead of polling

The athena_council tool now waits for all council members to complete and
returns their collected results as markdown, eliminating the need for
Athena to repeatedly call background_output per member (which created
excessive UI noise).

- Add result-collector.ts that polls task status and fetches session content
- Update tool to accept BackgroundOutputClient and return formatted markdown
- Update Athena prompt to remove background_output polling steps
- Rewrite tests for new blocking behavior and markdown output format
This commit is contained in:
ismeth
2026-02-17 14:52:22 +01:00
committed by YeonGyu-Kim
parent d908a712b9
commit b0e2630db1
5 changed files with 235 additions and 108 deletions

View File

@@ -67,20 +67,14 @@ You are an ORCHESTRATOR, not an analyst. Your council members do the analysis. Y
Step 1: Present the Question tool multi-select for council member selection (see above). Once the user responds, call athena_council with the user's question. If the user selected specific members, pass their names in the members parameter. If the user selected "All Members", omit the members parameter.
Step 2: Call athena_council with the question and selected members. It returns immediately with task IDs for launched council members.
Step 2: Call athena_council with the question and selected members. The tool launches all council members in parallel, waits for them to complete, and returns ALL of their responses in a single result. This may take a few minutes — that is expected.
Step 3: After calling athena_council, use background_output(task_id="...") for each returned task ID to retrieve each member's full response.
- The system will notify you when tasks complete.
- Call background_output for each completed task.
- Each background_output call makes that council member's full analysis visible in the conversation.
- Do NOT synthesize until ALL launched members have responded.
Step 4: After collecting ALL council member responses via background_output, synthesize findings:
Step 3: Synthesize the findings returned by athena_council:
- Group findings by agreement level: unanimous, majority, minority, solo
- Solo findings are potential false positives — flag the risk explicitly
- Add your own assessment and rationale to each finding
Step 5: Present synthesized findings to the user grouped by agreement level (unanimous first, then majority, minority, solo). Then use the Question tool to ask which action to take:
Step 4: Present synthesized findings to the user grouped by agreement level (unanimous first, then majority, minority, solo). Then use the Question tool to ask which action to take:
Question({
questions: [{
@@ -95,7 +89,7 @@ Question({
}]
})
Step 6: After the user selects an action:
Step 5: After the user selects an action:
- **"Fix now (Atlas)"** → Call switch_agent with agent="atlas" and context containing the confirmed findings summary, the original question, and instruction to implement the fixes.
- **"Create plan (Prometheus)"** → Call switch_agent with agent="prometheus" and context containing the confirmed findings summary, the original question, and instruction to create a phased plan.
- **"No action"** → Acknowledge and end. Do not delegate.
@@ -105,7 +99,7 @@ The switch_agent tool switches the active agent. After you call it, end your res
## Constraints
- Use the Question tool for member selection BEFORE calling athena_council (unless user pre-specified).
- Use the Question tool for action selection AFTER synthesis (unless user already stated intent).
- After athena_council, use background_output for each returned task ID before synthesizing.
- Do NOT use background_output — athena_council returns all member responses directly.
- Do NOT write or edit files directly.
- Do NOT delegate without explicit user confirmation via Question tool.
- Do NOT ignore solo finding false-positive warnings.

View File

@@ -55,6 +55,7 @@ export function createToolRegistry(args: {
const athenaCouncilTool = createAthenaCouncilTool({
backgroundManager: managers.backgroundManager,
councilConfig: athenaCouncilConfig,
client: ctx.client,
})
const isMultimodalLookerEnabled = !(pluginConfig.disabled_agents ?? []).some(

View File

@@ -0,0 +1,116 @@
import type { BackgroundManager } from "../../features/background-agent"
import type { CouncilLaunchedMember } from "../../agents/athena/types"
import type { BackgroundOutputClient, BackgroundOutputMessagesResult } from "../background-task/clients"
import { extractMessages, getErrorMessage } from "../background-task/session-messages"
const POLL_INTERVAL_MS = 2_000
const DEFAULT_TIMEOUT_MS = 5 * 60 * 1_000
export interface CollectedMemberResult {
name: string
model: string
taskId: string
status: "completed" | "error" | "cancelled" | "timeout"
content: string
}
export interface CollectedCouncilResults {
results: CollectedMemberResult[]
allCompleted: boolean
}
/**
* Waits for all launched council members to complete, then fetches their
* session messages and returns extracted text content.
*
* This replaces the previous flow where Athena had to manually poll
* background_output for each member, which created excessive UI noise.
*/
export async function collectCouncilResults(
launched: CouncilLaunchedMember[],
manager: BackgroundManager,
client: BackgroundOutputClient,
abort?: AbortSignal,
timeoutMs = DEFAULT_TIMEOUT_MS
): Promise<CollectedCouncilResults> {
const pendingIds = new Set(launched.map((m) => m.taskId))
const completedMap = new Map<string, "completed" | "error" | "cancelled">()
const deadline = Date.now() + timeoutMs
while (pendingIds.size > 0 && Date.now() < deadline) {
if (abort?.aborted) break
for (const taskId of pendingIds) {
const task = manager.getTask(taskId)
if (!task) {
completedMap.set(taskId, "error")
pendingIds.delete(taskId)
continue
}
if (task.status === "completed") {
completedMap.set(taskId, "completed")
pendingIds.delete(taskId)
} else if (task.status === "error" || task.status === "cancelled" || task.status === "interrupt") {
completedMap.set(taskId, task.status === "interrupt" ? "cancelled" : task.status)
pendingIds.delete(taskId)
}
}
if (pendingIds.size > 0) {
await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS))
}
}
const results: CollectedMemberResult[] = []
for (const entry of launched) {
const memberName = entry.member.name ?? entry.member.model
const status = completedMap.get(entry.taskId) ?? "timeout"
if (status !== "completed") {
results.push({ name: memberName, model: entry.member.model, taskId: entry.taskId, status, content: "" })
continue
}
const content = await fetchMemberContent(entry.taskId, manager, client)
results.push({ name: memberName, model: entry.member.model, taskId: entry.taskId, status, content })
}
return {
results,
allCompleted: pendingIds.size === 0,
}
}
async function fetchMemberContent(
taskId: string,
manager: BackgroundManager,
client: BackgroundOutputClient
): Promise<string> {
const task = manager.getTask(taskId)
if (!task?.sessionID) return "(No session available)"
const messagesResult: BackgroundOutputMessagesResult = await client.session.messages({
path: { id: task.sessionID },
})
const errorMsg = getErrorMessage(messagesResult)
if (errorMsg) return `(Error fetching results: ${errorMsg})`
const messages = extractMessages(messagesResult)
if (!Array.isArray(messages) || messages.length === 0) return "(No messages found)"
const assistantMessages = messages.filter((m) => m.info?.role === "assistant")
if (assistantMessages.length === 0) return "(No assistant response found)"
const textParts: string[] = []
for (const message of assistantMessages) {
for (const part of message.parts ?? []) {
if ((part.type === "text" || part.type === "reasoning") && part.text) {
textParts.push(part.text)
}
}
}
return textParts.join("\n\n") || "(No text content)"
}

View File

@@ -3,8 +3,21 @@
import { describe, expect, test } from "bun:test"
import type { BackgroundManager } from "../../features/background-agent"
import type { BackgroundTask } from "../../features/background-agent/types"
import type { BackgroundOutputClient } from "../background-task/clients"
import { createAthenaCouncilTool, filterCouncilMembers } from "./tools"
const mockClient = {
session: {
messages: async () => ({
data: [{
id: "msg-1",
info: { role: "assistant" },
parts: [{ type: "text", text: "Test analysis result" }],
}],
}),
},
} as unknown as BackgroundOutputClient
const mockManager = {
getTask: () => undefined,
launch: async () => {
@@ -25,27 +38,19 @@ const configuredMembers = [
{ model: "google/gemini-3-pro" },
]
function createRunningTask(id: string): BackgroundTask {
function createCompletedTask(id: string): BackgroundTask {
return {
id,
parentSessionID: "session-1",
parentMessageID: "message-1",
description: `Council member task ${id}`,
prompt: "prompt",
agent: "athena",
status: "running",
agent: "council-member",
status: "completed",
sessionID: `ses-${id}`,
}
}
function parseLaunchResult(result: unknown): {
launched: number
members: Array<{ task_id: string; name: string; model: string; status: string }>
failed: Array<{ name: string; model: string; error: string }>
} {
expect(typeof result).toBe("string")
return JSON.parse(result as string)
}
describe("filterCouncilMembers", () => {
test("returns all members when selection is undefined", () => {
// #given
@@ -142,6 +147,7 @@ describe("createAthenaCouncilTool", () => {
const athenaCouncilTool = createAthenaCouncilTool({
backgroundManager: mockManager,
councilConfig: undefined,
client: mockClient,
})
// #when
@@ -156,6 +162,7 @@ describe("createAthenaCouncilTool", () => {
const athenaCouncilTool = createAthenaCouncilTool({
backgroundManager: mockManager,
councilConfig: { members: [] },
client: mockClient,
})
// #when
@@ -170,6 +177,7 @@ describe("createAthenaCouncilTool", () => {
const athenaCouncilTool = createAthenaCouncilTool({
backgroundManager: mockManager,
councilConfig: { members: [{ model: "openai/gpt-5.3-codex" }] },
client: mockClient,
})
// #then - description should be dynamic and include the member model
@@ -184,6 +192,7 @@ describe("createAthenaCouncilTool", () => {
const athenaCouncilTool = createAthenaCouncilTool({
backgroundManager: mockManager,
councilConfig: { members: configuredMembers },
client: mockClient,
})
const toolArgs = {
question: "Who should investigate this?",
@@ -197,63 +206,54 @@ describe("createAthenaCouncilTool", () => {
expect(result).toBe("Unknown council members: unknown-model. Available members: Claude, GPT, google/gemini-3-pro.")
})
test("returns launched task_ids and member mapping for configured council", async () => {
test("returns collected markdown results for all configured council members", async () => {
// #given
let launchCount = 0
const taskStore = new Map<string, BackgroundTask>()
const launchManager = {
launch: async () => {
launchCount += 1
return createRunningTask(`bg-${launchCount}`)
const task = createCompletedTask(`bg-${launchCount}`)
taskStore.set(task.id, task)
return task
},
getTask: () => undefined,
getTask: (id: string) => taskStore.get(id),
} as unknown as BackgroundManager
const athenaCouncilTool = createAthenaCouncilTool({
backgroundManager: launchManager,
councilConfig: { members: configuredMembers },
client: mockClient,
})
// #when
const result = await athenaCouncilTool.execute({ question: "How should we proceed?" }, mockToolContext)
const parsed = parseLaunchResult(result)
// #then
expect(parsed.launched).toBe(3)
expect(parsed.failed).toEqual([])
expect(parsed.members).toEqual([
{
task_id: "bg-1",
name: "Claude",
model: "anthropic/claude-sonnet-4-5",
status: "running",
},
{
task_id: "bg-2",
name: "GPT",
model: "openai/gpt-5.3-codex",
status: "running",
},
{
task_id: "bg-3",
name: "google/gemini-3-pro",
model: "google/gemini-3-pro",
status: "running",
},
])
// #then - returns markdown with council results, one section per member
expect(result).toContain("## Council Results")
expect(result).toContain("How should we proceed?")
expect(result).toContain("### Claude (anthropic/claude-sonnet-4-5)")
expect(result).toContain("### GPT (openai/gpt-5.3-codex)")
expect(result).toContain("### google/gemini-3-pro (google/gemini-3-pro)")
expect(result).toContain("Test analysis result")
})
test("returns task_ids length matching selected members", async () => {
test("returns collected results only for selected members", async () => {
// #given
let launchCount = 0
const taskStore = new Map<string, BackgroundTask>()
const launchManager = {
launch: async () => {
launchCount += 1
return createRunningTask(`bg-${launchCount}`)
const task = createCompletedTask(`bg-${launchCount}`)
taskStore.set(task.id, task)
return task
},
getTask: () => undefined,
getTask: (id: string) => taskStore.get(id),
} as unknown as BackgroundManager
const athenaCouncilTool = createAthenaCouncilTool({
backgroundManager: launchManager,
councilConfig: { members: configuredMembers },
client: mockClient,
})
// #when
@@ -264,54 +264,50 @@ describe("createAthenaCouncilTool", () => {
},
mockToolContext
)
const parsed = parseLaunchResult(result)
// #then
expect(parsed.launched).toBe(2)
expect(parsed.members).toHaveLength(2)
expect(parsed.members.map((member) => member.name)).toEqual(["GPT", "google/gemini-3-pro"])
// #then - only selected members appear in output
expect(result).toContain("### GPT (openai/gpt-5.3-codex)")
expect(result).toContain("### google/gemini-3-pro (google/gemini-3-pro)")
expect(result).not.toContain("### Claude")
expect(launchCount).toBe(2)
})
test("returns failed launches inline while keeping successful task mappings", async () => {
test("includes launch failures alongside successful member results", async () => {
// #given
let launchCount = 0
const taskStore = new Map<string, BackgroundTask>()
const launchManager = {
launch: async () => {
launchCount += 1
if (launchCount === 2) {
throw new Error("provider outage")
}
return createRunningTask(`bg-${launchCount}`)
const task = createCompletedTask(`bg-${launchCount}`)
taskStore.set(task.id, task)
return task
},
getTask: () => undefined,
getTask: (id: string) => taskStore.get(id),
} as unknown as BackgroundManager
const athenaCouncilTool = createAthenaCouncilTool({
backgroundManager: launchManager,
councilConfig: { members: configuredMembers },
client: mockClient,
})
// #when
const result = await athenaCouncilTool.execute({ question: "Any concerns?" }, mockToolContext)
const parsed = parseLaunchResult(result)
// #then
expect(parsed.launched).toBe(2)
expect(parsed.members).toHaveLength(2)
expect(parsed.failed).toHaveLength(1)
expect(parsed.failed[0]).toEqual({
name: "GPT",
model: "openai/gpt-5.3-codex",
error: "Launch failed: Error: provider outage",
})
// #then - successful members have results, failed member listed in failures section
expect(result).toContain("### Claude (anthropic/claude-sonnet-4-5)")
expect(result).toContain("### google/gemini-3-pro (google/gemini-3-pro)")
expect(result).toContain("### Launch Failures")
expect(result).toContain("**GPT**")
expect(result).toContain("provider outage")
})
test("returns dedup error when council is already running in same session", async () => {
// #given
let resolveLaunch: ((task: BackgroundTask) => void) | undefined
const pendingLaunch = new Promise<BackgroundTask>((resolve) => {
resolveLaunch = resolve
})
// #given - use a never-resolving launch to keep the first execution in-flight
const pendingLaunch = new Promise<BackgroundTask>(() => {})
const launchManager = {
launch: async () => pendingLaunch,
getTask: () => undefined,
@@ -319,17 +315,19 @@ describe("createAthenaCouncilTool", () => {
const athenaCouncilTool = createAthenaCouncilTool({
backgroundManager: launchManager,
councilConfig: { members: [{ model: "openai/gpt-5.3-codex" }] },
client: mockClient,
})
// #when
const firstExecution = athenaCouncilTool.execute({ question: "First run" }, mockToolContext)
const secondExecution = await athenaCouncilTool.execute({ question: "Second run" }, mockToolContext)
// #when - first call starts but never resolves (stuck in launch)
// second call should be rejected by session guard
const _firstExecution = athenaCouncilTool.execute({ question: "First run" }, mockToolContext)
resolveLaunch?.(createRunningTask("bg-dedup"))
const firstResult = parseLaunchResult(await firstExecution)
// Allow microtask queue to process so markCouncilRunning is called
await new Promise((resolve) => setTimeout(resolve, 0))
const secondExecution = await athenaCouncilTool.execute({ question: "Second run" }, mockToolContext)
// #then
expect(secondExecution).toBe("Council is already running for this session. Wait for the current council execution to complete.")
expect(firstResult.launched).toBe(1)
})
})

View File

@@ -2,11 +2,13 @@ import { tool, type ToolDefinition } from "@opencode-ai/plugin"
import { executeCouncil } from "../../agents/athena/council-orchestrator"
import type { CouncilConfig, CouncilMemberConfig } from "../../agents/athena/types"
import type { BackgroundManager } from "../../features/background-agent"
import type { BackgroundOutputClient } from "../background-task/clients"
import { ATHENA_COUNCIL_TOOL_DESCRIPTION_TEMPLATE } from "./constants"
import { createCouncilLauncher } from "./council-launcher"
import { isCouncilRunning, markCouncilDone, markCouncilRunning } from "./session-guard"
import { waitForCouncilSessions } from "./session-waiter"
import type { AthenaCouncilLaunchResult, AthenaCouncilToolArgs } from "./types"
import { collectCouncilResults } from "./result-collector"
import type { AthenaCouncilToolArgs } from "./types"
function isCouncilConfigured(councilConfig: CouncilConfig | undefined): councilConfig is CouncilConfig {
return Boolean(councilConfig && councilConfig.members.length > 0)
@@ -75,8 +77,9 @@ function buildToolDescription(councilConfig: CouncilConfig | undefined): string
export function createAthenaCouncilTool(args: {
backgroundManager: BackgroundManager
councilConfig: CouncilConfig | undefined
client: BackgroundOutputClient
}): ToolDefinition {
const { backgroundManager, councilConfig } = args
const { backgroundManager, councilConfig, client } = args
const description = buildToolDescription(councilConfig)
return tool({
@@ -113,17 +116,13 @@ export function createAthenaCouncilTool(args: {
parentAgent: toolContext.agent,
})
// Wait for sessions to be created so we can register metadata for UI visibility.
// This makes council member tasks clickable in the OpenCode TUI, matching the
// behavior of the task tool (delegate-task/background-task.ts).
// Register metadata for UI visibility (makes sessions clickable in TUI).
const metadataFn = (toolContext as Record<string, unknown>).metadata as
| ((input: { title?: string; metadata?: Record<string, unknown> }) => Promise<void>)
| undefined
if (metadataFn && execution.launched.length > 0) {
const sessions = await waitForCouncilSessions(
execution.launched,
backgroundManager,
toolContext.abort
execution.launched, backgroundManager, toolContext.abort
)
for (const session of sessions) {
await metadataFn({
@@ -138,27 +137,46 @@ export function createAthenaCouncilTool(args: {
}
}
const launchResult: AthenaCouncilLaunchResult = {
launched: execution.launched.length,
members: execution.launched.map((entry) => ({
task_id: entry.taskId,
name: entry.member.name ?? entry.member.model,
model: entry.member.model,
status: "running",
})),
failed: execution.failures.map((entry) => ({
name: entry.member.name ?? entry.member.model,
model: entry.member.model,
error: entry.error,
})),
}
// Wait for all members to complete and collect their actual results.
// This eliminates the need for Athena to poll background_output repeatedly.
const collected = await collectCouncilResults(
execution.launched, backgroundManager, client, toolContext.abort
)
markCouncilDone(toolContext.sessionID)
return JSON.stringify(launchResult)
return formatCouncilOutput(toolArgs.question, collected.results, execution.failures)
} catch (error) {
markCouncilDone(toolContext.sessionID)
throw error
} finally {
markCouncilDone(toolContext.sessionID)
}
},
})
}
function formatCouncilOutput(
question: string,
results: Array<{ name: string; model: string; taskId: string; status: string; content: string }>,
failures: Array<{ member: { name?: string; model: string }; error: string }>
): string {
const sections: string[] = []
sections.push(`## Council Results\n\n**Question:** ${question}\n`)
for (const result of results) {
const header = `### ${result.name} (${result.model})`
if (result.status !== "completed") {
sections.push(`${header}\n\n*Status: ${result.status}*`)
continue
}
sections.push(`${header}\n\n${result.content}`)
}
if (failures.length > 0) {
const failureLines = failures
.map((f) => `- **${f.member.name ?? f.member.model}**: ${f.error}`)
.join("\n")
sections.push(`### Launch Failures\n\n${failureLines}`)
}
return sections.join("\n\n---\n\n")
}