From d8ba9b1f0c70bdbbb0cd6a348e87b3278e30d471 Mon Sep 17 00:00:00 2001 From: ismeth Date: Fri, 13 Feb 2026 16:00:04 +0100 Subject: [PATCH] =?UTF-8?q?fix(athena):=20address=206=20council=20review?= =?UTF-8?q?=20findings=20=E2=80=94=20launcher,=20schema,=20filtering,=20pr?= =?UTF-8?q?esentation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Forward temperature and permission through council-launcher to background manager - Add LaunchInput.temperature and LaunchInput.permission to background-agent types - Extract session guard with 5-minute timeout to prevent stale council locks - Make council optional in AthenaOverrideConfigSchema for partial user overrides - Support member lookup by both name and model ID in filterCouncilMembers - Add provider/model-id format validation to CouncilMemberSchema - Fix findings-presenter group header to show finding count instead of first finding's reporter count --- src/agents/athena/council-orchestrator.ts | 5 +- src/agents/athena/findings-presenter.test.ts | 36 +++++++++ src/agents/athena/findings-presenter.ts | 3 +- src/config/schema.test.ts | 6 +- src/config/schema/agent-overrides.ts | 2 +- src/config/schema/athena.test.ts | 44 +++++++++++ src/config/schema/athena.ts | 14 +++- src/features/background-agent/types.ts | 4 + .../athena-council/council-launcher.test.ts | 77 +++++++++++++++++++ src/tools/athena-council/council-launcher.ts | 2 + .../athena-council/session-guard.test.ts | 72 +++++++++++++++++ src/tools/athena-council/session-guard.ts | 37 +++++++++ src/tools/athena-council/tools.test.ts | 24 ++++++ src/tools/athena-council/tools.ts | 25 +++--- 14 files changed, 327 insertions(+), 24 deletions(-) create mode 100644 src/tools/athena-council/council-launcher.test.ts create mode 100644 src/tools/athena-council/session-guard.test.ts create mode 100644 src/tools/athena-council/session-guard.ts diff --git a/src/agents/athena/council-orchestrator.ts b/src/agents/athena/council-orchestrator.ts index f5ccb10fd..231f62a20 100644 --- a/src/agents/athena/council-orchestrator.ts +++ b/src/agents/athena/council-orchestrator.ts @@ -5,10 +5,7 @@ import { collectCouncilResults } from "./council-result-collector" import { parseModelString } from "./model-parser" import type { CouncilConfig, CouncilExecutionResult, CouncilMemberConfig, CouncilMemberResponse } from "./types" -export interface CouncilLaunchInput extends LaunchInput { - temperature?: number - permission?: Record -} +export type CouncilLaunchInput = LaunchInput export interface CouncilLauncher { launch(input: CouncilLaunchInput): Promise diff --git a/src/agents/athena/findings-presenter.test.ts b/src/agents/athena/findings-presenter.test.ts index 3ac6a371a..7c45f2533 100644 --- a/src/agents/athena/findings-presenter.test.ts +++ b/src/agents/athena/findings-presenter.test.ts @@ -117,6 +117,42 @@ describe("formatFindingsForUser", () => { expect(output).toContain("No synthesized findings are available") }) + //#given multiple majority findings with different reporter counts + //#when formatting is generated + //#then group header shows the agreement level label without a misleading single count + test("shows agreement level label in group header without single-finding count", () => { + const result = createSynthesisResult({ + findings: [ + { + summary: "Finding A", + details: "Reported by 3 members", + agreementLevel: "majority", + reportedBy: ["OpenAI", "Claude", "Gemini"], + assessment: { agrees: true, rationale: "Valid" }, + isFalsePositiveRisk: false, + }, + { + summary: "Finding B", + details: "Reported by 2 members", + agreementLevel: "majority", + reportedBy: ["OpenAI", "Claude"], + assessment: { agrees: true, rationale: "Also valid" }, + isFalsePositiveRisk: false, + }, + ], + }) + + const output = formatFindingsForUser(result) + + // The header should show the level label without a misleading single-finding count + // It should NOT use the first finding's count as the group header + expect(output).not.toContain("## Majority Findings (3 members report this (majority))") + expect(output).toContain("## Majority Findings") + // Each individual finding still shows its own agreement context + expect(output).toContain("Agreement context: 3 members report this (majority)") + expect(output).toContain("Agreement context: 2 members report this (majority)") + }) + //#given a non-empty findings result //#when formatting is generated //#then output ends with an action recommendation section diff --git a/src/agents/athena/findings-presenter.ts b/src/agents/athena/findings-presenter.ts index 7ebf7e00f..b8470815d 100644 --- a/src/agents/athena/findings-presenter.ts +++ b/src/agents/athena/findings-presenter.ts @@ -72,8 +72,7 @@ export function formatFindingsForUser(result: SynthesisResult): string { return [] } - const firstFinding = findings[0] - const header = `## ${toTitle(level)} Findings (${formatAgreementLine(level, firstFinding)})` + const header = `## ${toTitle(level)} Findings (${findings.length})` const entries = findings.map((finding) => formatFinding(level, finding)).join("\n\n") return [`${header}\n\n${entries}`] }) diff --git a/src/config/schema.test.ts b/src/config/schema.test.ts index 72a6d780d..d8df6bffd 100644 --- a/src/config/schema.test.ts +++ b/src/config/schema.test.ts @@ -561,7 +561,7 @@ describe("Athena agent override", () => { expect(result.data.agents?.athena?.model).toBe("openai/gpt-5.3-codex") expect(result.data.agents?.athena?.temperature).toBe(0.2) expect(result.data.agents?.athena?.prompt_append).toBe("Use consensus-first synthesis.") - expect(result.data.agents?.athena?.council.members).toHaveLength(3) + expect(result.data.agents?.athena?.council?.members).toHaveLength(3) } }) @@ -584,7 +584,7 @@ describe("Athena agent override", () => { expect(result.success).toBe(false) }) - test("rejects athena override when council is missing", () => { + test("accepts athena override without council (temperature-only override)", () => { // given const config = { agents: { @@ -598,7 +598,7 @@ describe("Athena agent override", () => { const result = OhMyOpenCodeConfigSchema.safeParse(config) // then - expect(result.success).toBe(false) + expect(result.success).toBe(true) }) }) diff --git a/src/config/schema/agent-overrides.ts b/src/config/schema/agent-overrides.ts index 4086c8a88..4b2cbeb48 100644 --- a/src/config/schema/agent-overrides.ts +++ b/src/config/schema/agent-overrides.ts @@ -58,7 +58,7 @@ export const AgentOverrideConfigSchema = z.object({ export const AthenaOverrideConfigSchema = AgentOverrideConfigSchema.merge( z.object({ - council: AthenaConfigSchema.shape.council, + council: AthenaConfigSchema.shape.council.optional(), }) ) diff --git a/src/config/schema/athena.test.ts b/src/config/schema/athena.test.ts index 4250a59fe..b698dc266 100644 --- a/src/config/schema/athena.test.ts +++ b/src/config/schema/athena.test.ts @@ -41,6 +41,50 @@ describe("CouncilMemberSchema", () => { expect(result.success).toBe(false) }) + test("rejects model string without provider/model separator", () => { + //#given + const config = { model: "invalid-model" } + + //#when + const result = CouncilMemberSchema.safeParse(config) + + //#then + expect(result.success).toBe(false) + }) + + test("rejects model string with empty provider", () => { + //#given + const config = { model: "/gpt-5.3-codex" } + + //#when + const result = CouncilMemberSchema.safeParse(config) + + //#then + expect(result.success).toBe(false) + }) + + test("rejects model string with empty model ID", () => { + //#given + const config = { model: "openai/" } + + //#when + const result = CouncilMemberSchema.safeParse(config) + + //#then + expect(result.success).toBe(false) + }) + + test("rejects empty model string", () => { + //#given + const config = { model: "" } + + //#when + const result = CouncilMemberSchema.safeParse(config) + + //#then + expect(result.success).toBe(false) + }) + test("rejects temperature below 0", () => { //#given const config = { model: "openai/gpt-5.3-codex", temperature: -0.1 } diff --git a/src/config/schema/athena.ts b/src/config/schema/athena.ts index 5a552a41f..cbd5821f4 100644 --- a/src/config/schema/athena.ts +++ b/src/config/schema/athena.ts @@ -1,7 +1,19 @@ import { z } from "zod" +/** Validates model string format: "provider/model-id" (e.g., "openai/gpt-5.3-codex"). */ +const ModelStringSchema = z + .string() + .min(1) + .refine( + (model) => { + const slashIndex = model.indexOf("/") + return slashIndex > 0 && slashIndex < model.length - 1 + }, + { message: 'Model must be in "provider/model-id" format (e.g., "openai/gpt-5.3-codex")' } + ) + export const CouncilMemberSchema = z.object({ - model: z.string(), + model: ModelStringSchema, temperature: z.number().min(0).max(2).optional(), variant: z.string().optional(), name: z.string().optional(), diff --git a/src/features/background-agent/types.ts b/src/features/background-agent/types.ts index 6973dd783..72f0ed3d8 100644 --- a/src/features/background-agent/types.ts +++ b/src/features/background-agent/types.ts @@ -72,6 +72,10 @@ export interface LaunchInput { skills?: string[] skillContent?: string category?: string + /** Per-task temperature override for council members or custom launches */ + temperature?: number + /** Tool permission overrides (e.g., { write: "deny", edit: "deny" }) */ + permission?: Record } export interface ResumeInput { diff --git a/src/tools/athena-council/council-launcher.test.ts b/src/tools/athena-council/council-launcher.test.ts new file mode 100644 index 000000000..ee839c1c3 --- /dev/null +++ b/src/tools/athena-council/council-launcher.test.ts @@ -0,0 +1,77 @@ +import { describe, expect, test } from "bun:test" +import type { BackgroundManager } from "../../features/background-agent" +import type { BackgroundTask, LaunchInput } from "../../features/background-agent/types" +import { createCouncilLauncher } from "./council-launcher" + +function createMockTask(id: string): BackgroundTask { + return { + id, + parentSessionID: "session-1", + parentMessageID: "message-1", + description: "test", + prompt: "test", + agent: "athena", + status: "running", + } +} + +describe("createCouncilLauncher", () => { + //#given a council launch input with temperature and permission + //#when launch is called + //#then temperature and permission are forwarded to the background manager + test("forwards temperature and permission to background manager", async () => { + const capturedInputs: LaunchInput[] = [] + const mockManager = { + launch: async (input: LaunchInput) => { + capturedInputs.push(input) + return createMockTask("bg-1") + }, + getTask: () => undefined, + } as unknown as BackgroundManager + + const launcher = createCouncilLauncher(mockManager) + + await launcher.launch({ + description: "Council member: test", + prompt: "Analyze this", + agent: "athena", + parentSessionID: "session-1", + parentMessageID: "message-1", + model: { providerID: "openai", modelID: "gpt-5.3-codex" }, + temperature: 0.3, + permission: { write: "deny", edit: "deny", task: "deny" }, + }) + + expect(capturedInputs).toHaveLength(1) + expect(capturedInputs[0]?.temperature).toBe(0.3) + expect(capturedInputs[0]?.permission).toEqual({ write: "deny", edit: "deny", task: "deny" }) + }) + + //#given a council launch input without temperature and permission + //#when launch is called + //#then undefined temperature and permission are forwarded (not dropped) + test("forwards undefined temperature and permission without error", async () => { + const capturedInputs: LaunchInput[] = [] + const mockManager = { + launch: async (input: LaunchInput) => { + capturedInputs.push(input) + return createMockTask("bg-2") + }, + getTask: () => undefined, + } as unknown as BackgroundManager + + const launcher = createCouncilLauncher(mockManager) + + await launcher.launch({ + description: "Council member: test", + prompt: "Analyze this", + agent: "athena", + parentSessionID: "session-1", + parentMessageID: "message-1", + }) + + expect(capturedInputs).toHaveLength(1) + expect(capturedInputs[0]?.temperature).toBeUndefined() + expect(capturedInputs[0]?.permission).toBeUndefined() + }) +}) diff --git a/src/tools/athena-council/council-launcher.ts b/src/tools/athena-council/council-launcher.ts index 0f83ba5c5..c690a3da1 100644 --- a/src/tools/athena-council/council-launcher.ts +++ b/src/tools/athena-council/council-launcher.ts @@ -12,6 +12,8 @@ export function createCouncilLauncher(manager: BackgroundManager): CouncilLaunch parentMessageID: input.parentMessageID, parentAgent: input.parentAgent, model: input.model, + temperature: input.temperature, + permission: input.permission, }) }, } diff --git a/src/tools/athena-council/session-guard.test.ts b/src/tools/athena-council/session-guard.test.ts new file mode 100644 index 000000000..a5c0eb2d6 --- /dev/null +++ b/src/tools/athena-council/session-guard.test.ts @@ -0,0 +1,72 @@ +import { afterEach, describe, expect, test } from "bun:test" +import { + isCouncilRunning, + markCouncilDone, + markCouncilRunning, + _resetForTesting, + _setTimestampForTesting, +} from "./session-guard" + +afterEach(() => { + _resetForTesting() +}) + +describe("session-guard", () => { + //#given no active sessions + //#when isCouncilRunning is checked + //#then returns false + test("returns false for unknown session", () => { + expect(isCouncilRunning("session-1")).toBe(false) + }) + + //#given a session is marked as running + //#when isCouncilRunning is checked + //#then returns true + test("returns true after markCouncilRunning", () => { + markCouncilRunning("session-1") + + expect(isCouncilRunning("session-1")).toBe(true) + }) + + //#given a session was marked running then done + //#when isCouncilRunning is checked + //#then returns false + test("returns false after markCouncilDone", () => { + markCouncilRunning("session-1") + markCouncilDone("session-1") + + expect(isCouncilRunning("session-1")).toBe(false) + }) + + //#given a session was marked running 6 minutes ago (past 5-minute timeout) + //#when isCouncilRunning is checked + //#then stale entry is purged and returns false + test("purges stale entries older than 5 minutes", () => { + const sixMinutesAgo = Date.now() - 6 * 60 * 1000 + _setTimestampForTesting("stale-session", sixMinutesAgo) + + expect(isCouncilRunning("stale-session")).toBe(false) + }) + + //#given a session was marked running 4 minutes ago (within 5-minute timeout) + //#when isCouncilRunning is checked + //#then entry is kept and returns true + test("keeps entries within timeout window", () => { + const fourMinutesAgo = Date.now() - 4 * 60 * 1000 + _setTimestampForTesting("recent-session", fourMinutesAgo) + + expect(isCouncilRunning("recent-session")).toBe(true) + }) + + //#given multiple sessions where one is stale and one is fresh + //#when isCouncilRunning is checked for the fresh one + //#then stale entry is purged but fresh entry remains + test("purges only stale entries while keeping fresh ones", () => { + const sixMinutesAgo = Date.now() - 6 * 60 * 1000 + _setTimestampForTesting("stale-session", sixMinutesAgo) + markCouncilRunning("fresh-session") + + expect(isCouncilRunning("stale-session")).toBe(false) + expect(isCouncilRunning("fresh-session")).toBe(true) + }) +}) diff --git a/src/tools/athena-council/session-guard.ts b/src/tools/athena-council/session-guard.ts new file mode 100644 index 000000000..94bf5143d --- /dev/null +++ b/src/tools/athena-council/session-guard.ts @@ -0,0 +1,37 @@ +/** Timeout in ms after which a stale council session lock is automatically released. */ +const COUNCIL_SESSION_TIMEOUT_MS = 5 * 60 * 1000 + +/** Tracks active council executions per session with timestamps for stale entry cleanup. */ +const activeCouncilSessions = new Map() + +function purgeStaleEntries(): void { + const now = Date.now() + for (const [sessionId, startedAt] of activeCouncilSessions) { + if (now - startedAt > COUNCIL_SESSION_TIMEOUT_MS) { + activeCouncilSessions.delete(sessionId) + } + } +} + +export function isCouncilRunning(sessionId: string): boolean { + purgeStaleEntries() + return activeCouncilSessions.has(sessionId) +} + +export function markCouncilRunning(sessionId: string): void { + activeCouncilSessions.set(sessionId, Date.now()) +} + +export function markCouncilDone(sessionId: string): void { + activeCouncilSessions.delete(sessionId) +} + +/** Visible for testing only. */ +export function _resetForTesting(): void { + activeCouncilSessions.clear() +} + +/** Visible for testing only. */ +export function _setTimestampForTesting(sessionId: string, timestamp: number): void { + activeCouncilSessions.set(sessionId, timestamp) +} diff --git a/src/tools/athena-council/tools.test.ts b/src/tools/athena-council/tools.test.ts index 6ba73ffb3..8c8b56c47 100644 --- a/src/tools/athena-council/tools.test.ts +++ b/src/tools/athena-council/tools.test.ts @@ -97,6 +97,30 @@ describe("filterCouncilMembers", () => { ) }) + test("selects named member by model ID when name differs from model", () => { + // #given - "Claude" has name "Claude" but model "anthropic/claude-sonnet-4-5" + const selectedMembers = ["anthropic/claude-sonnet-4-5"] + + // #when + const result = filterCouncilMembers(configuredMembers, selectedMembers) + + // #then - should find the member by model ID even though it has a custom name + expect(result.members).toEqual([configuredMembers[0]]) + expect(result.error).toBeUndefined() + }) + + test("deduplicates when same member is selected by both name and model", () => { + // #given + const selectedMembers = ["Claude", "anthropic/claude-sonnet-4-5"] + + // #when + const result = filterCouncilMembers(configuredMembers, selectedMembers) + + // #then - should return only one copy + expect(result.members).toEqual([configuredMembers[0]]) + expect(result.error).toBeUndefined() + }) + test("returns error listing only unmatched names when partially matched", () => { // #given const selectedMembers = ["claude", "non-existent"] diff --git a/src/tools/athena-council/tools.ts b/src/tools/athena-council/tools.ts index 15e6205c0..11c75edaf 100644 --- a/src/tools/athena-council/tools.ts +++ b/src/tools/athena-council/tools.ts @@ -4,11 +4,9 @@ import type { CouncilConfig, CouncilMemberConfig } from "../../agents/athena/typ import type { BackgroundManager } from "../../features/background-agent" import { ATHENA_COUNCIL_TOOL_DESCRIPTION_TEMPLATE } from "./constants" import { createCouncilLauncher } from "./council-launcher" +import { isCouncilRunning, markCouncilDone, markCouncilRunning } from "./session-guard" import type { AthenaCouncilLaunchResult, AthenaCouncilToolArgs } from "./types" -/** Tracks active council executions per session to prevent duplicate launches. */ -const activeCouncilSessions = new Set() - function isCouncilConfigured(councilConfig: CouncilConfig | undefined): councilConfig is CouncilConfig { return Boolean(councilConfig && councilConfig.members.length > 0) } @@ -28,13 +26,15 @@ export function filterCouncilMembers( const memberLookup = new Map() members.forEach((member) => { - const key = (member.name ?? member.model).toLowerCase() - memberLookup.set(key, member) + memberLookup.set(member.model.toLowerCase(), member) + if (member.name) { + memberLookup.set(member.name.toLowerCase(), member) + } }) const unresolved: string[] = [] const filteredMembers: CouncilMemberConfig[] = [] - const includedMemberKeys = new Set() + const includedMembers = new Set() selectedNames.forEach((selectedName) => { const selectedKey = selectedName.toLowerCase() @@ -44,12 +44,11 @@ export function filterCouncilMembers( return } - const memberKey = matchedMember.model - if (includedMemberKeys.has(memberKey)) { + if (includedMembers.has(matchedMember)) { return } - includedMemberKeys.add(memberKey) + includedMembers.add(matchedMember) filteredMembers.push(matchedMember) }) @@ -98,11 +97,11 @@ export function createAthenaCouncilTool(args: { return filteredMembers.error } - if (activeCouncilSessions.has(toolContext.sessionID)) { + if (isCouncilRunning(toolContext.sessionID)) { return "Council is already running for this session. Wait for the current council execution to complete." } - activeCouncilSessions.add(toolContext.sessionID) + markCouncilRunning(toolContext.sessionID) try { const execution = await executeCouncil({ question: toolArgs.question, @@ -132,10 +131,10 @@ export function createAthenaCouncilTool(args: { })), } - activeCouncilSessions.delete(toolContext.sessionID) + markCouncilDone(toolContext.sessionID) return JSON.stringify(launchResult) } catch (error) { - activeCouncilSessions.delete(toolContext.sessionID) + markCouncilDone(toolContext.sessionID) throw error } },