fix(athena): address 6 council review findings — launcher, schema, filtering, presentation

- Forward temperature and permission through council-launcher to background manager
- Add LaunchInput.temperature and LaunchInput.permission to background-agent types
- Extract session guard with 5-minute timeout to prevent stale council locks
- Make council optional in AthenaOverrideConfigSchema for partial user overrides
- Support member lookup by both name and model ID in filterCouncilMembers
- Add provider/model-id format validation to CouncilMemberSchema
- Fix findings-presenter group header to show finding count instead of first finding's reporter count
This commit is contained in:
ismeth
2026-02-13 16:00:04 +01:00
committed by YeonGyu-Kim
parent 7cfdc68100
commit d8ba9b1f0c
14 changed files with 327 additions and 24 deletions

View File

@@ -5,10 +5,7 @@ import { collectCouncilResults } from "./council-result-collector"
import { parseModelString } from "./model-parser"
import type { CouncilConfig, CouncilExecutionResult, CouncilMemberConfig, CouncilMemberResponse } from "./types"
export interface CouncilLaunchInput extends LaunchInput {
temperature?: number
permission?: Record<string, "ask" | "allow" | "deny">
}
export type CouncilLaunchInput = LaunchInput
export interface CouncilLauncher {
launch(input: CouncilLaunchInput): Promise<BackgroundTask>

View File

@@ -117,6 +117,42 @@ describe("formatFindingsForUser", () => {
expect(output).toContain("No synthesized findings are available")
})
//#given multiple majority findings with different reporter counts
//#when formatting is generated
//#then group header shows the agreement level label without a misleading single count
test("shows agreement level label in group header without single-finding count", () => {
const result = createSynthesisResult({
findings: [
{
summary: "Finding A",
details: "Reported by 3 members",
agreementLevel: "majority",
reportedBy: ["OpenAI", "Claude", "Gemini"],
assessment: { agrees: true, rationale: "Valid" },
isFalsePositiveRisk: false,
},
{
summary: "Finding B",
details: "Reported by 2 members",
agreementLevel: "majority",
reportedBy: ["OpenAI", "Claude"],
assessment: { agrees: true, rationale: "Also valid" },
isFalsePositiveRisk: false,
},
],
})
const output = formatFindingsForUser(result)
// The header should show the level label without a misleading single-finding count
// It should NOT use the first finding's count as the group header
expect(output).not.toContain("## Majority Findings (3 members report this (majority))")
expect(output).toContain("## Majority Findings")
// Each individual finding still shows its own agreement context
expect(output).toContain("Agreement context: 3 members report this (majority)")
expect(output).toContain("Agreement context: 2 members report this (majority)")
})
//#given a non-empty findings result
//#when formatting is generated
//#then output ends with an action recommendation section

View File

@@ -72,8 +72,7 @@ export function formatFindingsForUser(result: SynthesisResult): string {
return []
}
const firstFinding = findings[0]
const header = `## ${toTitle(level)} Findings (${formatAgreementLine(level, firstFinding)})`
const header = `## ${toTitle(level)} Findings (${findings.length})`
const entries = findings.map((finding) => formatFinding(level, finding)).join("\n\n")
return [`${header}\n\n${entries}`]
})

View File

@@ -561,7 +561,7 @@ describe("Athena agent override", () => {
expect(result.data.agents?.athena?.model).toBe("openai/gpt-5.3-codex")
expect(result.data.agents?.athena?.temperature).toBe(0.2)
expect(result.data.agents?.athena?.prompt_append).toBe("Use consensus-first synthesis.")
expect(result.data.agents?.athena?.council.members).toHaveLength(3)
expect(result.data.agents?.athena?.council?.members).toHaveLength(3)
}
})
@@ -584,7 +584,7 @@ describe("Athena agent override", () => {
expect(result.success).toBe(false)
})
test("rejects athena override when council is missing", () => {
test("accepts athena override without council (temperature-only override)", () => {
// given
const config = {
agents: {
@@ -598,7 +598,7 @@ describe("Athena agent override", () => {
const result = OhMyOpenCodeConfigSchema.safeParse(config)
// then
expect(result.success).toBe(false)
expect(result.success).toBe(true)
})
})

View File

@@ -58,7 +58,7 @@ export const AgentOverrideConfigSchema = z.object({
export const AthenaOverrideConfigSchema = AgentOverrideConfigSchema.merge(
z.object({
council: AthenaConfigSchema.shape.council,
council: AthenaConfigSchema.shape.council.optional(),
})
)

View File

@@ -41,6 +41,50 @@ describe("CouncilMemberSchema", () => {
expect(result.success).toBe(false)
})
test("rejects model string without provider/model separator", () => {
//#given
const config = { model: "invalid-model" }
//#when
const result = CouncilMemberSchema.safeParse(config)
//#then
expect(result.success).toBe(false)
})
test("rejects model string with empty provider", () => {
//#given
const config = { model: "/gpt-5.3-codex" }
//#when
const result = CouncilMemberSchema.safeParse(config)
//#then
expect(result.success).toBe(false)
})
test("rejects model string with empty model ID", () => {
//#given
const config = { model: "openai/" }
//#when
const result = CouncilMemberSchema.safeParse(config)
//#then
expect(result.success).toBe(false)
})
test("rejects empty model string", () => {
//#given
const config = { model: "" }
//#when
const result = CouncilMemberSchema.safeParse(config)
//#then
expect(result.success).toBe(false)
})
test("rejects temperature below 0", () => {
//#given
const config = { model: "openai/gpt-5.3-codex", temperature: -0.1 }

View File

@@ -1,7 +1,19 @@
import { z } from "zod"
/** Validates model string format: "provider/model-id" (e.g., "openai/gpt-5.3-codex"). */
const ModelStringSchema = z
.string()
.min(1)
.refine(
(model) => {
const slashIndex = model.indexOf("/")
return slashIndex > 0 && slashIndex < model.length - 1
},
{ message: 'Model must be in "provider/model-id" format (e.g., "openai/gpt-5.3-codex")' }
)
export const CouncilMemberSchema = z.object({
model: z.string(),
model: ModelStringSchema,
temperature: z.number().min(0).max(2).optional(),
variant: z.string().optional(),
name: z.string().optional(),

View File

@@ -72,6 +72,10 @@ export interface LaunchInput {
skills?: string[]
skillContent?: string
category?: string
/** Per-task temperature override for council members or custom launches */
temperature?: number
/** Tool permission overrides (e.g., { write: "deny", edit: "deny" }) */
permission?: Record<string, "ask" | "allow" | "deny">
}
export interface ResumeInput {

View File

@@ -0,0 +1,77 @@
import { describe, expect, test } from "bun:test"
import type { BackgroundManager } from "../../features/background-agent"
import type { BackgroundTask, LaunchInput } from "../../features/background-agent/types"
import { createCouncilLauncher } from "./council-launcher"
function createMockTask(id: string): BackgroundTask {
return {
id,
parentSessionID: "session-1",
parentMessageID: "message-1",
description: "test",
prompt: "test",
agent: "athena",
status: "running",
}
}
describe("createCouncilLauncher", () => {
//#given a council launch input with temperature and permission
//#when launch is called
//#then temperature and permission are forwarded to the background manager
test("forwards temperature and permission to background manager", async () => {
const capturedInputs: LaunchInput[] = []
const mockManager = {
launch: async (input: LaunchInput) => {
capturedInputs.push(input)
return createMockTask("bg-1")
},
getTask: () => undefined,
} as unknown as BackgroundManager
const launcher = createCouncilLauncher(mockManager)
await launcher.launch({
description: "Council member: test",
prompt: "Analyze this",
agent: "athena",
parentSessionID: "session-1",
parentMessageID: "message-1",
model: { providerID: "openai", modelID: "gpt-5.3-codex" },
temperature: 0.3,
permission: { write: "deny", edit: "deny", task: "deny" },
})
expect(capturedInputs).toHaveLength(1)
expect(capturedInputs[0]?.temperature).toBe(0.3)
expect(capturedInputs[0]?.permission).toEqual({ write: "deny", edit: "deny", task: "deny" })
})
//#given a council launch input without temperature and permission
//#when launch is called
//#then undefined temperature and permission are forwarded (not dropped)
test("forwards undefined temperature and permission without error", async () => {
const capturedInputs: LaunchInput[] = []
const mockManager = {
launch: async (input: LaunchInput) => {
capturedInputs.push(input)
return createMockTask("bg-2")
},
getTask: () => undefined,
} as unknown as BackgroundManager
const launcher = createCouncilLauncher(mockManager)
await launcher.launch({
description: "Council member: test",
prompt: "Analyze this",
agent: "athena",
parentSessionID: "session-1",
parentMessageID: "message-1",
})
expect(capturedInputs).toHaveLength(1)
expect(capturedInputs[0]?.temperature).toBeUndefined()
expect(capturedInputs[0]?.permission).toBeUndefined()
})
})

View File

@@ -12,6 +12,8 @@ export function createCouncilLauncher(manager: BackgroundManager): CouncilLaunch
parentMessageID: input.parentMessageID,
parentAgent: input.parentAgent,
model: input.model,
temperature: input.temperature,
permission: input.permission,
})
},
}

View File

@@ -0,0 +1,72 @@
import { afterEach, describe, expect, test } from "bun:test"
import {
isCouncilRunning,
markCouncilDone,
markCouncilRunning,
_resetForTesting,
_setTimestampForTesting,
} from "./session-guard"
afterEach(() => {
_resetForTesting()
})
describe("session-guard", () => {
//#given no active sessions
//#when isCouncilRunning is checked
//#then returns false
test("returns false for unknown session", () => {
expect(isCouncilRunning("session-1")).toBe(false)
})
//#given a session is marked as running
//#when isCouncilRunning is checked
//#then returns true
test("returns true after markCouncilRunning", () => {
markCouncilRunning("session-1")
expect(isCouncilRunning("session-1")).toBe(true)
})
//#given a session was marked running then done
//#when isCouncilRunning is checked
//#then returns false
test("returns false after markCouncilDone", () => {
markCouncilRunning("session-1")
markCouncilDone("session-1")
expect(isCouncilRunning("session-1")).toBe(false)
})
//#given a session was marked running 6 minutes ago (past 5-minute timeout)
//#when isCouncilRunning is checked
//#then stale entry is purged and returns false
test("purges stale entries older than 5 minutes", () => {
const sixMinutesAgo = Date.now() - 6 * 60 * 1000
_setTimestampForTesting("stale-session", sixMinutesAgo)
expect(isCouncilRunning("stale-session")).toBe(false)
})
//#given a session was marked running 4 minutes ago (within 5-minute timeout)
//#when isCouncilRunning is checked
//#then entry is kept and returns true
test("keeps entries within timeout window", () => {
const fourMinutesAgo = Date.now() - 4 * 60 * 1000
_setTimestampForTesting("recent-session", fourMinutesAgo)
expect(isCouncilRunning("recent-session")).toBe(true)
})
//#given multiple sessions where one is stale and one is fresh
//#when isCouncilRunning is checked for the fresh one
//#then stale entry is purged but fresh entry remains
test("purges only stale entries while keeping fresh ones", () => {
const sixMinutesAgo = Date.now() - 6 * 60 * 1000
_setTimestampForTesting("stale-session", sixMinutesAgo)
markCouncilRunning("fresh-session")
expect(isCouncilRunning("stale-session")).toBe(false)
expect(isCouncilRunning("fresh-session")).toBe(true)
})
})

View File

@@ -0,0 +1,37 @@
/** Timeout in ms after which a stale council session lock is automatically released. */
const COUNCIL_SESSION_TIMEOUT_MS = 5 * 60 * 1000
/** Tracks active council executions per session with timestamps for stale entry cleanup. */
const activeCouncilSessions = new Map<string, number>()
function purgeStaleEntries(): void {
const now = Date.now()
for (const [sessionId, startedAt] of activeCouncilSessions) {
if (now - startedAt > COUNCIL_SESSION_TIMEOUT_MS) {
activeCouncilSessions.delete(sessionId)
}
}
}
export function isCouncilRunning(sessionId: string): boolean {
purgeStaleEntries()
return activeCouncilSessions.has(sessionId)
}
export function markCouncilRunning(sessionId: string): void {
activeCouncilSessions.set(sessionId, Date.now())
}
export function markCouncilDone(sessionId: string): void {
activeCouncilSessions.delete(sessionId)
}
/** Visible for testing only. */
export function _resetForTesting(): void {
activeCouncilSessions.clear()
}
/** Visible for testing only. */
export function _setTimestampForTesting(sessionId: string, timestamp: number): void {
activeCouncilSessions.set(sessionId, timestamp)
}

View File

@@ -97,6 +97,30 @@ describe("filterCouncilMembers", () => {
)
})
test("selects named member by model ID when name differs from model", () => {
// #given - "Claude" has name "Claude" but model "anthropic/claude-sonnet-4-5"
const selectedMembers = ["anthropic/claude-sonnet-4-5"]
// #when
const result = filterCouncilMembers(configuredMembers, selectedMembers)
// #then - should find the member by model ID even though it has a custom name
expect(result.members).toEqual([configuredMembers[0]])
expect(result.error).toBeUndefined()
})
test("deduplicates when same member is selected by both name and model", () => {
// #given
const selectedMembers = ["Claude", "anthropic/claude-sonnet-4-5"]
// #when
const result = filterCouncilMembers(configuredMembers, selectedMembers)
// #then - should return only one copy
expect(result.members).toEqual([configuredMembers[0]])
expect(result.error).toBeUndefined()
})
test("returns error listing only unmatched names when partially matched", () => {
// #given
const selectedMembers = ["claude", "non-existent"]

View File

@@ -4,11 +4,9 @@ import type { CouncilConfig, CouncilMemberConfig } from "../../agents/athena/typ
import type { BackgroundManager } from "../../features/background-agent"
import { ATHENA_COUNCIL_TOOL_DESCRIPTION_TEMPLATE } from "./constants"
import { createCouncilLauncher } from "./council-launcher"
import { isCouncilRunning, markCouncilDone, markCouncilRunning } from "./session-guard"
import type { AthenaCouncilLaunchResult, AthenaCouncilToolArgs } from "./types"
/** Tracks active council executions per session to prevent duplicate launches. */
const activeCouncilSessions = new Set<string>()
function isCouncilConfigured(councilConfig: CouncilConfig | undefined): councilConfig is CouncilConfig {
return Boolean(councilConfig && councilConfig.members.length > 0)
}
@@ -28,13 +26,15 @@ export function filterCouncilMembers(
const memberLookup = new Map<string, CouncilMemberConfig>()
members.forEach((member) => {
const key = (member.name ?? member.model).toLowerCase()
memberLookup.set(key, member)
memberLookup.set(member.model.toLowerCase(), member)
if (member.name) {
memberLookup.set(member.name.toLowerCase(), member)
}
})
const unresolved: string[] = []
const filteredMembers: CouncilMemberConfig[] = []
const includedMemberKeys = new Set<string>()
const includedMembers = new Set<CouncilMemberConfig>()
selectedNames.forEach((selectedName) => {
const selectedKey = selectedName.toLowerCase()
@@ -44,12 +44,11 @@ export function filterCouncilMembers(
return
}
const memberKey = matchedMember.model
if (includedMemberKeys.has(memberKey)) {
if (includedMembers.has(matchedMember)) {
return
}
includedMemberKeys.add(memberKey)
includedMembers.add(matchedMember)
filteredMembers.push(matchedMember)
})
@@ -98,11 +97,11 @@ export function createAthenaCouncilTool(args: {
return filteredMembers.error
}
if (activeCouncilSessions.has(toolContext.sessionID)) {
if (isCouncilRunning(toolContext.sessionID)) {
return "Council is already running for this session. Wait for the current council execution to complete."
}
activeCouncilSessions.add(toolContext.sessionID)
markCouncilRunning(toolContext.sessionID)
try {
const execution = await executeCouncil({
question: toolArgs.question,
@@ -132,10 +131,10 @@ export function createAthenaCouncilTool(args: {
})),
}
activeCouncilSessions.delete(toolContext.sessionID)
markCouncilDone(toolContext.sessionID)
return JSON.stringify(launchResult)
} catch (error) {
activeCouncilSessions.delete(toolContext.sessionID)
markCouncilDone(toolContext.sessionID)
throw error
}
},