fix(athena): address 6 council review findings — launcher, schema, filtering, presentation
- Forward temperature and permission through council-launcher to background manager - Add LaunchInput.temperature and LaunchInput.permission to background-agent types - Extract session guard with 5-minute timeout to prevent stale council locks - Make council optional in AthenaOverrideConfigSchema for partial user overrides - Support member lookup by both name and model ID in filterCouncilMembers - Add provider/model-id format validation to CouncilMemberSchema - Fix findings-presenter group header to show finding count instead of first finding's reporter count
This commit is contained in:
@@ -5,10 +5,7 @@ import { collectCouncilResults } from "./council-result-collector"
|
||||
import { parseModelString } from "./model-parser"
|
||||
import type { CouncilConfig, CouncilExecutionResult, CouncilMemberConfig, CouncilMemberResponse } from "./types"
|
||||
|
||||
export interface CouncilLaunchInput extends LaunchInput {
|
||||
temperature?: number
|
||||
permission?: Record<string, "ask" | "allow" | "deny">
|
||||
}
|
||||
export type CouncilLaunchInput = LaunchInput
|
||||
|
||||
export interface CouncilLauncher {
|
||||
launch(input: CouncilLaunchInput): Promise<BackgroundTask>
|
||||
|
||||
@@ -117,6 +117,42 @@ describe("formatFindingsForUser", () => {
|
||||
expect(output).toContain("No synthesized findings are available")
|
||||
})
|
||||
|
||||
//#given multiple majority findings with different reporter counts
|
||||
//#when formatting is generated
|
||||
//#then group header shows the agreement level label without a misleading single count
|
||||
test("shows agreement level label in group header without single-finding count", () => {
|
||||
const result = createSynthesisResult({
|
||||
findings: [
|
||||
{
|
||||
summary: "Finding A",
|
||||
details: "Reported by 3 members",
|
||||
agreementLevel: "majority",
|
||||
reportedBy: ["OpenAI", "Claude", "Gemini"],
|
||||
assessment: { agrees: true, rationale: "Valid" },
|
||||
isFalsePositiveRisk: false,
|
||||
},
|
||||
{
|
||||
summary: "Finding B",
|
||||
details: "Reported by 2 members",
|
||||
agreementLevel: "majority",
|
||||
reportedBy: ["OpenAI", "Claude"],
|
||||
assessment: { agrees: true, rationale: "Also valid" },
|
||||
isFalsePositiveRisk: false,
|
||||
},
|
||||
],
|
||||
})
|
||||
|
||||
const output = formatFindingsForUser(result)
|
||||
|
||||
// The header should show the level label without a misleading single-finding count
|
||||
// It should NOT use the first finding's count as the group header
|
||||
expect(output).not.toContain("## Majority Findings (3 members report this (majority))")
|
||||
expect(output).toContain("## Majority Findings")
|
||||
// Each individual finding still shows its own agreement context
|
||||
expect(output).toContain("Agreement context: 3 members report this (majority)")
|
||||
expect(output).toContain("Agreement context: 2 members report this (majority)")
|
||||
})
|
||||
|
||||
//#given a non-empty findings result
|
||||
//#when formatting is generated
|
||||
//#then output ends with an action recommendation section
|
||||
|
||||
@@ -72,8 +72,7 @@ export function formatFindingsForUser(result: SynthesisResult): string {
|
||||
return []
|
||||
}
|
||||
|
||||
const firstFinding = findings[0]
|
||||
const header = `## ${toTitle(level)} Findings (${formatAgreementLine(level, firstFinding)})`
|
||||
const header = `## ${toTitle(level)} Findings (${findings.length})`
|
||||
const entries = findings.map((finding) => formatFinding(level, finding)).join("\n\n")
|
||||
return [`${header}\n\n${entries}`]
|
||||
})
|
||||
|
||||
@@ -561,7 +561,7 @@ describe("Athena agent override", () => {
|
||||
expect(result.data.agents?.athena?.model).toBe("openai/gpt-5.3-codex")
|
||||
expect(result.data.agents?.athena?.temperature).toBe(0.2)
|
||||
expect(result.data.agents?.athena?.prompt_append).toBe("Use consensus-first synthesis.")
|
||||
expect(result.data.agents?.athena?.council.members).toHaveLength(3)
|
||||
expect(result.data.agents?.athena?.council?.members).toHaveLength(3)
|
||||
}
|
||||
})
|
||||
|
||||
@@ -584,7 +584,7 @@ describe("Athena agent override", () => {
|
||||
expect(result.success).toBe(false)
|
||||
})
|
||||
|
||||
test("rejects athena override when council is missing", () => {
|
||||
test("accepts athena override without council (temperature-only override)", () => {
|
||||
// given
|
||||
const config = {
|
||||
agents: {
|
||||
@@ -598,7 +598,7 @@ describe("Athena agent override", () => {
|
||||
const result = OhMyOpenCodeConfigSchema.safeParse(config)
|
||||
|
||||
// then
|
||||
expect(result.success).toBe(false)
|
||||
expect(result.success).toBe(true)
|
||||
})
|
||||
})
|
||||
|
||||
|
||||
@@ -58,7 +58,7 @@ export const AgentOverrideConfigSchema = z.object({
|
||||
|
||||
export const AthenaOverrideConfigSchema = AgentOverrideConfigSchema.merge(
|
||||
z.object({
|
||||
council: AthenaConfigSchema.shape.council,
|
||||
council: AthenaConfigSchema.shape.council.optional(),
|
||||
})
|
||||
)
|
||||
|
||||
|
||||
@@ -41,6 +41,50 @@ describe("CouncilMemberSchema", () => {
|
||||
expect(result.success).toBe(false)
|
||||
})
|
||||
|
||||
test("rejects model string without provider/model separator", () => {
|
||||
//#given
|
||||
const config = { model: "invalid-model" }
|
||||
|
||||
//#when
|
||||
const result = CouncilMemberSchema.safeParse(config)
|
||||
|
||||
//#then
|
||||
expect(result.success).toBe(false)
|
||||
})
|
||||
|
||||
test("rejects model string with empty provider", () => {
|
||||
//#given
|
||||
const config = { model: "/gpt-5.3-codex" }
|
||||
|
||||
//#when
|
||||
const result = CouncilMemberSchema.safeParse(config)
|
||||
|
||||
//#then
|
||||
expect(result.success).toBe(false)
|
||||
})
|
||||
|
||||
test("rejects model string with empty model ID", () => {
|
||||
//#given
|
||||
const config = { model: "openai/" }
|
||||
|
||||
//#when
|
||||
const result = CouncilMemberSchema.safeParse(config)
|
||||
|
||||
//#then
|
||||
expect(result.success).toBe(false)
|
||||
})
|
||||
|
||||
test("rejects empty model string", () => {
|
||||
//#given
|
||||
const config = { model: "" }
|
||||
|
||||
//#when
|
||||
const result = CouncilMemberSchema.safeParse(config)
|
||||
|
||||
//#then
|
||||
expect(result.success).toBe(false)
|
||||
})
|
||||
|
||||
test("rejects temperature below 0", () => {
|
||||
//#given
|
||||
const config = { model: "openai/gpt-5.3-codex", temperature: -0.1 }
|
||||
|
||||
@@ -1,7 +1,19 @@
|
||||
import { z } from "zod"
|
||||
|
||||
/** Validates model string format: "provider/model-id" (e.g., "openai/gpt-5.3-codex"). */
|
||||
const ModelStringSchema = z
|
||||
.string()
|
||||
.min(1)
|
||||
.refine(
|
||||
(model) => {
|
||||
const slashIndex = model.indexOf("/")
|
||||
return slashIndex > 0 && slashIndex < model.length - 1
|
||||
},
|
||||
{ message: 'Model must be in "provider/model-id" format (e.g., "openai/gpt-5.3-codex")' }
|
||||
)
|
||||
|
||||
export const CouncilMemberSchema = z.object({
|
||||
model: z.string(),
|
||||
model: ModelStringSchema,
|
||||
temperature: z.number().min(0).max(2).optional(),
|
||||
variant: z.string().optional(),
|
||||
name: z.string().optional(),
|
||||
|
||||
@@ -72,6 +72,10 @@ export interface LaunchInput {
|
||||
skills?: string[]
|
||||
skillContent?: string
|
||||
category?: string
|
||||
/** Per-task temperature override for council members or custom launches */
|
||||
temperature?: number
|
||||
/** Tool permission overrides (e.g., { write: "deny", edit: "deny" }) */
|
||||
permission?: Record<string, "ask" | "allow" | "deny">
|
||||
}
|
||||
|
||||
export interface ResumeInput {
|
||||
|
||||
77
src/tools/athena-council/council-launcher.test.ts
Normal file
77
src/tools/athena-council/council-launcher.test.ts
Normal file
@@ -0,0 +1,77 @@
|
||||
import { describe, expect, test } from "bun:test"
|
||||
import type { BackgroundManager } from "../../features/background-agent"
|
||||
import type { BackgroundTask, LaunchInput } from "../../features/background-agent/types"
|
||||
import { createCouncilLauncher } from "./council-launcher"
|
||||
|
||||
function createMockTask(id: string): BackgroundTask {
|
||||
return {
|
||||
id,
|
||||
parentSessionID: "session-1",
|
||||
parentMessageID: "message-1",
|
||||
description: "test",
|
||||
prompt: "test",
|
||||
agent: "athena",
|
||||
status: "running",
|
||||
}
|
||||
}
|
||||
|
||||
describe("createCouncilLauncher", () => {
|
||||
//#given a council launch input with temperature and permission
|
||||
//#when launch is called
|
||||
//#then temperature and permission are forwarded to the background manager
|
||||
test("forwards temperature and permission to background manager", async () => {
|
||||
const capturedInputs: LaunchInput[] = []
|
||||
const mockManager = {
|
||||
launch: async (input: LaunchInput) => {
|
||||
capturedInputs.push(input)
|
||||
return createMockTask("bg-1")
|
||||
},
|
||||
getTask: () => undefined,
|
||||
} as unknown as BackgroundManager
|
||||
|
||||
const launcher = createCouncilLauncher(mockManager)
|
||||
|
||||
await launcher.launch({
|
||||
description: "Council member: test",
|
||||
prompt: "Analyze this",
|
||||
agent: "athena",
|
||||
parentSessionID: "session-1",
|
||||
parentMessageID: "message-1",
|
||||
model: { providerID: "openai", modelID: "gpt-5.3-codex" },
|
||||
temperature: 0.3,
|
||||
permission: { write: "deny", edit: "deny", task: "deny" },
|
||||
})
|
||||
|
||||
expect(capturedInputs).toHaveLength(1)
|
||||
expect(capturedInputs[0]?.temperature).toBe(0.3)
|
||||
expect(capturedInputs[0]?.permission).toEqual({ write: "deny", edit: "deny", task: "deny" })
|
||||
})
|
||||
|
||||
//#given a council launch input without temperature and permission
|
||||
//#when launch is called
|
||||
//#then undefined temperature and permission are forwarded (not dropped)
|
||||
test("forwards undefined temperature and permission without error", async () => {
|
||||
const capturedInputs: LaunchInput[] = []
|
||||
const mockManager = {
|
||||
launch: async (input: LaunchInput) => {
|
||||
capturedInputs.push(input)
|
||||
return createMockTask("bg-2")
|
||||
},
|
||||
getTask: () => undefined,
|
||||
} as unknown as BackgroundManager
|
||||
|
||||
const launcher = createCouncilLauncher(mockManager)
|
||||
|
||||
await launcher.launch({
|
||||
description: "Council member: test",
|
||||
prompt: "Analyze this",
|
||||
agent: "athena",
|
||||
parentSessionID: "session-1",
|
||||
parentMessageID: "message-1",
|
||||
})
|
||||
|
||||
expect(capturedInputs).toHaveLength(1)
|
||||
expect(capturedInputs[0]?.temperature).toBeUndefined()
|
||||
expect(capturedInputs[0]?.permission).toBeUndefined()
|
||||
})
|
||||
})
|
||||
@@ -12,6 +12,8 @@ export function createCouncilLauncher(manager: BackgroundManager): CouncilLaunch
|
||||
parentMessageID: input.parentMessageID,
|
||||
parentAgent: input.parentAgent,
|
||||
model: input.model,
|
||||
temperature: input.temperature,
|
||||
permission: input.permission,
|
||||
})
|
||||
},
|
||||
}
|
||||
|
||||
72
src/tools/athena-council/session-guard.test.ts
Normal file
72
src/tools/athena-council/session-guard.test.ts
Normal file
@@ -0,0 +1,72 @@
|
||||
import { afterEach, describe, expect, test } from "bun:test"
|
||||
import {
|
||||
isCouncilRunning,
|
||||
markCouncilDone,
|
||||
markCouncilRunning,
|
||||
_resetForTesting,
|
||||
_setTimestampForTesting,
|
||||
} from "./session-guard"
|
||||
|
||||
afterEach(() => {
|
||||
_resetForTesting()
|
||||
})
|
||||
|
||||
describe("session-guard", () => {
|
||||
//#given no active sessions
|
||||
//#when isCouncilRunning is checked
|
||||
//#then returns false
|
||||
test("returns false for unknown session", () => {
|
||||
expect(isCouncilRunning("session-1")).toBe(false)
|
||||
})
|
||||
|
||||
//#given a session is marked as running
|
||||
//#when isCouncilRunning is checked
|
||||
//#then returns true
|
||||
test("returns true after markCouncilRunning", () => {
|
||||
markCouncilRunning("session-1")
|
||||
|
||||
expect(isCouncilRunning("session-1")).toBe(true)
|
||||
})
|
||||
|
||||
//#given a session was marked running then done
|
||||
//#when isCouncilRunning is checked
|
||||
//#then returns false
|
||||
test("returns false after markCouncilDone", () => {
|
||||
markCouncilRunning("session-1")
|
||||
markCouncilDone("session-1")
|
||||
|
||||
expect(isCouncilRunning("session-1")).toBe(false)
|
||||
})
|
||||
|
||||
//#given a session was marked running 6 minutes ago (past 5-minute timeout)
|
||||
//#when isCouncilRunning is checked
|
||||
//#then stale entry is purged and returns false
|
||||
test("purges stale entries older than 5 minutes", () => {
|
||||
const sixMinutesAgo = Date.now() - 6 * 60 * 1000
|
||||
_setTimestampForTesting("stale-session", sixMinutesAgo)
|
||||
|
||||
expect(isCouncilRunning("stale-session")).toBe(false)
|
||||
})
|
||||
|
||||
//#given a session was marked running 4 minutes ago (within 5-minute timeout)
|
||||
//#when isCouncilRunning is checked
|
||||
//#then entry is kept and returns true
|
||||
test("keeps entries within timeout window", () => {
|
||||
const fourMinutesAgo = Date.now() - 4 * 60 * 1000
|
||||
_setTimestampForTesting("recent-session", fourMinutesAgo)
|
||||
|
||||
expect(isCouncilRunning("recent-session")).toBe(true)
|
||||
})
|
||||
|
||||
//#given multiple sessions where one is stale and one is fresh
|
||||
//#when isCouncilRunning is checked for the fresh one
|
||||
//#then stale entry is purged but fresh entry remains
|
||||
test("purges only stale entries while keeping fresh ones", () => {
|
||||
const sixMinutesAgo = Date.now() - 6 * 60 * 1000
|
||||
_setTimestampForTesting("stale-session", sixMinutesAgo)
|
||||
markCouncilRunning("fresh-session")
|
||||
|
||||
expect(isCouncilRunning("stale-session")).toBe(false)
|
||||
expect(isCouncilRunning("fresh-session")).toBe(true)
|
||||
})
|
||||
})
|
||||
37
src/tools/athena-council/session-guard.ts
Normal file
37
src/tools/athena-council/session-guard.ts
Normal file
@@ -0,0 +1,37 @@
|
||||
/** Timeout in ms after which a stale council session lock is automatically released. */
|
||||
const COUNCIL_SESSION_TIMEOUT_MS = 5 * 60 * 1000
|
||||
|
||||
/** Tracks active council executions per session with timestamps for stale entry cleanup. */
|
||||
const activeCouncilSessions = new Map<string, number>()
|
||||
|
||||
function purgeStaleEntries(): void {
|
||||
const now = Date.now()
|
||||
for (const [sessionId, startedAt] of activeCouncilSessions) {
|
||||
if (now - startedAt > COUNCIL_SESSION_TIMEOUT_MS) {
|
||||
activeCouncilSessions.delete(sessionId)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export function isCouncilRunning(sessionId: string): boolean {
|
||||
purgeStaleEntries()
|
||||
return activeCouncilSessions.has(sessionId)
|
||||
}
|
||||
|
||||
export function markCouncilRunning(sessionId: string): void {
|
||||
activeCouncilSessions.set(sessionId, Date.now())
|
||||
}
|
||||
|
||||
export function markCouncilDone(sessionId: string): void {
|
||||
activeCouncilSessions.delete(sessionId)
|
||||
}
|
||||
|
||||
/** Visible for testing only. */
|
||||
export function _resetForTesting(): void {
|
||||
activeCouncilSessions.clear()
|
||||
}
|
||||
|
||||
/** Visible for testing only. */
|
||||
export function _setTimestampForTesting(sessionId: string, timestamp: number): void {
|
||||
activeCouncilSessions.set(sessionId, timestamp)
|
||||
}
|
||||
@@ -97,6 +97,30 @@ describe("filterCouncilMembers", () => {
|
||||
)
|
||||
})
|
||||
|
||||
test("selects named member by model ID when name differs from model", () => {
|
||||
// #given - "Claude" has name "Claude" but model "anthropic/claude-sonnet-4-5"
|
||||
const selectedMembers = ["anthropic/claude-sonnet-4-5"]
|
||||
|
||||
// #when
|
||||
const result = filterCouncilMembers(configuredMembers, selectedMembers)
|
||||
|
||||
// #then - should find the member by model ID even though it has a custom name
|
||||
expect(result.members).toEqual([configuredMembers[0]])
|
||||
expect(result.error).toBeUndefined()
|
||||
})
|
||||
|
||||
test("deduplicates when same member is selected by both name and model", () => {
|
||||
// #given
|
||||
const selectedMembers = ["Claude", "anthropic/claude-sonnet-4-5"]
|
||||
|
||||
// #when
|
||||
const result = filterCouncilMembers(configuredMembers, selectedMembers)
|
||||
|
||||
// #then - should return only one copy
|
||||
expect(result.members).toEqual([configuredMembers[0]])
|
||||
expect(result.error).toBeUndefined()
|
||||
})
|
||||
|
||||
test("returns error listing only unmatched names when partially matched", () => {
|
||||
// #given
|
||||
const selectedMembers = ["claude", "non-existent"]
|
||||
|
||||
@@ -4,11 +4,9 @@ import type { CouncilConfig, CouncilMemberConfig } from "../../agents/athena/typ
|
||||
import type { BackgroundManager } from "../../features/background-agent"
|
||||
import { ATHENA_COUNCIL_TOOL_DESCRIPTION_TEMPLATE } from "./constants"
|
||||
import { createCouncilLauncher } from "./council-launcher"
|
||||
import { isCouncilRunning, markCouncilDone, markCouncilRunning } from "./session-guard"
|
||||
import type { AthenaCouncilLaunchResult, AthenaCouncilToolArgs } from "./types"
|
||||
|
||||
/** Tracks active council executions per session to prevent duplicate launches. */
|
||||
const activeCouncilSessions = new Set<string>()
|
||||
|
||||
function isCouncilConfigured(councilConfig: CouncilConfig | undefined): councilConfig is CouncilConfig {
|
||||
return Boolean(councilConfig && councilConfig.members.length > 0)
|
||||
}
|
||||
@@ -28,13 +26,15 @@ export function filterCouncilMembers(
|
||||
|
||||
const memberLookup = new Map<string, CouncilMemberConfig>()
|
||||
members.forEach((member) => {
|
||||
const key = (member.name ?? member.model).toLowerCase()
|
||||
memberLookup.set(key, member)
|
||||
memberLookup.set(member.model.toLowerCase(), member)
|
||||
if (member.name) {
|
||||
memberLookup.set(member.name.toLowerCase(), member)
|
||||
}
|
||||
})
|
||||
|
||||
const unresolved: string[] = []
|
||||
const filteredMembers: CouncilMemberConfig[] = []
|
||||
const includedMemberKeys = new Set<string>()
|
||||
const includedMembers = new Set<CouncilMemberConfig>()
|
||||
|
||||
selectedNames.forEach((selectedName) => {
|
||||
const selectedKey = selectedName.toLowerCase()
|
||||
@@ -44,12 +44,11 @@ export function filterCouncilMembers(
|
||||
return
|
||||
}
|
||||
|
||||
const memberKey = matchedMember.model
|
||||
if (includedMemberKeys.has(memberKey)) {
|
||||
if (includedMembers.has(matchedMember)) {
|
||||
return
|
||||
}
|
||||
|
||||
includedMemberKeys.add(memberKey)
|
||||
includedMembers.add(matchedMember)
|
||||
filteredMembers.push(matchedMember)
|
||||
})
|
||||
|
||||
@@ -98,11 +97,11 @@ export function createAthenaCouncilTool(args: {
|
||||
return filteredMembers.error
|
||||
}
|
||||
|
||||
if (activeCouncilSessions.has(toolContext.sessionID)) {
|
||||
if (isCouncilRunning(toolContext.sessionID)) {
|
||||
return "Council is already running for this session. Wait for the current council execution to complete."
|
||||
}
|
||||
|
||||
activeCouncilSessions.add(toolContext.sessionID)
|
||||
markCouncilRunning(toolContext.sessionID)
|
||||
try {
|
||||
const execution = await executeCouncil({
|
||||
question: toolArgs.question,
|
||||
@@ -132,10 +131,10 @@ export function createAthenaCouncilTool(args: {
|
||||
})),
|
||||
}
|
||||
|
||||
activeCouncilSessions.delete(toolContext.sessionID)
|
||||
markCouncilDone(toolContext.sessionID)
|
||||
return JSON.stringify(launchResult)
|
||||
} catch (error) {
|
||||
activeCouncilSessions.delete(toolContext.sessionID)
|
||||
markCouncilDone(toolContext.sessionID)
|
||||
throw error
|
||||
}
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user