fix(athena): address 6 council review findings — launcher, schema, filtering, presentation

- Forward temperature and permission through council-launcher to background manager - Add LaunchInput.temperature and LaunchInput.permission to background-agent types - Extract session guard with 5-minute timeout to prevent stale council locks - Make council optional in AthenaOverrideConfigSchema for partial user overrides - Support member lookup by both name and model ID in filterCouncilMembers - Add provider/model-id format validation to CouncilMemberSchema - Fix findings-presenter group header to show finding count instead of first finding's reporter count
2026-02-13 16:00:04 +01:00
parent 7cfdc68100
commit d8ba9b1f0c
14 changed files with 327 additions and 24 deletions
--- a/src/agents/athena/council-orchestrator.ts
+++ b/src/agents/athena/council-orchestrator.ts
@@ -5,10 +5,7 @@ import { collectCouncilResults } from "./council-result-collector"
 import { parseModelString } from "./model-parser"
 import type { CouncilConfig, CouncilExecutionResult, CouncilMemberConfig, CouncilMemberResponse } from "./types"

-export interface CouncilLaunchInput extends LaunchInput {
-  temperature?: number
-  permission?: Record<string, "ask" | "allow" | "deny">
-}
+export type CouncilLaunchInput = LaunchInput

 export interface CouncilLauncher {
  launch(input: CouncilLaunchInput): Promise<BackgroundTask>
--- a/src/agents/athena/findings-presenter.test.ts
+++ b/src/agents/athena/findings-presenter.test.ts
@@ -117,6 +117,42 @@ describe("formatFindingsForUser", () => {
    expect(output).toContain("No synthesized findings are available")
  })

+  //#given multiple majority findings with different reporter counts
+  //#when formatting is generated
+  //#then group header shows the agreement level label without a misleading single count
+  test("shows agreement level label in group header without single-finding count", () => {
+    const result = createSynthesisResult({
+      findings: [
+        {
+          summary: "Finding A",
+          details: "Reported by 3 members",
+          agreementLevel: "majority",
+          reportedBy: ["OpenAI", "Claude", "Gemini"],
+          assessment: { agrees: true, rationale: "Valid" },
+          isFalsePositiveRisk: false,
+        },
+        {
+          summary: "Finding B",
+          details: "Reported by 2 members",
+          agreementLevel: "majority",
+          reportedBy: ["OpenAI", "Claude"],
+          assessment: { agrees: true, rationale: "Also valid" },
+          isFalsePositiveRisk: false,
+        },
+      ],
+    })
+
+    const output = formatFindingsForUser(result)
+
+    // The header should show the level label without a misleading single-finding count
+    // It should NOT use the first finding's count as the group header
+    expect(output).not.toContain("## Majority Findings (3 members report this (majority))")
+    expect(output).toContain("## Majority Findings")
+    // Each individual finding still shows its own agreement context
+    expect(output).toContain("Agreement context: 3 members report this (majority)")
+    expect(output).toContain("Agreement context: 2 members report this (majority)")
+  })
+
  //#given a non-empty findings result
  //#when formatting is generated
  //#then output ends with an action recommendation section
--- a/src/agents/athena/findings-presenter.ts
+++ b/src/agents/athena/findings-presenter.ts
@@ -72,8 +72,7 @@ export function formatFindingsForUser(result: SynthesisResult): string {
      return []
    }

-    const firstFinding = findings[0]
-    const header = `## ${toTitle(level)} Findings (${formatAgreementLine(level, firstFinding)})`
+    const header = `## ${toTitle(level)} Findings (${findings.length})`
    const entries = findings.map((finding) => formatFinding(level, finding)).join("\n\n")
    return [`${header}\n\n${entries}`]
  })
--- a/src/config/schema.test.ts
+++ b/src/config/schema.test.ts
@@ -561,7 +561,7 @@ describe("Athena agent override", () => {
      expect(result.data.agents?.athena?.model).toBe("openai/gpt-5.3-codex")
      expect(result.data.agents?.athena?.temperature).toBe(0.2)
      expect(result.data.agents?.athena?.prompt_append).toBe("Use consensus-first synthesis.")
-      expect(result.data.agents?.athena?.council.members).toHaveLength(3)
+      expect(result.data.agents?.athena?.council?.members).toHaveLength(3)
    }
  })

@@ -584,7 +584,7 @@ describe("Athena agent override", () => {
    expect(result.success).toBe(false)
  })

-  test("rejects athena override when council is missing", () => {
+  test("accepts athena override without council (temperature-only override)", () => {
    // given
    const config = {
      agents: {
@@ -598,7 +598,7 @@ describe("Athena agent override", () => {
    const result = OhMyOpenCodeConfigSchema.safeParse(config)

    // then
-    expect(result.success).toBe(false)
+    expect(result.success).toBe(true)
  })
 })

--- a/src/config/schema/agent-overrides.ts
+++ b/src/config/schema/agent-overrides.ts
@@ -58,7 +58,7 @@ export const AgentOverrideConfigSchema = z.object({

 export const AthenaOverrideConfigSchema = AgentOverrideConfigSchema.merge(
  z.object({
-    council: AthenaConfigSchema.shape.council,
+    council: AthenaConfigSchema.shape.council.optional(),
  })
 )

--- a/src/config/schema/athena.test.ts
+++ b/src/config/schema/athena.test.ts
@@ -41,6 +41,50 @@ describe("CouncilMemberSchema", () => {
    expect(result.success).toBe(false)
  })

+  test("rejects model string without provider/model separator", () => {
+    //#given
+    const config = { model: "invalid-model" }
+
+    //#when
+    const result = CouncilMemberSchema.safeParse(config)
+
+    //#then
+    expect(result.success).toBe(false)
+  })
+
+  test("rejects model string with empty provider", () => {
+    //#given
+    const config = { model: "/gpt-5.3-codex" }
+
+    //#when
+    const result = CouncilMemberSchema.safeParse(config)
+
+    //#then
+    expect(result.success).toBe(false)
+  })
+
+  test("rejects model string with empty model ID", () => {
+    //#given
+    const config = { model: "openai/" }
+
+    //#when
+    const result = CouncilMemberSchema.safeParse(config)
+
+    //#then
+    expect(result.success).toBe(false)
+  })
+
+  test("rejects empty model string", () => {
+    //#given
+    const config = { model: "" }
+
+    //#when
+    const result = CouncilMemberSchema.safeParse(config)
+
+    //#then
+    expect(result.success).toBe(false)
+  })
+
  test("rejects temperature below 0", () => {
    //#given
    const config = { model: "openai/gpt-5.3-codex", temperature: -0.1 }
--- a/src/config/schema/athena.ts
+++ b/src/config/schema/athena.ts
@@ -1,7 +1,19 @@
 import { z } from "zod"

+/** Validates model string format: "provider/model-id" (e.g., "openai/gpt-5.3-codex"). */
+const ModelStringSchema = z
+  .string()
+  .min(1)
+  .refine(
+    (model) => {
+      const slashIndex = model.indexOf("/")
+      return slashIndex > 0 && slashIndex < model.length - 1
+    },
+    { message: 'Model must be in "provider/model-id" format (e.g., "openai/gpt-5.3-codex")' }
+  )
+
 export const CouncilMemberSchema = z.object({
-  model: z.string(),
+  model: ModelStringSchema,
  temperature: z.number().min(0).max(2).optional(),
  variant: z.string().optional(),
  name: z.string().optional(),
--- a/src/features/background-agent/types.ts
+++ b/src/features/background-agent/types.ts
@@ -72,6 +72,10 @@ export interface LaunchInput {
  skills?: string[]
  skillContent?: string
  category?: string
+  /** Per-task temperature override for council members or custom launches */
+  temperature?: number
+  /** Tool permission overrides (e.g., { write: "deny", edit: "deny" }) */
+  permission?: Record<string, "ask" | "allow" | "deny">
 }

 export interface ResumeInput {
--- a/src/tools/athena-council/council-launcher.test.ts
+++ b/src/tools/athena-council/council-launcher.test.ts
@@ -0,0 +1,77 @@
+import { describe, expect, test } from "bun:test"
+import type { BackgroundManager } from "../../features/background-agent"
+import type { BackgroundTask, LaunchInput } from "../../features/background-agent/types"
+import { createCouncilLauncher } from "./council-launcher"
+
+function createMockTask(id: string): BackgroundTask {
+  return {
+    id,
+    parentSessionID: "session-1",
+    parentMessageID: "message-1",
+    description: "test",
+    prompt: "test",
+    agent: "athena",
+    status: "running",
+  }
+}
+
+describe("createCouncilLauncher", () => {
+  //#given a council launch input with temperature and permission
+  //#when launch is called
+  //#then temperature and permission are forwarded to the background manager
+  test("forwards temperature and permission to background manager", async () => {
+    const capturedInputs: LaunchInput[] = []
+    const mockManager = {
+      launch: async (input: LaunchInput) => {
+        capturedInputs.push(input)
+        return createMockTask("bg-1")
+      },
+      getTask: () => undefined,
+    } as unknown as BackgroundManager
+
+    const launcher = createCouncilLauncher(mockManager)
+
+    await launcher.launch({
+      description: "Council member: test",
+      prompt: "Analyze this",
+      agent: "athena",
+      parentSessionID: "session-1",
+      parentMessageID: "message-1",
+      model: { providerID: "openai", modelID: "gpt-5.3-codex" },
+      temperature: 0.3,
+      permission: { write: "deny", edit: "deny", task: "deny" },
+    })
+
+    expect(capturedInputs).toHaveLength(1)
+    expect(capturedInputs[0]?.temperature).toBe(0.3)
+    expect(capturedInputs[0]?.permission).toEqual({ write: "deny", edit: "deny", task: "deny" })
+  })
+
+  //#given a council launch input without temperature and permission
+  //#when launch is called
+  //#then undefined temperature and permission are forwarded (not dropped)
+  test("forwards undefined temperature and permission without error", async () => {
+    const capturedInputs: LaunchInput[] = []
+    const mockManager = {
+      launch: async (input: LaunchInput) => {
+        capturedInputs.push(input)
+        return createMockTask("bg-2")
+      },
+      getTask: () => undefined,
+    } as unknown as BackgroundManager
+
+    const launcher = createCouncilLauncher(mockManager)
+
+    await launcher.launch({
+      description: "Council member: test",
+      prompt: "Analyze this",
+      agent: "athena",
+      parentSessionID: "session-1",
+      parentMessageID: "message-1",
+    })
+
+    expect(capturedInputs).toHaveLength(1)
+    expect(capturedInputs[0]?.temperature).toBeUndefined()
+    expect(capturedInputs[0]?.permission).toBeUndefined()
+  })
+})
--- a/src/tools/athena-council/council-launcher.ts
+++ b/src/tools/athena-council/council-launcher.ts
@@ -12,6 +12,8 @@ export function createCouncilLauncher(manager: BackgroundManager): CouncilLaunch
        parentMessageID: input.parentMessageID,
        parentAgent: input.parentAgent,
        model: input.model,
+        temperature: input.temperature,
+        permission: input.permission,
      })
    },
  }
--- a/src/tools/athena-council/session-guard.test.ts
+++ b/src/tools/athena-council/session-guard.test.ts
@@ -0,0 +1,72 @@
+import { afterEach, describe, expect, test } from "bun:test"
+import {
+  isCouncilRunning,
+  markCouncilDone,
+  markCouncilRunning,
+  _resetForTesting,
+  _setTimestampForTesting,
+} from "./session-guard"
+
+afterEach(() => {
+  _resetForTesting()
+})
+
+describe("session-guard", () => {
+  //#given no active sessions
+  //#when isCouncilRunning is checked
+  //#then returns false
+  test("returns false for unknown session", () => {
+    expect(isCouncilRunning("session-1")).toBe(false)
+  })
+
+  //#given a session is marked as running
+  //#when isCouncilRunning is checked
+  //#then returns true
+  test("returns true after markCouncilRunning", () => {
+    markCouncilRunning("session-1")
+
+    expect(isCouncilRunning("session-1")).toBe(true)
+  })
+
+  //#given a session was marked running then done
+  //#when isCouncilRunning is checked
+  //#then returns false
+  test("returns false after markCouncilDone", () => {
+    markCouncilRunning("session-1")
+    markCouncilDone("session-1")
+
+    expect(isCouncilRunning("session-1")).toBe(false)
+  })
+
+  //#given a session was marked running 6 minutes ago (past 5-minute timeout)
+  //#when isCouncilRunning is checked
+  //#then stale entry is purged and returns false
+  test("purges stale entries older than 5 minutes", () => {
+    const sixMinutesAgo = Date.now() - 6 * 60 * 1000
+    _setTimestampForTesting("stale-session", sixMinutesAgo)
+
+    expect(isCouncilRunning("stale-session")).toBe(false)
+  })
+
+  //#given a session was marked running 4 minutes ago (within 5-minute timeout)
+  //#when isCouncilRunning is checked
+  //#then entry is kept and returns true
+  test("keeps entries within timeout window", () => {
+    const fourMinutesAgo = Date.now() - 4 * 60 * 1000
+    _setTimestampForTesting("recent-session", fourMinutesAgo)
+
+    expect(isCouncilRunning("recent-session")).toBe(true)
+  })
+
+  //#given multiple sessions where one is stale and one is fresh
+  //#when isCouncilRunning is checked for the fresh one
+  //#then stale entry is purged but fresh entry remains
+  test("purges only stale entries while keeping fresh ones", () => {
+    const sixMinutesAgo = Date.now() - 6 * 60 * 1000
+    _setTimestampForTesting("stale-session", sixMinutesAgo)
+    markCouncilRunning("fresh-session")
+
+    expect(isCouncilRunning("stale-session")).toBe(false)
+    expect(isCouncilRunning("fresh-session")).toBe(true)
+  })
+})
--- a/src/tools/athena-council/session-guard.ts
+++ b/src/tools/athena-council/session-guard.ts
@@ -0,0 +1,37 @@
+/** Timeout in ms after which a stale council session lock is automatically released. */
+const COUNCIL_SESSION_TIMEOUT_MS = 5 * 60 * 1000
+
+/** Tracks active council executions per session with timestamps for stale entry cleanup. */
+const activeCouncilSessions = new Map<string, number>()
+
+function purgeStaleEntries(): void {
+  const now = Date.now()
+  for (const [sessionId, startedAt] of activeCouncilSessions) {
+    if (now - startedAt > COUNCIL_SESSION_TIMEOUT_MS) {
+      activeCouncilSessions.delete(sessionId)
+    }
+  }
+}
+
+export function isCouncilRunning(sessionId: string): boolean {
+  purgeStaleEntries()
+  return activeCouncilSessions.has(sessionId)
+}
+
+export function markCouncilRunning(sessionId: string): void {
+  activeCouncilSessions.set(sessionId, Date.now())
+}
+
+export function markCouncilDone(sessionId: string): void {
+  activeCouncilSessions.delete(sessionId)
+}
+
+/** Visible for testing only. */
+export function _resetForTesting(): void {
+  activeCouncilSessions.clear()
+}
+
+/** Visible for testing only. */
+export function _setTimestampForTesting(sessionId: string, timestamp: number): void {
+  activeCouncilSessions.set(sessionId, timestamp)
+}
--- a/src/tools/athena-council/tools.test.ts
+++ b/src/tools/athena-council/tools.test.ts
@@ -97,6 +97,30 @@ describe("filterCouncilMembers", () => {
    )
  })

+  test("selects named member by model ID when name differs from model", () => {
+    // #given - "Claude" has name "Claude" but model "anthropic/claude-sonnet-4-5"
+    const selectedMembers = ["anthropic/claude-sonnet-4-5"]
+
+    // #when
+    const result = filterCouncilMembers(configuredMembers, selectedMembers)
+
+    // #then - should find the member by model ID even though it has a custom name
+    expect(result.members).toEqual([configuredMembers[0]])
+    expect(result.error).toBeUndefined()
+  })
+
+  test("deduplicates when same member is selected by both name and model", () => {
+    // #given
+    const selectedMembers = ["Claude", "anthropic/claude-sonnet-4-5"]
+
+    // #when
+    const result = filterCouncilMembers(configuredMembers, selectedMembers)
+
+    // #then - should return only one copy
+    expect(result.members).toEqual([configuredMembers[0]])
+    expect(result.error).toBeUndefined()
+  })
+
  test("returns error listing only unmatched names when partially matched", () => {
    // #given
    const selectedMembers = ["claude", "non-existent"]
--- a/src/tools/athena-council/tools.ts
+++ b/src/tools/athena-council/tools.ts
@@ -4,11 +4,9 @@ import type { CouncilConfig, CouncilMemberConfig } from "../../agents/athena/typ
 import type { BackgroundManager } from "../../features/background-agent"
 import { ATHENA_COUNCIL_TOOL_DESCRIPTION_TEMPLATE } from "./constants"
 import { createCouncilLauncher } from "./council-launcher"
+import { isCouncilRunning, markCouncilDone, markCouncilRunning } from "./session-guard"
 import type { AthenaCouncilLaunchResult, AthenaCouncilToolArgs } from "./types"

-/** Tracks active council executions per session to prevent duplicate launches. */
-const activeCouncilSessions = new Set<string>()
-
 function isCouncilConfigured(councilConfig: CouncilConfig | undefined): councilConfig is CouncilConfig {
  return Boolean(councilConfig && councilConfig.members.length > 0)
 }
@@ -28,13 +26,15 @@ export function filterCouncilMembers(

  const memberLookup = new Map<string, CouncilMemberConfig>()
  members.forEach((member) => {
-    const key = (member.name ?? member.model).toLowerCase()
-    memberLookup.set(key, member)
+    memberLookup.set(member.model.toLowerCase(), member)
+    if (member.name) {
+      memberLookup.set(member.name.toLowerCase(), member)
+    }
  })

  const unresolved: string[] = []
  const filteredMembers: CouncilMemberConfig[] = []
-  const includedMemberKeys = new Set<string>()
+  const includedMembers = new Set<CouncilMemberConfig>()

  selectedNames.forEach((selectedName) => {
    const selectedKey = selectedName.toLowerCase()
@@ -44,12 +44,11 @@ export function filterCouncilMembers(
      return
    }

-    const memberKey = matchedMember.model
-    if (includedMemberKeys.has(memberKey)) {
+    if (includedMembers.has(matchedMember)) {
      return
    }

-    includedMemberKeys.add(memberKey)
+    includedMembers.add(matchedMember)
    filteredMembers.push(matchedMember)
  })

@@ -98,11 +97,11 @@ export function createAthenaCouncilTool(args: {
        return filteredMembers.error
      }

-      if (activeCouncilSessions.has(toolContext.sessionID)) {
+      if (isCouncilRunning(toolContext.sessionID)) {
        return "Council is already running for this session. Wait for the current council execution to complete."
      }

-      activeCouncilSessions.add(toolContext.sessionID)
+      markCouncilRunning(toolContext.sessionID)
      try {
        const execution = await executeCouncil({
          question: toolArgs.question,
@@ -132,10 +131,10 @@ export function createAthenaCouncilTool(args: {
            })),
        }

-        activeCouncilSessions.delete(toolContext.sessionID)
+        markCouncilDone(toolContext.sessionID)
        return JSON.stringify(launchResult)
      } catch (error) {
-        activeCouncilSessions.delete(toolContext.sessionID)
+        markCouncilDone(toolContext.sessionID)
        throw error
      }
    },