🏆 test: optimize test suite with FakeTimers and race condition fixes (#1284)

* fix: exclude prompt/permission from plan agent config
  plan agent should only inherit model settings from prometheus, not the prompt or permission. This ensures plan agent uses OpenCode's default behavior while only overriding the model.
* test(todo-continuation-enforcer): use FakeTimers for 15x faster tests
  - Add custom FakeTimers implementation (~100 lines)
  - Replace all real setTimeout waits with fakeTimers.advanceBy()
  - Test time: 104.6s → 7.01s
* test(callback-server): fix race conditions with Promise.all and Bun.fetch
  - Use Bun.fetch.bind(Bun) to avoid globalThis.fetch mock interference
  - Use Promise.all pattern for concurrent fetch/waitForCallback
  - Add Bun.sleep(10) in afterEach for port release
* test(concurrency): replace placeholder assertions with getCount checks
  Replace 6 meaningless expect(true).toBe(true) assertions with actual getCount() verifications for test quality improvement
* refactor(config-handler): simplify planDemoteConfig creation
  Remove unnecessary IIFE and destructuring, use direct spread instead
* test(executor): use FakeTimeouts for faster tests
  - Add custom FakeTimeouts implementation
  - Replace setTimeout waits with fakeTimeouts.advanceBy()
  - Test time reduced from ~26s to ~6.8s
* test: fix gemini model mock for artistry unstable mode
* test: fix model list mock payload shape
* test: mock provider models for artistry category
---------
Co-authored-by: justsisyphus <justsisyphus@users.noreply.github.com>
2026-01-30 22:10:52 +09:00
parent 3ced20d1ab
commit 8c2625cfb0
5 changed files with 285 additions and 76 deletions
--- a/src/features/background-agent/concurrency.test.ts
+++ b/src/features/background-agent/concurrency.test.ts
@@ -176,8 +176,8 @@ describe("ConcurrencyManager.acquire/release", () => {
    await manager.acquire("model-a")
    await manager.acquire("model-a")

-    // #then - both resolved without waiting
-    expect(true).toBe(true)
+    // #then - both resolved without waiting, count should be 2
+    expect(manager.getCount("model-a")).toBe(2)
  })

  test("should allow acquires up to default limit of 5", async () => {
@@ -190,8 +190,8 @@ describe("ConcurrencyManager.acquire/release", () => {
    await manager.acquire("model-a")
    await manager.acquire("model-a")

-    // #then - all 5 resolved
-    expect(true).toBe(true)
+    // #then - all 5 resolved, count should be 5
+    expect(manager.getCount("model-a")).toBe(5)
  })

  test("should queue when limit reached", async () => {
@@ -276,8 +276,8 @@ describe("ConcurrencyManager.acquire/release", () => {
    manager.release("model-a")
    await manager.acquire("model-a")

-    // #then
-    expect(true).toBe(true)
+    // #then - count should be 1 after re-acquiring
+    expect(manager.getCount("model-a")).toBe(1)
  })

  test("should handle release when no acquire", () => {
@@ -288,21 +288,21 @@ describe("ConcurrencyManager.acquire/release", () => {
    // #when - release without acquire
    manager.release("model-a")

-    // #then - should not throw
-    expect(true).toBe(true)
+    // #then - count should be 0 (no negative count)
+    expect(manager.getCount("model-a")).toBe(0)
  })

  test("should handle release when no prior acquire", () => {
    // #given - default config

-    // #when - release without acquire
-    manager.release("model-a")
+     // #when - release without acquire
+     manager.release("model-a")

-    // #then - should not throw
-    expect(true).toBe(true)
-  })
+     // #then - count should be 0 (no negative count)
+     expect(manager.getCount("model-a")).toBe(0)
+   })

-  test("should handle multiple acquires and releases correctly", async () => {
+   test("should handle multiple acquires and releases correctly", async () => {
    // #given
    const config: BackgroundTaskConfig = { defaultConcurrency: 3 }
    manager = new ConcurrencyManager(config)
@@ -317,11 +317,11 @@ describe("ConcurrencyManager.acquire/release", () => {
    manager.release("model-a")
    manager.release("model-a")

-    // Should be able to acquire again
-    await manager.acquire("model-a")
+     // Should be able to acquire again
+     await manager.acquire("model-a")

-    // #then
-    expect(true).toBe(true)
+     // #then - count should be 1 after re-acquiring
+     expect(manager.getCount("model-a")).toBe(1)
  })

  test("should use model-specific limit for acquire", async () => {
--- a/src/features/mcp-oauth/callback-server.test.ts
+++ b/src/features/mcp-oauth/callback-server.test.ts
@@ -1,6 +1,8 @@
 import { afterEach, describe, expect, it } from "bun:test"
 import { findAvailablePort, startCallbackServer, type CallbackServer } from "./callback-server"

+const nativeFetch = Bun.fetch.bind(Bun)
+
 describe("findAvailablePort", () => {
  it("returns the start port when it is available", async () => {
    //#given
@@ -34,9 +36,11 @@ describe("findAvailablePort", () => {
 describe("startCallbackServer", () => {
  let server: CallbackServer | null = null

-  afterEach(() => {
+  afterEach(async () => {
    server?.close()
    server = null
+    // Allow time for port to be released before next test
+    await Bun.sleep(10)
  })

  it("starts server and returns port", async () => {
@@ -57,9 +61,12 @@ describe("startCallbackServer", () => {
    const callbackUrl = `http://127.0.0.1:${server.port}/oauth/callback?code=test-code&state=test-state`

    //#when
-    const fetchPromise = fetch(callbackUrl)
-    const result = await server.waitForCallback()
-    const response = await fetchPromise
+    // Use Promise.all to ensure fetch and waitForCallback run concurrently
+    // This prevents race condition where waitForCallback blocks before fetch starts
+    const [result, response] = await Promise.all([
+      server.waitForCallback(),
+      nativeFetch(callbackUrl)
+    ])

    //#then
    expect(result).toEqual({ code: "test-code", state: "test-state" })
@@ -73,7 +80,7 @@ describe("startCallbackServer", () => {
    server = await startCallbackServer()

    //#when
-    const response = await fetch(`http://127.0.0.1:${server.port}/other`)
+    const response = await nativeFetch(`http://127.0.0.1:${server.port}/other`)

    //#then
    expect(response.status).toBe(404)
@@ -85,7 +92,7 @@ describe("startCallbackServer", () => {
    const callbackRejection = server.waitForCallback().catch((e: Error) => e)

    //#when
-    const response = await fetch(`http://127.0.0.1:${server.port}/oauth/callback?state=s`)
+    const response = await nativeFetch(`http://127.0.0.1:${server.port}/oauth/callback?state=s`)

    //#then
    expect(response.status).toBe(400)
@@ -100,7 +107,7 @@ describe("startCallbackServer", () => {
    const callbackRejection = server.waitForCallback().catch((e: Error) => e)

    //#when
-    const response = await fetch(`http://127.0.0.1:${server.port}/oauth/callback?code=c`)
+    const response = await nativeFetch(`http://127.0.0.1:${server.port}/oauth/callback?code=c`)

    //#then
    expect(response.status).toBe(400)
@@ -120,7 +127,7 @@ describe("startCallbackServer", () => {

    //#then
    try {
-      await fetch(`http://127.0.0.1:${port}/oauth/callback?code=c&state=s`)
+      await nativeFetch(`http://127.0.0.1:${port}/oauth/callback?code=c&state=s`)
      expect(true).toBe(false)
    } catch (error) {
      expect(error).toBeDefined()
--- a/src/hooks/anthropic-context-window-limit-recovery/executor.test.ts
+++ b/src/hooks/anthropic-context-window-limit-recovery/executor.test.ts
@@ -1,11 +1,83 @@
-import { describe, test, expect, mock, beforeEach, spyOn } from "bun:test"
+import { afterEach, beforeEach, describe, expect, mock, spyOn, test } from "bun:test"
 import { executeCompact } from "./executor"
 import type { AutoCompactState } from "./types"
 import * as storage from "./storage"

+type TimerCallback = (...args: any[]) => void
+
+interface FakeTimeouts { // handle returned by createFakeTimeouts()
+  advanceBy: (ms: number) => Promise<void> // move virtual time forward by `ms`, firing timers that come due
+  restore: () => void // put the captured global setTimeout/clearTimeout back
+}
+
+function createFakeTimeouts(): FakeTimeouts { // replaces globalThis.setTimeout/clearTimeout with a manually advanced queue; call restore() to undo
+  let now = 0 // virtual time in ms; only advanceBy() moves it
+  let nextId = 1 // next timer id to hand out
+  const timers = new Map<number, { id: number; time: number; callback: TimerCallback; args: any[] }>() // pending timers keyed by id
+  const cleared = new Set<number>() // ids passed to the fake clearTimeout
+
+  const original = { // globals as they were at creation time, kept for restore()
+    setTimeout: globalThis.setTimeout,
+    clearTimeout: globalThis.clearTimeout,
+  }
+
+  const normalizeDelay = (delay?: number) => { // non-number / non-finite / negative delays all become 0
+    if (typeof delay !== "number" || !Number.isFinite(delay)) return 0
+    return delay < 0 ? 0 : delay
+  }
+
+  globalThis.setTimeout = ((callback: TimerCallback, delay?: number, ...args: any[]) => { // only records the timer; nothing runs until advanceBy()
+    const id = nextId++
+    timers.set(id, {
+      id,
+      time: now + normalizeDelay(delay), // absolute virtual due time
+      callback,
+      args,
+    })
+    return id as unknown as ReturnType<typeof setTimeout> // numeric id is returned in place of the runtime's timer handle
+  }) as typeof setTimeout
+
+  globalThis.clearTimeout = ((id?: number) => { // cancels a pending fake timer
+    if (typeof id !== "number") return // non-number handles are ignored
+    cleared.add(id)
+    timers.delete(id)
+  }) as typeof clearTimeout
+
+  const advanceBy = async (ms: number) => { // fires every timer due within the next `ms` virtual ms, earliest first
+    const target = now + Math.max(0, ms) // negative `ms` is treated as 0
+    while (true) {
+      let next: { id: number; time: number; callback: TimerCallback; args: any[] } | undefined
+      for (const timer of timers.values()) { // linear scan for the earliest due timer; on equal times the first one iterated wins (strict <)
+        if (timer.time <= target && (!next || timer.time < next.time)) {
+          next = timer
+        }
+      }
+      if (!next) break // nothing left that is due at or before target
+
+      now = next.time // timers scheduled inside the callback are relative to this fire time
+      timers.delete(next.id)
+      if (!cleared.has(next.id)) { // defensive check: clearTimeout has already removed cleared entries from `timers`
+        next.callback(...next.args)
+      }
+      cleared.delete(next.id)
+      await Promise.resolve() // yield a microtask turn between timers
+    }
+    now = target // virtual time ends at target even when no timer was due exactly then
+    await Promise.resolve()
+  }
+
+  const restore = () => { // reinstalls the captured globals; pending fake timers are neither fired nor cleared here
+    globalThis.setTimeout = original.setTimeout
+    globalThis.clearTimeout = original.clearTimeout
+  }
+
+  return { advanceBy, restore }
+}
+
 describe("executeCompact lock management", () => {
  let autoCompactState: AutoCompactState
  let mockClient: any
+  let fakeTimeouts: FakeTimeouts
  const sessionID = "test-session-123"
  const directory = "/test/dir"
  const msg = { providerID: "anthropic", modelID: "claude-opus-4-5" }
@@ -32,6 +104,12 @@ describe("executeCompact lock management", () => {
        showToast: mock(() => Promise.resolve()),
      },
    }
+
+    fakeTimeouts = createFakeTimeouts()
+  })
+
+  afterEach(() => {
+    fakeTimeouts.restore()
  })

  test("clears lock on successful summarize completion", async () => {
@@ -216,7 +294,7 @@ describe("executeCompact lock management", () => {
    await executeCompact(sessionID, msg, autoCompactState, mockClient, directory)

    // Wait for setTimeout callback
-    await new Promise((resolve) => setTimeout(resolve, 600))
+    await fakeTimeouts.advanceBy(600)

    // #then: Lock should be cleared
    // The continuation happens in setTimeout, but lock is cleared in finally before that
@@ -288,7 +366,7 @@ describe("executeCompact lock management", () => {
    await executeCompact(sessionID, msg, autoCompactState, mockClient, directory)

    // Wait for setTimeout callback
-    await new Promise((resolve) => setTimeout(resolve, 600))
+    await fakeTimeouts.advanceBy(600)

    // #then: Truncation was attempted
    expect(truncateSpy).toHaveBeenCalled()
--- a/src/hooks/todo-continuation-enforcer.test.ts
+++ b/src/hooks/todo-continuation-enforcer.test.ts
@@ -4,9 +4,123 @@ import type { BackgroundManager } from "../features/background-agent"
 import { setMainSession, subagentSessions, _resetForTesting } from "../features/claude-code-session-state"
 import { createTodoContinuationEnforcer } from "./todo-continuation-enforcer"

+type TimerCallback = (...args: any[]) => void
+
+interface FakeTimers { // handle returned by createFakeTimers()
+  advanceBy: (ms: number, advanceClock?: boolean) => Promise<void> // advance virtual timer time by `ms`; pass true to also move the faked Date.now
+  restore: () => void // put the captured timer globals and Date.now back
+}
+
+function createFakeTimers(): FakeTimers { // replaces setTimeout/setInterval/clearTimeout/clearInterval and Date.now with manually driven fakes; call restore() to undo
+  const originalNow = Date.now() // real time at creation, used as the starting value of the faked clock
+  let clockNow = originalNow // value returned by the faked Date.now; changes only via advanceBy(ms, true)
+  let timerNow = 0 // virtual time used for timer due times, tracked separately from clockNow
+  let nextId = 1 // next timer id to hand out
+  const timers = new Map<number, { id: number; time: number; interval: number | null; callback: TimerCallback; args: any[] }>() // pending timers keyed by id
+  const cleared = new Set<number>() // ids passed to the fake clearTimeout/clearInterval
+
+  const original = { // globals as they were at creation time, kept for restore()
+    setTimeout: globalThis.setTimeout,
+    clearTimeout: globalThis.clearTimeout,
+    setInterval: globalThis.setInterval,
+    clearInterval: globalThis.clearInterval,
+    dateNow: Date.now,
+  }
+
+  const normalizeDelay = (delay?: number) => { // non-number / non-finite / negative delays all become 0
+    if (typeof delay !== "number" || !Number.isFinite(delay)) return 0
+    return delay < 0 ? 0 : delay
+  }
+
+  const schedule = (callback: TimerCallback, delay: number | undefined, interval: number | null, args: any[]) => { // shared by setTimeout (interval = null) and setInterval (interval = repeat period)
+    const id = nextId++
+    timers.set(id, {
+      id,
+      time: timerNow + normalizeDelay(delay), // absolute virtual due time
+      interval,
+      callback,
+      args,
+    })
+    return id
+  }
+
+  const clear = (id: number | undefined) => { // shared by clearTimeout and clearInterval
+    if (typeof id !== "number") return // non-number handles are ignored
+    cleared.add(id)
+    timers.delete(id)
+  }
+
+  globalThis.setTimeout = ((callback: TimerCallback, delay?: number, ...args: any[]) => { // one-shot: only recorded, runs when advanceBy() reaches it
+    return schedule(callback, delay, null, args) as unknown as ReturnType<typeof setTimeout>
+  }) as typeof setTimeout
+
+  globalThis.setInterval = ((callback: TimerCallback, delay?: number, ...args: any[]) => { // repeating: re-armed by advanceBy() after each run
+    const interval = normalizeDelay(delay)
+    return schedule(callback, delay, interval, args) as unknown as ReturnType<typeof setInterval>
+  }) as typeof setInterval
+
+  globalThis.clearTimeout = ((id?: number) => {
+    clear(id)
+  }) as typeof clearTimeout
+
+  globalThis.clearInterval = ((id?: number) => {
+    clear(id)
+  }) as typeof clearInterval
+
+  Date.now = () => clockNow // frozen until advanceBy(ms, true) moves it
+
+  const advanceBy = async (ms: number, advanceClock: boolean = false) => { // fires every timer due within the next `ms` virtual ms, earliest first
+    const clamped = Math.max(0, ms) // negative `ms` is treated as 0
+    const target = timerNow + clamped
+    if (advanceClock) {
+      clockNow += clamped // the whole jump is applied before any callback runs, so callbacks see the end time from Date.now
+    }
+    while (true) {
+      let next: { id: number; time: number; interval: number | null; callback: TimerCallback; args: any[] } | undefined
+      for (const timer of timers.values()) { // linear scan for the earliest due timer; on equal times the first one iterated wins (strict <)
+        if (timer.time <= target && (!next || timer.time < next.time)) {
+          next = timer
+        }
+      }
+      if (!next) break // nothing left that is due at or before target
+
+      timerNow = next.time // timers scheduled inside the callback are relative to this fire time
+      timers.delete(next.id)
+      next.callback(...next.args)
+
+      if (next.interval !== null && !cleared.has(next.id)) { // re-arm a repeating timer unless it was cleared (e.g. from inside its own callback)
+        timers.set(next.id, {
+          id: next.id,
+          time: timerNow + next.interval,
+          interval: next.interval,
+          callback: next.callback,
+          args: next.args,
+        })
+      } else {
+        cleared.delete(next.id) // timer is finished; drop its bookkeeping entry
+      }
+
+      await Promise.resolve() // yield a microtask turn between timers
+    }
+    timerNow = target // virtual time ends at target even when no timer was due exactly then
+    await Promise.resolve()
+  }
+
+  const restore = () => { // reinstalls the captured globals; pending fake timers are neither fired nor cleared here
+    globalThis.setTimeout = original.setTimeout
+    globalThis.clearTimeout = original.clearTimeout
+    globalThis.setInterval = original.setInterval
+    globalThis.clearInterval = original.clearInterval
+    Date.now = original.dateNow
+  }
+
+  return { advanceBy, restore }
+}
+
 describe("todo-continuation-enforcer", () => {
  let promptCalls: Array<{ sessionID: string; agent?: string; model?: { providerID?: string; modelID?: string }; text: string }>
  let toastCalls: Array<{ title: string; message: string }>
+  let fakeTimers: FakeTimers

  interface MockMessage {
    info: {
@@ -60,6 +174,7 @@ describe("todo-continuation-enforcer", () => {
  }

  beforeEach(() => {
+    fakeTimers = createFakeTimers()
    _resetForTesting()
    promptCalls = []
    toastCalls = []
@@ -67,6 +182,7 @@ describe("todo-continuation-enforcer", () => {
  })

  afterEach(() => {
+    fakeTimers.restore()
    _resetForTesting()
  })

@@ -85,12 +201,12 @@ describe("todo-continuation-enforcer", () => {
    })

    // #then - countdown toast shown
-    await new Promise(r => setTimeout(r, 100))
+    await fakeTimers.advanceBy(100)
    expect(toastCalls.length).toBeGreaterThanOrEqual(1)
    expect(toastCalls[0].title).toBe("Todo Continuation")

    // #then - after countdown, continuation injected
-    await new Promise(r => setTimeout(r, 2500))
+    await fakeTimers.advanceBy(2500)
    expect(promptCalls.length).toBe(1)
    expect(promptCalls[0].text).toContain("TODO CONTINUATION")
  })
@@ -112,7 +228,7 @@ describe("todo-continuation-enforcer", () => {
      event: { type: "session.idle", properties: { sessionID } },
    })

-    await new Promise(r => setTimeout(r, 3000))
+    await fakeTimers.advanceBy(3000)

    // #then - no continuation injected
    expect(promptCalls).toHaveLength(0)
@@ -132,7 +248,7 @@ describe("todo-continuation-enforcer", () => {
      event: { type: "session.idle", properties: { sessionID } },
    })

-    await new Promise(r => setTimeout(r, 3000))
+    await fakeTimers.advanceBy(3000)

    // #then - no continuation injected
    expect(promptCalls).toHaveLength(0)
@@ -150,7 +266,7 @@ describe("todo-continuation-enforcer", () => {
      event: { type: "session.idle", properties: { sessionID: otherSession } },
    })

-    await new Promise(r => setTimeout(r, 3000))
+    await fakeTimers.advanceBy(3000)

    // #then - no continuation injected
    expect(promptCalls).toHaveLength(0)
@@ -170,7 +286,7 @@ describe("todo-continuation-enforcer", () => {
    })

    // #then - continuation injected for background task session
-    await new Promise(r => setTimeout(r, 2500))
+    await fakeTimers.advanceBy(2500)
    expect(promptCalls.length).toBe(1)
    expect(promptCalls[0].sessionID).toBe(bgTaskSession)
  })
@@ -190,7 +306,7 @@ describe("todo-continuation-enforcer", () => {
    })

    // #when - wait past grace period (500ms), then user sends message
-    await new Promise(r => setTimeout(r, 600))
+    await fakeTimers.advanceBy(600, true)
    await hook.handler({
      event: {
        type: "message.updated",
@@ -199,7 +315,7 @@ describe("todo-continuation-enforcer", () => {
    })

    // #then - wait past countdown time and verify no injection (countdown was cancelled)
-    await new Promise(r => setTimeout(r, 2500))
+    await fakeTimers.advanceBy(2500)
    expect(promptCalls).toHaveLength(0)
  })

@@ -223,9 +339,9 @@ describe("todo-continuation-enforcer", () => {
      },
    })

-    // #then - countdown should continue (message was ignored)
+     // #then - countdown should continue (message was ignored)
    // wait past 2s countdown and verify injection happens
-    await new Promise(r => setTimeout(r, 2500))
+    await fakeTimers.advanceBy(2500)
    expect(promptCalls).toHaveLength(1)
  })

@@ -242,7 +358,7 @@ describe("todo-continuation-enforcer", () => {
    })

    // #when - assistant starts responding
-    await new Promise(r => setTimeout(r, 500))
+    await fakeTimers.advanceBy(500)
    await hook.handler({
      event: {
        type: "message.part.updated",
@@ -250,7 +366,7 @@ describe("todo-continuation-enforcer", () => {
      },
    })

-    await new Promise(r => setTimeout(r, 3000))
+    await fakeTimers.advanceBy(3000)

    // #then - no continuation injected (cancelled)
    expect(promptCalls).toHaveLength(0)
@@ -269,12 +385,12 @@ describe("todo-continuation-enforcer", () => {
    })

    // #when - tool starts executing
-    await new Promise(r => setTimeout(r, 500))
+    await fakeTimers.advanceBy(500)
    await hook.handler({
      event: { type: "tool.execute.before", properties: { sessionID } },
    })

-    await new Promise(r => setTimeout(r, 3000))
+    await fakeTimers.advanceBy(3000)

    // #then - no continuation injected (cancelled)
    expect(promptCalls).toHaveLength(0)
@@ -295,7 +411,7 @@ describe("todo-continuation-enforcer", () => {
      event: { type: "session.idle", properties: { sessionID } },
    })

-    await new Promise(r => setTimeout(r, 3000))
+    await fakeTimers.advanceBy(3000)

    // #then - no continuation injected
    expect(promptCalls).toHaveLength(0)
@@ -317,7 +433,7 @@ describe("todo-continuation-enforcer", () => {
      event: { type: "session.idle", properties: { sessionID } },
    })

-    await new Promise(r => setTimeout(r, 3000))
+    await fakeTimers.advanceBy(3000)

    // #then - continuation injected
    expect(promptCalls.length).toBe(1)
@@ -336,12 +452,12 @@ describe("todo-continuation-enforcer", () => {
    })

    // #when - session is deleted during countdown
-    await new Promise(r => setTimeout(r, 500))
+    await fakeTimers.advanceBy(500)
    await hook.handler({
      event: { type: "session.deleted", properties: { info: { id: sessionID } } },
    })

-    await new Promise(r => setTimeout(r, 3000))
+    await fakeTimers.advanceBy(3000)

    // #then - no continuation injected (cleaned up)
    expect(promptCalls).toHaveLength(0)
@@ -362,7 +478,7 @@ describe("todo-continuation-enforcer", () => {
      event: { type: "session.idle", properties: { sessionID } },
    })

-    await new Promise(r => setTimeout(r, 100))
+    await fakeTimers.advanceBy(100)
    expect(toastCalls.length).toBeGreaterThanOrEqual(1)
  })

@@ -379,7 +495,7 @@ describe("todo-continuation-enforcer", () => {
    })

    // #then - multiple toast updates during countdown (2s countdown = 2 toasts: "2s" and "1s")
-    await new Promise(r => setTimeout(r, 2500))
+    await fakeTimers.advanceBy(2500)
    expect(toastCalls.length).toBeGreaterThanOrEqual(2)
    expect(toastCalls[0].message).toContain("2s")
  })
@@ -395,7 +511,7 @@ describe("todo-continuation-enforcer", () => {
    await hook.handler({
      event: { type: "session.idle", properties: { sessionID } },
    })
-    await new Promise(r => setTimeout(r, 3500))
+    await fakeTimers.advanceBy(3500)

    // #then - first injection happened
    expect(promptCalls.length).toBe(1)
@@ -404,7 +520,7 @@ describe("todo-continuation-enforcer", () => {
    await hook.handler({
      event: { type: "session.idle", properties: { sessionID } },
    })
-    await new Promise(r => setTimeout(r, 3500))
+    await fakeTimers.advanceBy(3500)

    // #then - second injection also happened (no throttle blocking)
    expect(promptCalls.length).toBe(2)
@@ -439,7 +555,7 @@ describe("todo-continuation-enforcer", () => {
      event: { type: "session.idle", properties: { sessionID } },
    })

-    await new Promise(r => setTimeout(r, 2500))
+    await fakeTimers.advanceBy(2500)

    // #then - continuation injected (non-abort errors don't block)
    expect(promptCalls.length).toBe(1)
@@ -472,7 +588,7 @@ describe("todo-continuation-enforcer", () => {
      event: { type: "session.idle", properties: { sessionID } },
    })

-    await new Promise(r => setTimeout(r, 3000))
+    await fakeTimers.advanceBy(3000)

    // #then - no continuation (last message was aborted)
    expect(promptCalls).toHaveLength(0)
@@ -490,12 +606,12 @@ describe("todo-continuation-enforcer", () => {

    const hook = createTodoContinuationEnforcer(createMockPluginInput(), {})

-    // #when - session goes idle
+     // #when - session goes idle
    await hook.handler({
      event: { type: "session.idle", properties: { sessionID } },
    })

-    await new Promise(r => setTimeout(r, 3000))
+    await fakeTimers.advanceBy(3000)

    // #then - continuation injected (no abort)
    expect(promptCalls.length).toBe(1)
@@ -518,7 +634,7 @@ describe("todo-continuation-enforcer", () => {
      event: { type: "session.idle", properties: { sessionID } },
    })

-    await new Promise(r => setTimeout(r, 3000))
+    await fakeTimers.advanceBy(3000)

    // #then - continuation injected (last message is user, not aborted assistant)
    expect(promptCalls.length).toBe(1)
@@ -541,7 +657,7 @@ describe("todo-continuation-enforcer", () => {
      event: { type: "session.idle", properties: { sessionID } },
    })

-    await new Promise(r => setTimeout(r, 3000))
+    await fakeTimers.advanceBy(3000)

    // #then - no continuation (abort error detected)
    expect(promptCalls).toHaveLength(0)
@@ -566,12 +682,12 @@ describe("todo-continuation-enforcer", () => {
      },
    })

-    // #when - session goes idle immediately after
+     // #when - session goes idle immediately after
    await hook.handler({
      event: { type: "session.idle", properties: { sessionID } },
    })

-    await new Promise(r => setTimeout(r, 3000))
+    await fakeTimers.advanceBy(3000)

    // #then - no continuation (abort detected via event)
    expect(promptCalls).toHaveLength(0)
@@ -601,7 +717,7 @@ describe("todo-continuation-enforcer", () => {
      event: { type: "session.idle", properties: { sessionID } },
    })

-    await new Promise(r => setTimeout(r, 3000))
+    await fakeTimers.advanceBy(3000)

    // #then - no continuation (abort detected via event)
    expect(promptCalls).toHaveLength(0)
@@ -627,13 +743,13 @@ describe("todo-continuation-enforcer", () => {
    })

    // #when - wait >3s then idle fires
-    await new Promise(r => setTimeout(r, 3100))
+    await fakeTimers.advanceBy(3100, true)

    await hook.handler({
      event: { type: "session.idle", properties: { sessionID } },
    })

-    await new Promise(r => setTimeout(r, 3000))
+    await fakeTimers.advanceBy(3000)

    // #then - continuation injected (abort flag is stale)
    expect(promptCalls.length).toBeGreaterThan(0)
@@ -659,7 +775,7 @@ describe("todo-continuation-enforcer", () => {
    })

    // #when - user sends new message (clears abort flag)
-    await new Promise(r => setTimeout(r, 600))
+    await fakeTimers.advanceBy(600)
    await hook.handler({
      event: {
        type: "message.updated",
@@ -672,7 +788,7 @@ describe("todo-continuation-enforcer", () => {
      event: { type: "session.idle", properties: { sessionID } },
    })

-    await new Promise(r => setTimeout(r, 3000))
+    await fakeTimers.advanceBy(3000)

    // #then - continuation injected (abort flag was cleared by user activity)
    expect(promptCalls.length).toBeGreaterThan(0)
@@ -710,7 +826,7 @@ describe("todo-continuation-enforcer", () => {
      event: { type: "session.idle", properties: { sessionID } },
    })

-    await new Promise(r => setTimeout(r, 3000))
+    await fakeTimers.advanceBy(3000)

    // #then - continuation injected (abort flag was cleared by assistant activity)
    expect(promptCalls.length).toBeGreaterThan(0)
@@ -748,7 +864,7 @@ describe("todo-continuation-enforcer", () => {
      event: { type: "session.idle", properties: { sessionID } },
    })

-    await new Promise(r => setTimeout(r, 3000))
+    await fakeTimers.advanceBy(3000)

    // #then - continuation injected (abort flag was cleared by tool execution)
    expect(promptCalls.length).toBeGreaterThan(0)
@@ -778,7 +894,7 @@ describe("todo-continuation-enforcer", () => {
      event: { type: "session.idle", properties: { sessionID } },
    })

-    await new Promise(r => setTimeout(r, 3000))
+    await fakeTimers.advanceBy(3000)

    // #then - no continuation (event-based detection wins over API)
    expect(promptCalls).toHaveLength(0)
@@ -800,7 +916,7 @@ describe("todo-continuation-enforcer", () => {
      event: { type: "session.idle", properties: { sessionID } },
    })

-    await new Promise(r => setTimeout(r, 3000))
+    await fakeTimers.advanceBy(3000)

    // #then - no continuation (API fallback detected the abort)
    expect(promptCalls).toHaveLength(0)
@@ -820,7 +936,7 @@ describe("todo-continuation-enforcer", () => {
      event: { type: "session.idle", properties: { sessionID } },
    })

-    await new Promise(r => setTimeout(r, 2500))
+    await fakeTimers.advanceBy(2500)

    // #then - prompt call made, model is undefined when no context (expected behavior)
    expect(promptCalls.length).toBe(1)
@@ -867,7 +983,7 @@ describe("todo-continuation-enforcer", () => {

    // #when - session goes idle
    await hook.handler({ event: { type: "session.idle", properties: { sessionID } } })
-    await new Promise(r => setTimeout(r, 2500))
+    await fakeTimers.advanceBy(2500)

    // #then - model should be extracted from assistant message's flat modelID/providerID
    expect(promptCalls.length).toBe(1)
@@ -919,7 +1035,7 @@ describe("todo-continuation-enforcer", () => {

    // #when - session goes idle
    await hook.handler({ event: { type: "session.idle", properties: { sessionID } } })
-    await new Promise(r => setTimeout(r, 2500))
+    await fakeTimers.advanceBy(2500)

    // #then - continuation uses Sisyphus (skipped compaction agent)
    expect(promptCalls.length).toBe(1)
@@ -964,7 +1080,7 @@ describe("todo-continuation-enforcer", () => {
      event: { type: "session.idle", properties: { sessionID } },
    })

-    await new Promise(r => setTimeout(r, 3000))
+    await fakeTimers.advanceBy(3000)

    // #then - no continuation (compaction is in default skipAgents)
    expect(promptCalls).toHaveLength(0)
@@ -1010,7 +1126,7 @@ describe("todo-continuation-enforcer", () => {
      event: { type: "session.idle", properties: { sessionID } },
    })

-    await new Promise(r => setTimeout(r, 3000))
+    await fakeTimers.advanceBy(3000)

    // #then - no continuation (prometheus found after filtering compaction, prometheus is in skipAgents)
    expect(promptCalls).toHaveLength(0)
@@ -1057,7 +1173,7 @@ describe("todo-continuation-enforcer", () => {
      event: { type: "session.idle", properties: { sessionID } },
    })

-    await new Promise(r => setTimeout(r, 3000))
+    await fakeTimers.advanceBy(3000)

    // #then - continuation injected (no agents to skip)
    expect(promptCalls.length).toBe(1)
--- a/src/tools/delegate-task/tools.test.ts
+++ b/src/tools/delegate-task/tools.test.ts
@@ -1159,7 +1159,7 @@ describe("sisyphus-task", () => {
      const mockClient = {
        app: { agents: async () => ({ data: [] }) },
        config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) },
-        model: { list: async () => [{ id: "google/gemini-3-pro" }] },
+        model: { list: async () => ({ data: [{ provider: "google", id: "gemini-3-pro" }] }) },
        session: {
          get: async () => ({ data: { directory: "/project" } }),
          create: async () => ({ data: { id: "ses_unstable_gemini" } }),
@@ -1325,6 +1325,13 @@ describe("sisyphus-task", () => {
    test("artistry category (gemini) with run_in_background=false should force background but wait for result", async () => {
      // #given - artistry also uses gemini model
      const { createDelegateTask } = require("./tools")
+      const providerModelsSpy = spyOn(connectedProvidersCache, "readProviderModelsCache").mockReturnValue({
+        connected: ["anthropic", "google", "openai"],
+        updatedAt: new Date().toISOString(),
+        models: {
+          google: ["gemini-3-pro", "gemini-3-flash"],
+        },
+      })
      let launchCalled = false
      
      const mockManager = {
@@ -1343,7 +1350,7 @@ describe("sisyphus-task", () => {
      const mockClient = {
        app: { agents: async () => ({ data: [] }) },
        config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) },
-        model: { list: async () => [{ id: "google/gemini-3-pro" }] },
+        model: { list: async () => ({ data: [{ provider: "google", id: "gemini-3-pro" }] }) },
        session: {
          get: async () => ({ data: { directory: "/project" } }),
          create: async () => ({ data: { id: "ses_artistry_gemini" } }),
@@ -1385,6 +1392,7 @@ describe("sisyphus-task", () => {
      expect(launchCalled).toBe(true)
      expect(result).toContain("SUPERVISED TASK COMPLETED")
      expect(result).toContain("Artistry result here")
+      providerModelsSpy.mockRestore()
    }, { timeout: 20000 })

    test("writing category (gemini-flash) with run_in_background=false should force background but wait for result", async () => {