From b405494808a1ec4e8274841f601b97f738535389 Mon Sep 17 00:00:00 2001 From: Kwanghyun Moon Date: Sat, 31 Jan 2026 12:46:05 +0900 Subject: [PATCH] fix: resolve deadlock in config handler during plugin initialization (#1304) * fix: resolve deadlock in config handler during plugin initialization The config handler and createBuiltinAgents were calling fetchAvailableModels with client, which triggers client.provider.list() API call to OpenCode server. This caused a deadlock because: - Plugin initialization waits for server response - Server waits for plugin init to complete before handling requests Now using cache-only mode by passing undefined instead of client. If cache is unavailable, the fallback chain will use the first model. Fixes #1301 * test: add regression tests for deadlock prevention in fetchAvailableModels Add tests to ensure fetchAvailableModels is called with undefined client during plugin initialization. This prevents regression on issue #1301. - config-handler.test.ts: verify config handler does not pass client - utils.test.ts: verify createBuiltinAgents does not pass client * test: restore spies in utils.test.ts to prevent test pollution Add mockRestore() calls for all spies created in test cases to ensure proper cleanup between tests and prevent state leakage. * test: restore fetchAvailableModels spy --------- Co-authored-by: robin Co-authored-by: justsisyphus --- src/agents/utils.test.ts | 139 +++++++++++++-------- src/agents/utils.ts | 9 +- src/plugin-handlers/config-handler.test.ts | 43 +++++++ src/plugin-handlers/config-handler.ts | 12 +- 4 files changed, 147 insertions(+), 56 deletions(-) diff --git a/src/agents/utils.test.ts b/src/agents/utils.test.ts index bb24f2f82..d4249c612 100644 --- a/src/agents/utils.test.ts +++ b/src/agents/utils.test.ts @@ -3,6 +3,7 @@ import { createBuiltinAgents } from "./utils" import type { AgentConfig } from "@opencode-ai/sdk" import { clearSkillCache } from "../features/opencode-skill-loader/skill-content" import * as connectedProvidersCache from "../shared/connected-providers-cache" +import * as modelAvailability from "../shared/model-availability" const TEST_DEFAULT_MODEL = "anthropic/claude-opus-4-5" @@ -47,32 +48,32 @@ describe("createBuiltinAgents with model overrides", () => { expect(agents.sisyphus.reasoningEffort).toBeUndefined() }) - test("Oracle uses connected provider fallback when availableModels is empty and cache exists", async () => { - // #given - connected providers cache has "openai", which matches oracle's first fallback entry - const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(["openai"]) + test("Oracle uses connected provider fallback when availableModels is empty and cache exists", async () => { + // #given - connected providers cache has "openai", which matches oracle's first fallback entry + const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(["openai"]) - // #when - const agents = await createBuiltinAgents([], {}, undefined, TEST_DEFAULT_MODEL) + // #when + const agents = await createBuiltinAgents([], {}, undefined, TEST_DEFAULT_MODEL) - // #then - oracle resolves via connected cache fallback to openai/gpt-5.2 (not system default) - expect(agents.oracle.model).toBe("openai/gpt-5.2") - expect(agents.oracle.reasoningEffort).toBe("medium") - expect(agents.oracle.thinking).toBeUndefined() - cacheSpy.mockRestore() - }) + // #then - oracle resolves via connected cache fallback to openai/gpt-5.2 (not system default) + expect(agents.oracle.model).toBe("openai/gpt-5.2") + expect(agents.oracle.reasoningEffort).toBe("medium") + expect(agents.oracle.thinking).toBeUndefined() + cacheSpy.mockRestore?.() + }) - test("Oracle created without model field when no cache exists (first run scenario)", async () => { - // #given - no cache at all (first run) - const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(null) + test("Oracle created without model field when no cache exists (first run scenario)", async () => { + // #given - no cache at all (first run) + const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(null) - // #when - const agents = await createBuiltinAgents([], {}, undefined, TEST_DEFAULT_MODEL) + // #when + const agents = await createBuiltinAgents([], {}, undefined, TEST_DEFAULT_MODEL) - // #then - oracle should be created with system default model (fallback to systemDefaultModel) - expect(agents.oracle).toBeDefined() - expect(agents.oracle.model).toBe(TEST_DEFAULT_MODEL) - cacheSpy.mockRestore() - }) + // #then - oracle should be created with system default model (fallback to systemDefaultModel) + expect(agents.oracle).toBeDefined() + expect(agents.oracle.model).toBe(TEST_DEFAULT_MODEL) + cacheSpy.mockRestore?.() + }) test("Oracle with GPT model override has reasoningEffort, no thinking", async () => { // #given @@ -122,43 +123,43 @@ describe("createBuiltinAgents with model overrides", () => { }) describe("createBuiltinAgents without systemDefaultModel", () => { - test("agents created via connected cache fallback even without systemDefaultModel", async () => { - // #given - connected cache has "openai", which matches oracle's fallback chain - const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(["openai"]) + test("agents created via connected cache fallback even without systemDefaultModel", async () => { + // #given - connected cache has "openai", which matches oracle's fallback chain + const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(["openai"]) - // #when - const agents = await createBuiltinAgents([], {}, undefined, undefined) + // #when + const agents = await createBuiltinAgents([], {}, undefined, undefined) - // #then - connected cache enables model resolution despite no systemDefaultModel - expect(agents.oracle).toBeDefined() - expect(agents.oracle.model).toBe("openai/gpt-5.2") - cacheSpy.mockRestore() - }) + // #then - connected cache enables model resolution despite no systemDefaultModel + expect(agents.oracle).toBeDefined() + expect(agents.oracle.model).toBe("openai/gpt-5.2") + cacheSpy.mockRestore?.() + }) - test("agents NOT created when no cache and no systemDefaultModel (first run without defaults)", async () => { - // #given - const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(null) + test("agents NOT created when no cache and no systemDefaultModel (first run without defaults)", async () => { + // #given + const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(null) - // #when - const agents = await createBuiltinAgents([], {}, undefined, undefined) + // #when + const agents = await createBuiltinAgents([], {}, undefined, undefined) - // #then - expect(agents.oracle).toBeUndefined() - cacheSpy.mockRestore() - }) + // #then + expect(agents.oracle).toBeUndefined() + cacheSpy.mockRestore?.() + }) - test("sisyphus created via connected cache fallback even without systemDefaultModel", async () => { - // #given - connected cache has "anthropic", which matches sisyphus's first fallback entry - const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(["anthropic"]) + test("sisyphus created via connected cache fallback even without systemDefaultModel", async () => { + // #given - connected cache has "anthropic", which matches sisyphus's first fallback entry + const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(["anthropic"]) - // #when - const agents = await createBuiltinAgents([], {}, undefined, undefined) + // #when + const agents = await createBuiltinAgents([], {}, undefined, undefined) - // #then - connected cache enables model resolution despite no systemDefaultModel - expect(agents.sisyphus).toBeDefined() - expect(agents.sisyphus.model).toBe("anthropic/claude-opus-4-5") - cacheSpy.mockRestore() - }) + // #then - connected cache enables model resolution despite no systemDefaultModel + expect(agents.sisyphus).toBeDefined() + expect(agents.sisyphus.model).toBe("anthropic/claude-opus-4-5") + cacheSpy.mockRestore?.() + }) }) describe("buildAgent with category and skills", () => { @@ -523,3 +524,41 @@ describe("override.category expansion in createBuiltinAgents", () => { expect(agents.oracle.model).toBe(agentsWithoutOverride.oracle.model) }) }) + +describe("Deadlock prevention - fetchAvailableModels must not receive client", () => { + test("createBuiltinAgents should call fetchAvailableModels with undefined client to prevent deadlock", async () => { + // #given - This test ensures we don't regress on issue #1301 + // Passing client to fetchAvailableModels during createBuiltinAgents (called from config handler) + // causes deadlock: + // - Plugin init waits for server response (client.provider.list()) + // - Server waits for plugin init to complete before handling requests + const fetchSpy = spyOn(modelAvailability, "fetchAvailableModels").mockResolvedValue(new Set()) + const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(null) + + const mockClient = { + provider: { list: () => Promise.resolve({ data: { connected: [] } }) }, + model: { list: () => Promise.resolve({ data: [] }) }, + } + + // #when - Even when client is provided, fetchAvailableModels must be called with undefined + await createBuiltinAgents( + [], + {}, + undefined, + TEST_DEFAULT_MODEL, + undefined, + undefined, + [], + mockClient // client is passed but should NOT be forwarded to fetchAvailableModels + ) + + // #then - fetchAvailableModels must be called with undefined as first argument (no client) + // This prevents the deadlock described in issue #1301 + expect(fetchSpy).toHaveBeenCalled() + const firstCallArgs = fetchSpy.mock.calls[0] + expect(firstCallArgs[0]).toBeUndefined() + + fetchSpy.mockRestore?.() + cacheSpy.mockRestore?.() + }) +}) diff --git a/src/agents/utils.ts b/src/agents/utils.ts index 83f4f8dc4..2ce55342a 100644 --- a/src/agents/utils.ts +++ b/src/agents/utils.ts @@ -180,9 +180,12 @@ export async function createBuiltinAgents( uiSelectedModel?: string ): Promise> { const connectedProviders = readConnectedProvidersCache() - const availableModels = client - ? await fetchAvailableModels(client, { connectedProviders: connectedProviders ?? undefined }) - : new Set() + // IMPORTANT: Do NOT pass client to fetchAvailableModels during plugin initialization. + // This function is called from config handler, and calling client API causes deadlock. + // See: https://github.com/code-yeongyu/oh-my-opencode/issues/1301 + const availableModels = await fetchAvailableModels(undefined, { + connectedProviders: connectedProviders ?? undefined, + }) const result: Record = {} const availableAgents: AvailableAgent[] = [] diff --git a/src/plugin-handlers/config-handler.test.ts b/src/plugin-handlers/config-handler.test.ts index 385c8ce68..69866362c 100644 --- a/src/plugin-handlers/config-handler.test.ts +++ b/src/plugin-handlers/config-handler.test.ts @@ -396,3 +396,46 @@ describe("Prometheus direct override priority over category", () => { expect(agents.prometheus.temperature).toBe(0.1) }) }) + +describe("Deadlock prevention - fetchAvailableModels must not receive client", () => { + test("fetchAvailableModels should be called with undefined client to prevent deadlock during plugin init", async () => { + // #given - This test ensures we don't regress on issue #1301 + // Passing client to fetchAvailableModels during config handler causes deadlock: + // - Plugin init waits for server response (client.provider.list()) + // - Server waits for plugin init to complete before handling requests + const fetchSpy = spyOn(shared, "fetchAvailableModels" as any).mockResolvedValue(new Set()) + + const pluginConfig: OhMyOpenCodeConfig = { + sisyphus_agent: { + planner_enabled: true, + }, + } + const config: Record = { + model: "anthropic/claude-opus-4-5", + agent: {}, + } + const mockClient = { + provider: { list: () => Promise.resolve({ data: { connected: [] } }) }, + model: { list: () => Promise.resolve({ data: [] }) }, + } + const handler = createConfigHandler({ + ctx: { directory: "/tmp", client: mockClient }, + pluginConfig, + modelCacheState: { + anthropicContext1MEnabled: false, + modelContextLimitsCache: new Map(), + }, + }) + + // #when + await handler(config) + + // #then - fetchAvailableModels must be called with undefined as first argument (no client) + // This prevents the deadlock described in issue #1301 + expect(fetchSpy).toHaveBeenCalled() + const firstCallArgs = fetchSpy.mock.calls[0] + expect(firstCallArgs[0]).toBeUndefined() + + fetchSpy.mockRestore?.() + }) +}) diff --git a/src/plugin-handlers/config-handler.ts b/src/plugin-handlers/config-handler.ts index 37f7451f5..471ce1d81 100644 --- a/src/plugin-handlers/config-handler.ts +++ b/src/plugin-handlers/config-handler.ts @@ -249,9 +249,15 @@ export function createConfigHandler(deps: ConfigHandlerDeps) { const prometheusRequirement = AGENT_MODEL_REQUIREMENTS["prometheus"]; const connectedProviders = readConnectedProvidersCache(); - const availableModels = ctx.client - ? await fetchAvailableModels(ctx.client, { connectedProviders: connectedProviders ?? undefined }) - : new Set(); + // IMPORTANT: Do NOT pass ctx.client to fetchAvailableModels during plugin initialization. + // Calling client API (e.g., client.provider.list()) from config handler causes deadlock: + // - Plugin init waits for server response + // - Server waits for plugin init to complete before handling requests + // Use cache-only mode instead. If cache is unavailable, fallback chain uses first model. + // See: https://github.com/code-yeongyu/oh-my-opencode/issues/1301 + const availableModels = await fetchAvailableModels(undefined, { + connectedProviders: connectedProviders ?? undefined, + }); const modelResolution = resolveModelWithFallback({ uiSelectedModel: currentModel,