diff --git a/src/features/skill-mcp-manager/manager.test.ts b/src/features/skill-mcp-manager/manager.test.ts index b77e74efe..5c9120d49 100644 --- a/src/features/skill-mcp-manager/manager.test.ts +++ b/src/features/skill-mcp-manager/manager.test.ts @@ -502,4 +502,110 @@ describe("SkillMcpManager", () => { ) }) }) + + describe("operation retry logic", () => { + it("should retry operation when 'Not connected' error occurs", async () => { + // #given + const info: SkillMcpClientInfo = { + serverName: "retry-server", + skillName: "retry-skill", + sessionID: "session-retry-1", + } + const context: SkillMcpServerContext = { + config: { + url: "https://example.com/mcp", + }, + skillName: "retry-skill", + } + + // Mock client that fails first time with "Not connected", then succeeds + let callCount = 0 + const mockClient = { + callTool: mock(async () => { + callCount++ + if (callCount === 1) { + throw new Error("Not connected") + } + return { content: [{ type: "text", text: "success" }] } + }), + close: mock(() => Promise.resolve()), + } + + // Spy on getOrCreateClientWithRetry to inject mock client + const getOrCreateSpy = spyOn(manager as any, "getOrCreateClientWithRetry") + getOrCreateSpy.mockResolvedValue(mockClient) + + // #when + const result = await manager.callTool(info, context, "test-tool", {}) + + // #then + expect(callCount).toBe(2) // First call fails, second succeeds + expect(result).toEqual([{ type: "text", text: "success" }]) + expect(getOrCreateSpy).toHaveBeenCalledTimes(2) // Called twice due to retry + }) + + it("should fail after 3 retry attempts", async () => { + // #given + const info: SkillMcpClientInfo = { + serverName: "fail-server", + skillName: "fail-skill", + sessionID: "session-fail-1", + } + const context: SkillMcpServerContext = { + config: { + url: "https://example.com/mcp", + }, + skillName: "fail-skill", + } + + // Mock client that always fails with "Not connected" + const mockClient = { + callTool: mock(async () => { + throw new Error("Not connected") + }), + close: mock(() => Promise.resolve()), + } + + const getOrCreateSpy = spyOn(manager as any, "getOrCreateClientWithRetry") + getOrCreateSpy.mockResolvedValue(mockClient) + + // #when / #then + await expect(manager.callTool(info, context, "test-tool", {})).rejects.toThrow( + /Failed after 3 reconnection attempts/ + ) + expect(getOrCreateSpy).toHaveBeenCalledTimes(3) // Initial + 2 retries + }) + + it("should not retry on non-connection errors", async () => { + // #given + const info: SkillMcpClientInfo = { + serverName: "error-server", + skillName: "error-skill", + sessionID: "session-error-1", + } + const context: SkillMcpServerContext = { + config: { + url: "https://example.com/mcp", + }, + skillName: "error-skill", + } + + // Mock client that fails with non-connection error + const mockClient = { + callTool: mock(async () => { + throw new Error("Tool not found") + }), + close: mock(() => Promise.resolve()), + } + + const getOrCreateSpy = spyOn(manager as any, "getOrCreateClientWithRetry") + getOrCreateSpy.mockResolvedValue(mockClient) + + // #when / #then + await expect(manager.callTool(info, context, "test-tool", {})).rejects.toThrow( + "Tool not found" + ) + expect(getOrCreateSpy).toHaveBeenCalledTimes(1) // No retry + }) + }) }) diff --git a/src/features/skill-mcp-manager/manager.ts b/src/features/skill-mcp-manager/manager.ts index 7741b8ee4..b56cda8ed 100644 --- a/src/features/skill-mcp-manager/manager.ts +++ b/src/features/skill-mcp-manager/manager.ts @@ -415,9 +415,10 @@ export class SkillMcpManager { name: string, args: Record ): Promise { - const client = await this.getOrCreateClientWithRetry(info, context.config) - const result = await client.callTool({ name, arguments: args }) - return result.content + return this.withOperationRetry(info, context.config, async (client) => { + const result = await client.callTool({ name, arguments: args }) + return result.content + }) } async readResource( @@ -425,9 +426,10 @@ export class SkillMcpManager { context: SkillMcpServerContext, uri: string ): Promise { - const client = await this.getOrCreateClientWithRetry(info, context.config) - const result = await client.readResource({ uri }) - return result.contents + return this.withOperationRetry(info, context.config, async (client) => { + const result = await client.readResource({ uri }) + return result.contents + }) } async getPrompt( @@ -436,9 +438,53 @@ export class SkillMcpManager { name: string, args: Record ): Promise { - const client = await this.getOrCreateClientWithRetry(info, context.config) - const result = await client.getPrompt({ name, arguments: args }) - return result.messages + return this.withOperationRetry(info, context.config, async (client) => { + const result = await client.getPrompt({ name, arguments: args }) + return result.messages + }) + } + + private async withOperationRetry( + info: SkillMcpClientInfo, + config: ClaudeCodeMcpServer, + operation: (client: Client) => Promise + ): Promise { + const maxRetries = 3 + let lastError: Error | null = null + + for (let attempt = 1; attempt <= maxRetries; attempt++) { + try { + const client = await this.getOrCreateClientWithRetry(info, config) + return await operation(client) + } catch (error) { + lastError = error instanceof Error ? error : new Error(String(error)) + const errorMessage = lastError.message.toLowerCase() + + if (!errorMessage.includes("not connected")) { + throw lastError + } + + if (attempt === maxRetries) { + throw new Error( + `Failed after ${maxRetries} reconnection attempts: ${lastError.message}` + ) + } + + const key = this.getClientKey(info) + const existing = this.clients.get(key) + if (existing) { + this.clients.delete(key) + try { + await existing.client.close() + } catch { /* process may already be terminated */ } + try { + await existing.transport.close() + } catch { /* transport may already be terminated */ } + } + } + } + + throw lastError || new Error("Operation failed with unknown error") } private async getOrCreateClientWithRetry(