feat(skill-mcp): add auto-reconnect retry on "Not connected" errors
- Added withOperationRetry<T>() helper method that retries operations up to 3 times - Catches "Not connected" errors (case-insensitive) - Cleans up stale client before retry - Modified callTool, readResource, getPrompt to use retry logic - Added tests for retry behavior (3 new test cases) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
@@ -502,4 +502,110 @@ describe("SkillMcpManager", () => {
|
||||
)
|
||||
})
|
||||
})
|
||||
|
||||
describe("operation retry logic", () => {
|
||||
it("should retry operation when 'Not connected' error occurs", async () => {
|
||||
// #given
|
||||
const info: SkillMcpClientInfo = {
|
||||
serverName: "retry-server",
|
||||
skillName: "retry-skill",
|
||||
sessionID: "session-retry-1",
|
||||
}
|
||||
const context: SkillMcpServerContext = {
|
||||
config: {
|
||||
url: "https://example.com/mcp",
|
||||
},
|
||||
skillName: "retry-skill",
|
||||
}
|
||||
|
||||
// Mock client that fails first time with "Not connected", then succeeds
|
||||
let callCount = 0
|
||||
const mockClient = {
|
||||
callTool: mock(async () => {
|
||||
callCount++
|
||||
if (callCount === 1) {
|
||||
throw new Error("Not connected")
|
||||
}
|
||||
return { content: [{ type: "text", text: "success" }] }
|
||||
}),
|
||||
close: mock(() => Promise.resolve()),
|
||||
}
|
||||
|
||||
// Spy on getOrCreateClientWithRetry to inject mock client
|
||||
const getOrCreateSpy = spyOn(manager as any, "getOrCreateClientWithRetry")
|
||||
getOrCreateSpy.mockResolvedValue(mockClient)
|
||||
|
||||
// #when
|
||||
const result = await manager.callTool(info, context, "test-tool", {})
|
||||
|
||||
// #then
|
||||
expect(callCount).toBe(2) // First call fails, second succeeds
|
||||
expect(result).toEqual([{ type: "text", text: "success" }])
|
||||
expect(getOrCreateSpy).toHaveBeenCalledTimes(2) // Called twice due to retry
|
||||
})
|
||||
|
||||
it("should fail after 3 retry attempts", async () => {
|
||||
// #given
|
||||
const info: SkillMcpClientInfo = {
|
||||
serverName: "fail-server",
|
||||
skillName: "fail-skill",
|
||||
sessionID: "session-fail-1",
|
||||
}
|
||||
const context: SkillMcpServerContext = {
|
||||
config: {
|
||||
url: "https://example.com/mcp",
|
||||
},
|
||||
skillName: "fail-skill",
|
||||
}
|
||||
|
||||
// Mock client that always fails with "Not connected"
|
||||
const mockClient = {
|
||||
callTool: mock(async () => {
|
||||
throw new Error("Not connected")
|
||||
}),
|
||||
close: mock(() => Promise.resolve()),
|
||||
}
|
||||
|
||||
const getOrCreateSpy = spyOn(manager as any, "getOrCreateClientWithRetry")
|
||||
getOrCreateSpy.mockResolvedValue(mockClient)
|
||||
|
||||
// #when / #then
|
||||
await expect(manager.callTool(info, context, "test-tool", {})).rejects.toThrow(
|
||||
/Failed after 3 reconnection attempts/
|
||||
)
|
||||
expect(getOrCreateSpy).toHaveBeenCalledTimes(3) // Initial + 2 retries
|
||||
})
|
||||
|
||||
it("should not retry on non-connection errors", async () => {
|
||||
// #given
|
||||
const info: SkillMcpClientInfo = {
|
||||
serverName: "error-server",
|
||||
skillName: "error-skill",
|
||||
sessionID: "session-error-1",
|
||||
}
|
||||
const context: SkillMcpServerContext = {
|
||||
config: {
|
||||
url: "https://example.com/mcp",
|
||||
},
|
||||
skillName: "error-skill",
|
||||
}
|
||||
|
||||
// Mock client that fails with non-connection error
|
||||
const mockClient = {
|
||||
callTool: mock(async () => {
|
||||
throw new Error("Tool not found")
|
||||
}),
|
||||
close: mock(() => Promise.resolve()),
|
||||
}
|
||||
|
||||
const getOrCreateSpy = spyOn(manager as any, "getOrCreateClientWithRetry")
|
||||
getOrCreateSpy.mockResolvedValue(mockClient)
|
||||
|
||||
// #when / #then
|
||||
await expect(manager.callTool(info, context, "test-tool", {})).rejects.toThrow(
|
||||
"Tool not found"
|
||||
)
|
||||
expect(getOrCreateSpy).toHaveBeenCalledTimes(1) // No retry
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
@@ -415,9 +415,10 @@ export class SkillMcpManager {
|
||||
name: string,
|
||||
args: Record<string, unknown>
|
||||
): Promise<unknown> {
|
||||
const client = await this.getOrCreateClientWithRetry(info, context.config)
|
||||
const result = await client.callTool({ name, arguments: args })
|
||||
return result.content
|
||||
return this.withOperationRetry(info, context.config, async (client) => {
|
||||
const result = await client.callTool({ name, arguments: args })
|
||||
return result.content
|
||||
})
|
||||
}
|
||||
|
||||
async readResource(
|
||||
@@ -425,9 +426,10 @@ export class SkillMcpManager {
|
||||
context: SkillMcpServerContext,
|
||||
uri: string
|
||||
): Promise<unknown> {
|
||||
const client = await this.getOrCreateClientWithRetry(info, context.config)
|
||||
const result = await client.readResource({ uri })
|
||||
return result.contents
|
||||
return this.withOperationRetry(info, context.config, async (client) => {
|
||||
const result = await client.readResource({ uri })
|
||||
return result.contents
|
||||
})
|
||||
}
|
||||
|
||||
async getPrompt(
|
||||
@@ -436,9 +438,53 @@ export class SkillMcpManager {
|
||||
name: string,
|
||||
args: Record<string, string>
|
||||
): Promise<unknown> {
|
||||
const client = await this.getOrCreateClientWithRetry(info, context.config)
|
||||
const result = await client.getPrompt({ name, arguments: args })
|
||||
return result.messages
|
||||
return this.withOperationRetry(info, context.config, async (client) => {
|
||||
const result = await client.getPrompt({ name, arguments: args })
|
||||
return result.messages
|
||||
})
|
||||
}
|
||||
|
||||
private async withOperationRetry<T>(
|
||||
info: SkillMcpClientInfo,
|
||||
config: ClaudeCodeMcpServer,
|
||||
operation: (client: Client) => Promise<T>
|
||||
): Promise<T> {
|
||||
const maxRetries = 3
|
||||
let lastError: Error | null = null
|
||||
|
||||
for (let attempt = 1; attempt <= maxRetries; attempt++) {
|
||||
try {
|
||||
const client = await this.getOrCreateClientWithRetry(info, config)
|
||||
return await operation(client)
|
||||
} catch (error) {
|
||||
lastError = error instanceof Error ? error : new Error(String(error))
|
||||
const errorMessage = lastError.message.toLowerCase()
|
||||
|
||||
if (!errorMessage.includes("not connected")) {
|
||||
throw lastError
|
||||
}
|
||||
|
||||
if (attempt === maxRetries) {
|
||||
throw new Error(
|
||||
`Failed after ${maxRetries} reconnection attempts: ${lastError.message}`
|
||||
)
|
||||
}
|
||||
|
||||
const key = this.getClientKey(info)
|
||||
const existing = this.clients.get(key)
|
||||
if (existing) {
|
||||
this.clients.delete(key)
|
||||
try {
|
||||
await existing.client.close()
|
||||
} catch { /* process may already be terminated */ }
|
||||
try {
|
||||
await existing.transport.close()
|
||||
} catch { /* transport may already be terminated */ }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
throw lastError || new Error("Operation failed with unknown error")
|
||||
}
|
||||
|
||||
private async getOrCreateClientWithRetry(
|
||||
|
||||
Reference in New Issue
Block a user