fix(delegate-task): wait for result when forcing unstable agents to background
Previously, unstable agents (gemini models, or categories with is_unstable_agent=true) were forced into background mode but returned immediately with only a task ID, so callers lost visibility into the result. Now the task is still launched in the background for stability monitoring, but the tool polls until it completes and returns the actual task output, just like sync mode. 🤖 Generated with assistance of [OhMyOpenCode](https://github.com/code-yeongyu/oh-my-opencode)
This commit is contained in:
@@ -958,6 +958,389 @@ describe("sisyphus-task", () => {
|
||||
}, { timeout: 20000 })
|
||||
})
|
||||
|
||||
describe("unstable agent forced background mode", () => {
|
||||
test("gemini model with run_in_background=false should force background but wait for result", async () => {
  // #given - a category backed by a gemini model, called with run_in_background=false
  const { createDelegateTask } = require("./tools")
  const sessionId = "ses_unstable_gemini"
  let didLaunch = false

  // Background manager stub: records the launch and hands back a running task.
  const manager = {
    async launch() {
      didLaunch = true
      return {
        id: "task-unstable",
        sessionID: sessionId,
        description: "Unstable gemini task",
        agent: "Sisyphus-Junior",
        status: "running",
      }
    },
  }

  // Session stub: reports the session as idle and exposes a single assistant text reply.
  const session = {
    async get() {
      return { data: { directory: "/project" } }
    },
    async create() {
      return { data: { id: sessionId } }
    },
    async prompt() {
      return { data: {} }
    },
    async messages() {
      const reply = {
        info: { role: "assistant", time: { created: Date.now() } },
        parts: [{ type: "text", text: "Gemini task completed successfully" }],
      }
      return { data: [reply] }
    },
    async status() {
      return { data: { [sessionId]: { type: "idle" } } }
    },
  }

  const client = {
    app: { agents: async () => ({ data: [] }) },
    config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) },
    session,
  }

  const tool = createDelegateTask({ manager, client })

  const callerContext = {
    sessionID: "parent-session",
    messageID: "parent-message",
    agent: "Sisyphus",
    abort: new AbortController().signal,
  }

  // #when - delegating to visual-engineering (gemini model) with run_in_background=false
  const request = {
    description: "Test gemini forced background",
    prompt: "Do something visual",
    category: "visual-engineering",
    run_in_background: false,
    skills: [],
  }
  const output = await tool.execute(request, callerContext)

  // #then - the task goes through the background manager, yet the real output comes back
  expect(didLaunch).toBe(true)
  expect(output).toContain("UNSTABLE AGENT")
  expect(output).toContain("Gemini task completed successfully")
}, { timeout: 20000 })
|
||||
|
||||
test("gemini model with run_in_background=true should not show unstable message (normal background)", async () => {
  // #given - category using gemini model with run_in_background=true (normal background flow)
  const { createDelegateTask } = require("./tools")
  let launchCalled = false

  // Background manager stub: records that launch() ran and returns a running task.
  const mockManager = {
    launch: async () => {
      launchCalled = true
      return {
        id: "task-normal-bg",
        sessionID: "ses_normal_bg",
        description: "Normal background task",
        agent: "Sisyphus-Junior",
        status: "running",
      }
    },
  }

  const mockClient = {
    app: { agents: async () => ({ data: [] }) },
    config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) },
    session: {
      create: async () => ({ data: { id: "test-session" } }),
      prompt: async () => ({ data: {} }),
      messages: async () => ({ data: [] }),
    },
  }

  const tool = createDelegateTask({
    manager: mockManager,
    client: mockClient,
  })

  const toolContext = {
    sessionID: "parent-session",
    messageID: "parent-message",
    agent: "Sisyphus",
    abort: new AbortController().signal,
  }

  // #when - using visual-engineering with run_in_background=true (normal background)
  const result = await tool.execute(
    {
      description: "Test normal background",
      prompt: "Do something visual",
      category: "visual-engineering",
      run_in_background: true, // User explicitly says true - normal background
      skills: [],
    },
    toolContext
  )

  // #then - should NOT show unstable message (it's normal background flow)
  expect(launchCalled).toBe(true)
  // Match the short "UNSTABLE AGENT" marker: the forced-background path tags its output
  // with "[UNSTABLE AGENT]", so checking only for "UNSTABLE AGENT MODE" could never fail.
  // The short marker is also a substring of the longer one, so both spellings are covered.
  expect(result).not.toContain("UNSTABLE AGENT")
  expect(result).toContain("task-normal-bg")
})
|
||||
|
||||
test("non-gemini model with run_in_background=false should run sync (not forced to background)", async () => {
  // #given - category using non-gemini model with run_in_background=false
  const { createDelegateTask } = require("./tools")
  let launchCalled = false
  let promptCalled = false

  // Background manager stub: only flags that it was (unexpectedly) used.
  const mockManager = {
    launch: async () => {
      launchCalled = true
      return { id: "should-not-be-called", sessionID: "x", description: "x", agent: "x", status: "running" }
    },
  }

  const mockClient = {
    app: { agents: async () => ({ data: [] }) },
    config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) },
    session: {
      get: async () => ({ data: { directory: "/project" } }),
      create: async () => ({ data: { id: "ses_sync_non_gemini" } }),
      // Flags that the sync path issued a prompt on the session.
      prompt: async () => {
        promptCalled = true
        return { data: {} }
      },
      messages: async () => ({
        data: [{ info: { role: "assistant" }, parts: [{ type: "text", text: "Done sync" }] }]
      }),
      status: async () => ({ data: { "ses_sync_non_gemini": { type: "idle" } } }),
    },
  }

  // Use ultrabrain which uses gpt-5.2 (non-gemini)
  const tool = createDelegateTask({
    manager: mockManager,
    client: mockClient,
  })

  const toolContext = {
    sessionID: "parent-session",
    messageID: "parent-message",
    agent: "Sisyphus",
    abort: new AbortController().signal,
  }

  // #when - using ultrabrain (gpt model) with run_in_background=false
  const result = await tool.execute(
    {
      description: "Test non-gemini sync",
      prompt: "Do something smart",
      category: "ultrabrain",
      run_in_background: false,
      skills: [],
    },
    toolContext
  )

  // #then - should run sync, NOT forced to background
  expect(launchCalled).toBe(false) // manager.launch should NOT be called
  expect(promptCalled).toBe(true) // sync mode uses session.prompt
  // Match the short "UNSTABLE AGENT" marker: the forced-background path tags its output
  // with "[UNSTABLE AGENT]", so checking only for "UNSTABLE AGENT MODE" could never fail.
  // The short marker is also a substring of the longer one, so both spellings are covered.
  expect(result).not.toContain("UNSTABLE AGENT")
}, { timeout: 20000 })
|
||||
|
||||
test("artistry category (gemini) with run_in_background=false should force background but wait for result", async () => {
  // #given - the artistry category, which also runs on a gemini model
  const { createDelegateTask } = require("./tools")
  const sessionId = "ses_artistry_gemini"
  let didLaunch = false

  // Background manager stub: records the launch and hands back a running task.
  const manager = {
    async launch() {
      didLaunch = true
      return {
        id: "task-artistry",
        sessionID: sessionId,
        description: "Artistry gemini task",
        agent: "Sisyphus-Junior",
        status: "running",
      }
    },
  }

  // Session stub: reports the session as idle and exposes a single assistant text reply.
  const session = {
    async get() {
      return { data: { directory: "/project" } }
    },
    async create() {
      return { data: { id: sessionId } }
    },
    async prompt() {
      return { data: {} }
    },
    async messages() {
      const reply = {
        info: { role: "assistant", time: { created: Date.now() } },
        parts: [{ type: "text", text: "Artistry result here" }],
      }
      return { data: [reply] }
    },
    async status() {
      return { data: { [sessionId]: { type: "idle" } } }
    },
  }

  const client = {
    app: { agents: async () => ({ data: [] }) },
    config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) },
    session,
  }

  const tool = createDelegateTask({ manager, client })

  const callerContext = {
    sessionID: "parent-session",
    messageID: "parent-message",
    agent: "Sisyphus",
    abort: new AbortController().signal,
  }

  // #when - artistry category (gemini-3-pro-preview with max variant)
  const request = {
    description: "Test artistry forced background",
    prompt: "Do something artistic",
    category: "artistry",
    run_in_background: false,
    skills: [],
  }
  const output = await tool.execute(request, callerContext)

  // #then - the task goes through the background manager, yet the real output comes back
  expect(didLaunch).toBe(true)
  expect(output).toContain("UNSTABLE AGENT")
  expect(output).toContain("Artistry result here")
}, { timeout: 20000 })
|
||||
|
||||
test("writing category (gemini-flash) with run_in_background=false should force background but wait for result", async () => {
  // #given - the writing category, which uses gemini-3-flash-preview
  const { createDelegateTask } = require("./tools")
  const sessionId = "ses_writing_gemini"
  let didLaunch = false

  // Background manager stub: records the launch and hands back a running task.
  const manager = {
    async launch() {
      didLaunch = true
      return {
        id: "task-writing",
        sessionID: sessionId,
        description: "Writing gemini task",
        agent: "Sisyphus-Junior",
        status: "running",
      }
    },
  }

  // Session stub: reports the session as idle and exposes a single assistant text reply.
  const session = {
    async get() {
      return { data: { directory: "/project" } }
    },
    async create() {
      return { data: { id: sessionId } }
    },
    async prompt() {
      return { data: {} }
    },
    async messages() {
      const reply = {
        info: { role: "assistant", time: { created: Date.now() } },
        parts: [{ type: "text", text: "Writing result here" }],
      }
      return { data: [reply] }
    },
    async status() {
      return { data: { [sessionId]: { type: "idle" } } }
    },
  }

  const client = {
    app: { agents: async () => ({ data: [] }) },
    config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) },
    session,
  }

  const tool = createDelegateTask({ manager, client })

  const callerContext = {
    sessionID: "parent-session",
    messageID: "parent-message",
    agent: "Sisyphus",
    abort: new AbortController().signal,
  }

  // #when - writing category (gemini-3-flash-preview)
  const request = {
    description: "Test writing forced background",
    prompt: "Write something",
    category: "writing",
    run_in_background: false,
    skills: [],
  }
  const output = await tool.execute(request, callerContext)

  // #then - the task goes through the background manager, yet the real output comes back
  expect(didLaunch).toBe(true)
  expect(output).toContain("UNSTABLE AGENT")
  expect(output).toContain("Writing result here")
}, { timeout: 20000 })
|
||||
|
||||
test("is_unstable_agent=true should force background but wait for result", async () => {
  // #given - a user-defined category flagged is_unstable_agent=true on a non-gemini model
  const { createDelegateTask } = require("./tools")
  const sessionId = "ses_custom_unstable"
  let didLaunch = false

  // Background manager stub: records the launch and hands back a running task.
  const manager = {
    async launch() {
      didLaunch = true
      return {
        id: "task-custom-unstable",
        sessionID: sessionId,
        description: "Custom unstable task",
        agent: "Sisyphus-Junior",
        status: "running",
      }
    },
  }

  // Session stub: reports the session as idle and exposes a single assistant text reply.
  const session = {
    async get() {
      return { data: { directory: "/project" } }
    },
    async create() {
      return { data: { id: sessionId } }
    },
    async prompt() {
      return { data: {} }
    },
    async messages() {
      const reply = {
        info: { role: "assistant", time: { created: Date.now() } },
        parts: [{ type: "text", text: "Custom unstable result" }],
      }
      return { data: [reply] }
    },
    async status() {
      return { data: { [sessionId]: { type: "idle" } } }
    },
  }

  const client = {
    app: { agents: async () => ({ data: [] }) },
    config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) },
    session,
  }

  // The custom category carries the explicit instability flag.
  const userCategories = {
    "my-unstable-cat": {
      model: "openai/gpt-5.2",
      is_unstable_agent: true,
    },
  }

  const tool = createDelegateTask({ manager, client, userCategories })

  const callerContext = {
    sessionID: "parent-session",
    messageID: "parent-message",
    agent: "Sisyphus",
    abort: new AbortController().signal,
  }

  // #when - using the custom unstable category with run_in_background=false
  const request = {
    description: "Test custom unstable",
    prompt: "Do something",
    category: "my-unstable-cat",
    run_in_background: false,
    skills: [],
  }
  const output = await tool.execute(request, callerContext)

  // #then - the task goes through the background manager, yet the real output comes back
  expect(didLaunch).toBe(true)
  expect(output).toContain("UNSTABLE AGENT")
  expect(output).toContain("Custom unstable result")
}, { timeout: 20000 })
|
||||
})
|
||||
|
||||
describe("buildSystemContent", () => {
|
||||
test("returns undefined when no skills and no category promptAppend", () => {
|
||||
// #given
|
||||
|
||||
@@ -483,10 +483,9 @@ ${textContent || "(No text output)"}`
|
||||
: undefined
|
||||
categoryPromptAppend = resolved.promptAppend || undefined
|
||||
|
||||
// Unstable agent detection - force background mode for monitoring
|
||||
// Unstable agent detection - launch as background for monitoring but wait for result
|
||||
const isUnstableAgent = resolved.config.is_unstable_agent === true || actualModel.toLowerCase().includes("gemini")
|
||||
if (isUnstableAgent && args.run_in_background === false) {
|
||||
// Force background mode for unstable agents
|
||||
const systemContent = buildSystemContent({ skillContent, categoryPromptAppend })
|
||||
|
||||
try {
|
||||
@@ -503,21 +502,92 @@ ${textContent || "(No text output)"}`
|
||||
skillContent: systemContent,
|
||||
})
|
||||
|
||||
const sessionID = task.sessionID
|
||||
if (!sessionID) {
|
||||
return formatDetailedError(new Error("Background task launched but no sessionID returned"), {
|
||||
operation: "Launch background task (unstable agent)",
|
||||
args,
|
||||
agent: agentToUse,
|
||||
category: args.category,
|
||||
})
|
||||
}
|
||||
|
||||
ctx.metadata?.({
|
||||
title: args.description,
|
||||
metadata: { sessionId: task.sessionID, category: args.category },
|
||||
metadata: { sessionId: sessionID, category: args.category },
|
||||
})
|
||||
|
||||
return `[UNSTABLE AGENT MODE]
|
||||
const startTime = new Date()
|
||||
|
||||
This category uses an unstable/experimental model (${actualModel}).
|
||||
Forced to background mode for monitoring stability.
|
||||
// Poll for completion (same logic as sync mode)
|
||||
const POLL_INTERVAL_MS = 500
|
||||
const MAX_POLL_TIME_MS = 10 * 60 * 1000
|
||||
const MIN_STABILITY_TIME_MS = 10000
|
||||
const STABILITY_POLLS_REQUIRED = 3
|
||||
const pollStart = Date.now()
|
||||
let lastMsgCount = 0
|
||||
let stablePolls = 0
|
||||
|
||||
Task ID: ${task.id}
|
||||
Session ID: ${task.sessionID}
|
||||
while (Date.now() - pollStart < MAX_POLL_TIME_MS) {
|
||||
if (ctx.abort?.aborted) {
|
||||
return `[UNSTABLE AGENT] Task aborted.\n\nSession ID: ${sessionID}`
|
||||
}
|
||||
|
||||
Monitor progress: Use \`background_output\` with task_id="${task.id}"
|
||||
Or watch the session directly for real-time updates.`
|
||||
await new Promise(resolve => setTimeout(resolve, POLL_INTERVAL_MS))
|
||||
|
||||
const statusResult = await client.session.status()
|
||||
const allStatuses = (statusResult.data ?? {}) as Record<string, { type: string }>
|
||||
const sessionStatus = allStatuses[sessionID]
|
||||
|
||||
if (sessionStatus && sessionStatus.type !== "idle") {
|
||||
stablePolls = 0
|
||||
lastMsgCount = 0
|
||||
continue
|
||||
}
|
||||
|
||||
if (Date.now() - pollStart < MIN_STABILITY_TIME_MS) continue
|
||||
|
||||
const messagesCheck = await client.session.messages({ path: { id: sessionID } })
|
||||
const msgs = ((messagesCheck as { data?: unknown }).data ?? messagesCheck) as Array<unknown>
|
||||
const currentMsgCount = msgs.length
|
||||
|
||||
if (currentMsgCount === lastMsgCount) {
|
||||
stablePolls++
|
||||
if (stablePolls >= STABILITY_POLLS_REQUIRED) break
|
||||
} else {
|
||||
stablePolls = 0
|
||||
lastMsgCount = currentMsgCount
|
||||
}
|
||||
}
|
||||
|
||||
const messagesResult = await client.session.messages({ path: { id: sessionID } })
|
||||
const messages = ((messagesResult as { data?: unknown }).data ?? messagesResult) as Array<{
|
||||
info?: { role?: string; time?: { created?: number } }
|
||||
parts?: Array<{ type?: string; text?: string }>
|
||||
}>
|
||||
|
||||
const assistantMessages = messages
|
||||
.filter((m) => m.info?.role === "assistant")
|
||||
.sort((a, b) => (b.info?.time?.created ?? 0) - (a.info?.time?.created ?? 0))
|
||||
const lastMessage = assistantMessages[0]
|
||||
|
||||
if (!lastMessage) {
|
||||
return `[UNSTABLE AGENT] No assistant response found.\n\nSession ID: ${sessionID}`
|
||||
}
|
||||
|
||||
const textParts = lastMessage?.parts?.filter((p) => p.type === "text" || p.type === "reasoning") ?? []
|
||||
const textContent = textParts.map((p) => p.text ?? "").filter(Boolean).join("\n")
|
||||
const duration = formatDuration(startTime)
|
||||
|
||||
return `[UNSTABLE AGENT] Task completed in ${duration}.
|
||||
|
||||
Model: ${actualModel} (unstable/experimental - launched via background for monitoring)
|
||||
Agent: ${agentToUse}${args.category ? ` (category: ${args.category})` : ""}
|
||||
Session ID: ${sessionID}
|
||||
|
||||
---
|
||||
|
||||
${textContent || "(No text output)"}`
|
||||
} catch (error) {
|
||||
return formatDetailedError(error, {
|
||||
operation: "Launch background task (unstable agent)",
|
||||
|
||||
Reference in New Issue
Block a user