fix(delegate-task): wait for result when forcing unstable agents to background

Previously, unstable agents (gemini models or is_unstable_agent=true) were
forced into background mode but returned immediately with only a task ID,
so callers lost visibility into the results.

Now such agents are still launched as background tasks (for stability
monitoring), but the tool polls until the task completes and returns the
actual task output, as sync mode does.

🤖 Generated with assistance of [OhMyOpenCode](https://github.com/code-yeongyu/oh-my-opencode)
This commit is contained in:
justsisyphus
2026-01-21 00:06:05 +09:00
parent c00f210922
commit 516edb445c
2 changed files with 463 additions and 10 deletions

View File

@@ -958,6 +958,389 @@ describe("sisyphus-task", () => {
}, { timeout: 20000 })
})
describe("unstable agent forced background mode", () => {
test("gemini model with run_in_background=false should force background but wait for result", async () => {
  // #given - a gemini-backed category, a manager stub that records whether
  // launch() ran, and a client stub whose session reports idle and already
  // holds a single assistant reply
  const { createDelegateTask } = require("./tools")
  const geminiSessionID = "ses_unstable_gemini"
  let didLaunch = false

  const managerStub = {
    launch: async () => {
      didLaunch = true
      return {
        id: "task-unstable",
        sessionID: geminiSessionID,
        description: "Unstable gemini task",
        agent: "Sisyphus-Junior",
        status: "running",
      }
    },
  }

  const sessionStub = {
    get: async () => ({ data: { directory: "/project" } }),
    create: async () => ({ data: { id: geminiSessionID } }),
    prompt: async () => ({ data: {} }),
    messages: async () => ({
      data: [
        {
          info: { role: "assistant", time: { created: Date.now() } },
          parts: [{ type: "text", text: "Gemini task completed successfully" }],
        },
      ],
    }),
    status: async () => ({ data: { [geminiSessionID]: { type: "idle" } } }),
  }

  const clientStub = {
    app: { agents: async () => ({ data: [] }) },
    config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) },
    session: sessionStub,
  }

  const delegateTask = createDelegateTask({
    manager: managerStub,
    client: clientStub,
  })

  const callerContext = {
    sessionID: "parent-session",
    messageID: "parent-message",
    agent: "Sisyphus",
    abort: new AbortController().signal,
  }

  // #when - using visual-engineering (gemini model) with run_in_background=false
  const output = await delegateTask.execute(
    {
      description: "Test gemini forced background",
      prompt: "Do something visual",
      category: "visual-engineering",
      run_in_background: false,
      skills: [],
    },
    callerContext
  )

  // #then - launched through the background manager, yet the real assistant
  // text is returned instead of just a task ID
  expect(didLaunch).toBe(true)
  expect(output).toContain("UNSTABLE AGENT")
  expect(output).toContain("Gemini task completed successfully")
}, { timeout: 20000 })
test("gemini model with run_in_background=true should not show unstable message (normal background)", async () => {
  // #given - category using gemini model with run_in_background=true (normal background flow)
  const { createDelegateTask } = require("./tools")
  let launchCalled = false
  const mockManager = {
    launch: async () => {
      launchCalled = true
      return {
        id: "task-normal-bg",
        sessionID: "ses_normal_bg",
        description: "Normal background task",
        agent: "Sisyphus-Junior",
        status: "running",
      }
    },
  }
  // Only the session methods stubbed here are provided; there is no
  // status()/get() stub, so this test relies on the normal background path
  // returning without polling the session.
  const mockClient = {
    app: { agents: async () => ({ data: [] }) },
    config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) },
    session: {
      create: async () => ({ data: { id: "test-session" } }),
      prompt: async () => ({ data: {} }),
      messages: async () => ({ data: [] }),
    },
  }
  const tool = createDelegateTask({
    manager: mockManager,
    client: mockClient,
  })
  const toolContext = {
    sessionID: "parent-session",
    messageID: "parent-message",
    agent: "Sisyphus",
    abort: new AbortController().signal,
  }

  // #when - using visual-engineering with run_in_background=true (normal background)
  const result = await tool.execute(
    {
      description: "Test normal background",
      prompt: "Do something visual",
      category: "visual-engineering",
      run_in_background: true, // User explicitly says true - normal background
      skills: [],
    },
    toolContext
  )

  // #then - should NOT show unstable message (it's normal background flow)
  expect(launchCalled).toBe(true)
  // Match on the shared "UNSTABLE AGENT" prefix rather than the legacy
  // "UNSTABLE AGENT MODE" banner: the unstable path now emits
  // "[UNSTABLE AGENT] ...", so the longer string could never appear and the
  // negative check would pass vacuously.
  expect(result).not.toContain("UNSTABLE AGENT")
  expect(result).toContain("task-normal-bg")
})
test("non-gemini model with run_in_background=false should run sync (not forced to background)", async () => {
  // #given - category using non-gemini model with run_in_background=false
  const { createDelegateTask } = require("./tools")
  let launchCalled = false
  let promptCalled = false
  // launch() must never run in this scenario; the returned task is a dummy
  // whose only purpose is to flip launchCalled if it is reached.
  const mockManager = {
    launch: async () => {
      launchCalled = true
      return { id: "should-not-be-called", sessionID: "x", description: "x", agent: "x", status: "running" }
    },
  }
  const mockClient = {
    app: { agents: async () => ({ data: [] }) },
    config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) },
    session: {
      get: async () => ({ data: { directory: "/project" } }),
      create: async () => ({ data: { id: "ses_sync_non_gemini" } }),
      prompt: async () => {
        promptCalled = true
        return { data: {} }
      },
      messages: async () => ({
        data: [{ info: { role: "assistant" }, parts: [{ type: "text", text: "Done sync" }] }]
      }),
      status: async () => ({ data: { "ses_sync_non_gemini": { type: "idle" } } }),
    },
  }
  // Use ultrabrain which uses gpt-5.2 (non-gemini)
  const tool = createDelegateTask({
    manager: mockManager,
    client: mockClient,
  })
  const toolContext = {
    sessionID: "parent-session",
    messageID: "parent-message",
    agent: "Sisyphus",
    abort: new AbortController().signal,
  }

  // #when - using ultrabrain (gpt model) with run_in_background=false
  const result = await tool.execute(
    {
      description: "Test non-gemini sync",
      prompt: "Do something smart",
      category: "ultrabrain",
      run_in_background: false,
      skills: [],
    },
    toolContext
  )

  // #then - should run sync, NOT forced to background
  expect(launchCalled).toBe(false) // manager.launch should NOT be called
  expect(promptCalled).toBe(true) // sync mode uses session.prompt
  // Check the "UNSTABLE AGENT" prefix (covers both the legacy
  // "[UNSTABLE AGENT MODE]" banner and the current "[UNSTABLE AGENT] ..."
  // messages); asserting on the legacy banner alone can no longer fail.
  expect(result).not.toContain("UNSTABLE AGENT")
}, { timeout: 20000 })
test("artistry category (gemini) with run_in_background=false should force background but wait for result", async () => {
  // #given - artistry also uses gemini model; the manager stub records the
  // launch and the client stub exposes an idle session with one assistant reply
  const { createDelegateTask } = require("./tools")
  const artistrySessionID = "ses_artistry_gemini"
  let didLaunch = false

  const managerStub = {
    launch: async () => {
      didLaunch = true
      return {
        id: "task-artistry",
        sessionID: artistrySessionID,
        description: "Artistry gemini task",
        agent: "Sisyphus-Junior",
        status: "running",
      }
    },
  }

  const sessionStub = {
    get: async () => ({ data: { directory: "/project" } }),
    create: async () => ({ data: { id: artistrySessionID } }),
    prompt: async () => ({ data: {} }),
    messages: async () => ({
      data: [
        {
          info: { role: "assistant", time: { created: Date.now() } },
          parts: [{ type: "text", text: "Artistry result here" }],
        },
      ],
    }),
    status: async () => ({ data: { [artistrySessionID]: { type: "idle" } } }),
  }

  const clientStub = {
    app: { agents: async () => ({ data: [] }) },
    config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) },
    session: sessionStub,
  }

  const delegateTask = createDelegateTask({
    manager: managerStub,
    client: clientStub,
  })

  const callerContext = {
    sessionID: "parent-session",
    messageID: "parent-message",
    agent: "Sisyphus",
    abort: new AbortController().signal,
  }

  // #when - artistry category (gemini-3-pro-preview with max variant)
  const output = await delegateTask.execute(
    {
      description: "Test artistry forced background",
      prompt: "Do something artistic",
      category: "artistry",
      run_in_background: false,
      skills: [],
    },
    callerContext
  )

  // #then - launched through the background manager, yet the real assistant
  // text is returned instead of just a task ID
  expect(didLaunch).toBe(true)
  expect(output).toContain("UNSTABLE AGENT")
  expect(output).toContain("Artistry result here")
}, { timeout: 20000 })
test("writing category (gemini-flash) with run_in_background=false should force background but wait for result", async () => {
  // #given - writing uses gemini-3-flash-preview; the manager stub records the
  // launch and the client stub exposes an idle session with one assistant reply
  const { createDelegateTask } = require("./tools")
  const writingSessionID = "ses_writing_gemini"
  let didLaunch = false

  const managerStub = {
    launch: async () => {
      didLaunch = true
      return {
        id: "task-writing",
        sessionID: writingSessionID,
        description: "Writing gemini task",
        agent: "Sisyphus-Junior",
        status: "running",
      }
    },
  }

  const sessionStub = {
    get: async () => ({ data: { directory: "/project" } }),
    create: async () => ({ data: { id: writingSessionID } }),
    prompt: async () => ({ data: {} }),
    messages: async () => ({
      data: [
        {
          info: { role: "assistant", time: { created: Date.now() } },
          parts: [{ type: "text", text: "Writing result here" }],
        },
      ],
    }),
    status: async () => ({ data: { [writingSessionID]: { type: "idle" } } }),
  }

  const clientStub = {
    app: { agents: async () => ({ data: [] }) },
    config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) },
    session: sessionStub,
  }

  const delegateTask = createDelegateTask({
    manager: managerStub,
    client: clientStub,
  })

  const callerContext = {
    sessionID: "parent-session",
    messageID: "parent-message",
    agent: "Sisyphus",
    abort: new AbortController().signal,
  }

  // #when - writing category (gemini-3-flash-preview)
  const output = await delegateTask.execute(
    {
      description: "Test writing forced background",
      prompt: "Write something",
      category: "writing",
      run_in_background: false,
      skills: [],
    },
    callerContext
  )

  // #then - launched through the background manager, yet the real assistant
  // text is returned instead of just a task ID
  expect(didLaunch).toBe(true)
  expect(output).toContain("UNSTABLE AGENT")
  expect(output).toContain("Writing result here")
}, { timeout: 20000 })
test("is_unstable_agent=true should force background but wait for result", async () => {
  // #given - custom category with is_unstable_agent=true but non-gemini model,
  // so the flag alone (not the model name) has to trigger the unstable path
  const { createDelegateTask } = require("./tools")
  const customSessionID = "ses_custom_unstable"
  let didLaunch = false

  const managerStub = {
    launch: async () => {
      didLaunch = true
      return {
        id: "task-custom-unstable",
        sessionID: customSessionID,
        description: "Custom unstable task",
        agent: "Sisyphus-Junior",
        status: "running",
      }
    },
  }

  const sessionStub = {
    get: async () => ({ data: { directory: "/project" } }),
    create: async () => ({ data: { id: customSessionID } }),
    prompt: async () => ({ data: {} }),
    messages: async () => ({
      data: [
        {
          info: { role: "assistant", time: { created: Date.now() } },
          parts: [{ type: "text", text: "Custom unstable result" }],
        },
      ],
    }),
    status: async () => ({ data: { [customSessionID]: { type: "idle" } } }),
  }

  const clientStub = {
    app: { agents: async () => ({ data: [] }) },
    config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) },
    session: sessionStub,
  }

  // User-defined category: an OpenAI model explicitly flagged as unstable.
  const unstableCategories = {
    "my-unstable-cat": {
      model: "openai/gpt-5.2",
      is_unstable_agent: true,
    },
  }

  const delegateTask = createDelegateTask({
    manager: managerStub,
    client: clientStub,
    userCategories: unstableCategories,
  })

  const callerContext = {
    sessionID: "parent-session",
    messageID: "parent-message",
    agent: "Sisyphus",
    abort: new AbortController().signal,
  }

  // #when - using custom unstable category with run_in_background=false
  const output = await delegateTask.execute(
    {
      description: "Test custom unstable",
      prompt: "Do something",
      category: "my-unstable-cat",
      run_in_background: false,
      skills: [],
    },
    callerContext
  )

  // #then - launched through the background manager, yet the real assistant
  // text is returned instead of just a task ID
  expect(didLaunch).toBe(true)
  expect(output).toContain("UNSTABLE AGENT")
  expect(output).toContain("Custom unstable result")
}, { timeout: 20000 })
})
describe("buildSystemContent", () => {
test("returns undefined when no skills and no category promptAppend", () => {
// #given

View File

@@ -483,10 +483,9 @@ ${textContent || "(No text output)"}`
: undefined
categoryPromptAppend = resolved.promptAppend || undefined
// Unstable agent detection - force background mode for monitoring
// Unstable agent detection - launch as background for monitoring but wait for result
const isUnstableAgent = resolved.config.is_unstable_agent === true || actualModel.toLowerCase().includes("gemini")
if (isUnstableAgent && args.run_in_background === false) {
// Force background mode for unstable agents
const systemContent = buildSystemContent({ skillContent, categoryPromptAppend })
try {
@@ -503,21 +502,92 @@ ${textContent || "(No text output)"}`
skillContent: systemContent,
})
const sessionID = task.sessionID
if (!sessionID) {
return formatDetailedError(new Error("Background task launched but no sessionID returned"), {
operation: "Launch background task (unstable agent)",
args,
agent: agentToUse,
category: args.category,
})
}
ctx.metadata?.({
title: args.description,
metadata: { sessionId: task.sessionID, category: args.category },
metadata: { sessionId: sessionID, category: args.category },
})
return `[UNSTABLE AGENT MODE]
const startTime = new Date()
This category uses an unstable/experimental model (${actualModel}).
Forced to background mode for monitoring stability.
// Poll for completion (same logic as sync mode)
const POLL_INTERVAL_MS = 500
const MAX_POLL_TIME_MS = 10 * 60 * 1000
const MIN_STABILITY_TIME_MS = 10000
const STABILITY_POLLS_REQUIRED = 3
const pollStart = Date.now()
let lastMsgCount = 0
let stablePolls = 0
Task ID: ${task.id}
Session ID: ${task.sessionID}
while (Date.now() - pollStart < MAX_POLL_TIME_MS) {
if (ctx.abort?.aborted) {
return `[UNSTABLE AGENT] Task aborted.\n\nSession ID: ${sessionID}`
}
Monitor progress: Use \`background_output\` with task_id="${task.id}"
Or watch the session directly for real-time updates.`
await new Promise(resolve => setTimeout(resolve, POLL_INTERVAL_MS))
const statusResult = await client.session.status()
const allStatuses = (statusResult.data ?? {}) as Record<string, { type: string }>
const sessionStatus = allStatuses[sessionID]
if (sessionStatus && sessionStatus.type !== "idle") {
stablePolls = 0
lastMsgCount = 0
continue
}
if (Date.now() - pollStart < MIN_STABILITY_TIME_MS) continue
const messagesCheck = await client.session.messages({ path: { id: sessionID } })
const msgs = ((messagesCheck as { data?: unknown }).data ?? messagesCheck) as Array<unknown>
const currentMsgCount = msgs.length
if (currentMsgCount === lastMsgCount) {
stablePolls++
if (stablePolls >= STABILITY_POLLS_REQUIRED) break
} else {
stablePolls = 0
lastMsgCount = currentMsgCount
}
}
const messagesResult = await client.session.messages({ path: { id: sessionID } })
const messages = ((messagesResult as { data?: unknown }).data ?? messagesResult) as Array<{
info?: { role?: string; time?: { created?: number } }
parts?: Array<{ type?: string; text?: string }>
}>
const assistantMessages = messages
.filter((m) => m.info?.role === "assistant")
.sort((a, b) => (b.info?.time?.created ?? 0) - (a.info?.time?.created ?? 0))
const lastMessage = assistantMessages[0]
if (!lastMessage) {
return `[UNSTABLE AGENT] No assistant response found.\n\nSession ID: ${sessionID}`
}
const textParts = lastMessage?.parts?.filter((p) => p.type === "text" || p.type === "reasoning") ?? []
const textContent = textParts.map((p) => p.text ?? "").filter(Boolean).join("\n")
const duration = formatDuration(startTime)
return `[UNSTABLE AGENT] Task completed in ${duration}.
Model: ${actualModel} (unstable/experimental - launched via background for monitoring)
Agent: ${agentToUse}${args.category ? ` (category: ${args.category})` : ""}
Session ID: ${sessionID}
---
${textContent || "(No text output)"}`
} catch (error) {
return formatDetailedError(error, {
operation: "Launch background task (unstable agent)",