chore: treat minimax as unstable model requiring background monitoring

This commit is contained in:
justsisyphus
2026-02-01 17:20:01 +09:00
parent 0dafdde173
commit ab54e6ccdc
5 changed files with 108 additions and 3 deletions

View File

@@ -182,7 +182,7 @@ export const CategoryConfigSchema = z.object({
textVerbosity: z.enum(["low", "medium", "high"]).optional(),
tools: z.record(z.string(), z.boolean()).optional(),
prompt_append: z.string().optional(),
/** Mark agent as unstable - forces background mode for monitoring. Auto-enabled for gemini models. */
/** Mark agent as unstable - forces background mode for monitoring. Auto-enabled for gemini/minimax models. */
is_unstable_agent: z.boolean().optional(),
})

View File

@@ -91,6 +91,41 @@ describe("unstable-agent-babysitter hook", () => {
expect(text).toContain("deep thought")
})
test("fires reminder for hung minimax task", async () => {
  // #given - a main session plus one background task running on a minimax model
  setMainSession("main-1")
  const recordedPrompts: Array<{ input: unknown }> = []
  const pluginInput = createMockPluginInput({
    messagesBySession: {
      "main-1": [
        { info: { agent: "sisyphus", model: { providerID: "openai", modelID: "gpt-4" } } },
      ],
      "bg-1": [
        { info: { role: "assistant" }, parts: [{ type: "thinking", thinking: "minimax thought" }] },
      ],
    },
    promptCalls: recordedPrompts,
  })
  const minimaxTask = createTask({ model: { providerID: "minimax", modelID: "minimax-1" } })
  const babysitter = createUnstableAgentBabysitterHook(pluginInput, {
    backgroundManager: createBackgroundManager([minimaxTask]),
    config: { enabled: true, timeout_ms: 120000 },
  })

  // #when - the main session reports idle
  await babysitter.event({ event: { type: "session.idle", properties: { sessionID: "main-1" } } })

  // #then - exactly one prompt is recorded and its first text part carries the reminder
  expect(recordedPrompts.length).toBe(1)
  type ReminderPayload = { body?: { parts?: Array<{ text?: string }> } }
  const reminder = recordedPrompts[0].input as ReminderPayload
  const reminderText = reminder.body?.parts?.[0]?.text ?? ""
  for (const expectedFragment of ["background_output", "background_cancel", "minimax thought"]) {
    expect(reminderText).toContain(expectedFragment)
  }
})
test("does not remind stable model tasks", async () => {
// #given
setMainSession("main-1")

View File

@@ -99,7 +99,7 @@ function extractMessages(value: unknown): unknown[] {
/**
 * Reports whether a background task should be treated as unstable.
 *
 * A task counts as unstable when it is explicitly flagged through
 * `isUnstableAgent`, or when its model ID contains "gemini" or "minimax"
 * (compared case-insensitively).
 *
 * @param task - The background task to inspect.
 * @returns `true` for flagged tasks and gemini/minimax models; `false`
 *   otherwise, including when the task carries no model ID.
 */
function isUnstableTask(task: BackgroundTask): boolean {
  if (task.isUnstableAgent === true) return true
  const modelId = task.model?.modelID?.toLowerCase()
  // One combined check: a preceding gemini-only return would make the
  // minimax branch unreachable.
  return modelId ? modelId.includes("gemini") || modelId.includes("minimax") : false
}
async function resolveMainSessionTarget(

View File

@@ -873,7 +873,8 @@ Available categories: ${categoryNames.join(", ")}`,
}
}
const isUnstableAgent = resolved.config.is_unstable_agent === true || (actualModel?.toLowerCase().includes("gemini") ?? false)
const unstableModel = actualModel?.toLowerCase()
const isUnstableAgent = resolved.config.is_unstable_agent === true || (unstableModel ? unstableModel.includes("gemini") || unstableModel.includes("minimax") : false)
return {
agentToUse: SISYPHUS_JUNIOR_AGENT,

View File

@@ -1261,6 +1261,75 @@ describe("sisyphus-task", () => {
expect(result).toContain("task-normal-bg")
})
test("minimax model with run_in_background=false should force background but wait for result", async () => {
  // given - a user-defined category backed by a minimax model, invoked with run_in_background=false
  const { createDelegateTask } = require("./tools")

  let launchCalled = false
  const launchTask = async () => {
    launchCalled = true
    return {
      id: "task-unstable-minimax",
      sessionID: "ses_unstable_minimax",
      description: "Unstable minimax task",
      agent: "sisyphus-junior",
      status: "running",
    }
  }

  // Session API stub: the session reports idle and exposes one assistant text message.
  const sessionStub = {
    get: async () => ({ data: { directory: "/project" } }),
    create: async () => ({ data: { id: "ses_unstable_minimax" } }),
    prompt: async () => ({ data: {} }),
    messages: async () => ({
      data: [
        {
          info: { role: "assistant", time: { created: Date.now() } },
          parts: [{ type: "text", text: "Minimax task completed successfully" }],
        },
      ],
    }),
    status: async () => ({ data: { "ses_unstable_minimax": { type: "idle" } } }),
  }

  const delegateTask = createDelegateTask({
    manager: { launch: launchTask },
    client: {
      app: { agents: async () => ({ data: [] }) },
      config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) },
      session: sessionStub,
    },
    userCategories: {
      "minimax-cat": {
        model: "minimax/abab-5",
      },
    },
  })

  const callerContext = {
    sessionID: "parent-session",
    messageID: "parent-message",
    agent: "sisyphus",
    abort: new AbortController().signal,
  }
  const delegateArgs = {
    description: "Test minimax forced background",
    prompt: "Do something with minimax",
    category: "minimax-cat",
    run_in_background: false,
    load_skills: ["git-master"],
  }

  // when - executing the minimax category with run_in_background=false
  const output = await delegateTask.execute(delegateArgs, callerContext)

  // then - the manager launch ran AND the returned text carries the actual result
  expect(launchCalled).toBe(true)
  expect(output).toContain("SUPERVISED TASK COMPLETED")
  expect(output).toContain("Minimax task completed successfully")
}, { timeout: 20000 })
test("non-gemini model with run_in_background=false should run sync (not forced to background)", async () => {
// given - category using non-gemini model with run_in_background=false
const { createDelegateTask } = require("./tools")