feat(hooks): add unstable-agent-babysitter hook for monitoring unstable background agents

This commit is contained in:
justsisyphus
2026-02-01 16:59:50 +09:00
parent a5b2ae2895
commit 64356c520b
5 changed files with 425 additions and 0 deletions

View File

@@ -88,6 +88,7 @@ export const HookNameSchema = z.enum([
"sisyphus-junior-notepad",
"start-work",
"atlas",
"unstable-agent-babysitter",
"stop-continuation-guard",
])
@@ -308,6 +309,11 @@ export const NotificationConfigSchema = z.object({
force_enable: z.boolean().optional(),
})
/** Settings for the unstable-agent-babysitter hook, which sends reminders about idle unstable background agents. */
export const BabysittingConfigSchema = z.object({
/** Turn the babysitter on (default: false) */
enabled: z.boolean().default(false),
/** Milliseconds since a task's last message before a reminder is sent (default: 120000) */
timeout_ms: z.number().default(120000),
})
export const GitMasterConfigSchema = z.object({
/** Add "Ultraworked with Sisyphus" footer to commit messages (default: true) */
commit_footer: z.boolean().default(true),
@@ -383,6 +389,7 @@ export const OhMyOpenCodeConfigSchema = z.object({
ralph_loop: RalphLoopConfigSchema.optional(),
background_task: BackgroundTaskConfigSchema.optional(),
notification: NotificationConfigSchema.optional(),
babysitting: BabysittingConfigSchema.optional(),
git_master: GitMasterConfigSchema.optional(),
browser_automation_engine: BrowserAutomationConfigSchema.optional(),
tmux: TmuxConfigSchema.optional(),
@@ -405,6 +412,7 @@ export type SkillsConfig = z.infer<typeof SkillsConfigSchema>
export type SkillDefinition = z.infer<typeof SkillDefinitionSchema>
export type RalphLoopConfig = z.infer<typeof RalphLoopConfigSchema>
export type NotificationConfig = z.infer<typeof NotificationConfigSchema>
export type BabysittingConfig = z.infer<typeof BabysittingConfigSchema>
export type CategoryConfig = z.infer<typeof CategoryConfigSchema>
export type CategoriesConfig = z.infer<typeof CategoriesConfigSchema>
export type BuiltinCategoryName = z.infer<typeof BuiltinCategoryNameSchema>

View File

@@ -35,3 +35,4 @@ export { createQuestionLabelTruncatorHook } from "./question-label-truncator";
export { createSubagentQuestionBlockerHook } from "./subagent-question-blocker";
export { createStopContinuationGuardHook, type StopContinuationGuard } from "./stop-continuation-guard";
export { createCompactionContextInjector, type SummarizeContext } from "./compaction-context-injector";
export { createUnstableAgentBabysitterHook } from "./unstable-agent-babysitter";

View File

@@ -0,0 +1,142 @@
import { _resetForTesting, setMainSession } from "../../features/claude-code-session-state"
import type { BackgroundTask } from "../../features/background-agent"
import { createUnstableAgentBabysitterHook } from "./index"
// Fixture value for the mock context's `directory`; it is only forwarded to the mocked prompt call.
const projectDir = "/Users/yeongyu/local-workspaces/oh-my-opencode"
// Derived from the hook factory's first parameter so the mock stays in sync with the real context type.
type BabysitterContext = Parameters<typeof createUnstableAgentBabysitterHook>[0]
/**
 * Builds an in-memory stand-in for the context object handed to the hook.
 *
 * `session.messages` answers from `messagesBySession` (an empty list for
 * unknown session ids) and `session.prompt` records its argument in
 * `promptCalls` so tests can inspect what the hook tried to send.
 */
function createMockPluginInput(options: {
  messagesBySession: Record<string, unknown[]>
  promptCalls: Array<{ input: unknown }>
}): BabysitterContext {
  const { messagesBySession: storedMessages, promptCalls: recordedPrompts } = options

  const session = {
    messages: async ({ path }: { path: { id: string } }) => {
      const found = storedMessages[path.id]
      return { data: found ?? [] }
    },
    prompt: async (input: unknown) => {
      recordedPrompts.push({ input })
    },
  }

  return { directory: projectDir, client: { session } }
}
/**
 * Minimal background-manager stub: whatever parent session is asked for,
 * it hands back the exact task array it was created with.
 */
function createBackgroundManager(tasks: BackgroundTask[]) {
  const getTasksByParentSession = () => tasks
  return { getTasksByParentSession }
}
/**
 * Produces a running Gemini background task whose last message is 121 seconds
 * old, i.e. just past the 120-second timeout the tests configure.
 *
 * @param overrides - Fields that replace the defaults (shallow merge).
 */
function createTask(overrides: Partial<BackgroundTask> = {}): BackgroundTask {
  const baseline: BackgroundTask = {
    id: "task-1",
    sessionID: "bg-1",
    parentSessionID: "main-1",
    parentMessageID: "msg-1",
    description: "unstable task",
    prompt: "run work",
    agent: "test-agent",
    status: "running",
    progress: {
      toolCalls: 1,
      lastUpdate: new Date(),
      lastMessage: "still working",
      lastMessageAt: new Date(Date.now() - 121000),
    },
    model: { providerID: "google", modelID: "gemini-1.5" },
  }
  return { ...baseline, ...overrides }
}
// Behavioural tests for the babysitter hook: it must remind the main session
// about hung unstable tasks, stay silent for stable models, and rate-limit
// reminders per task.
describe("unstable-agent-babysitter hook", () => {
  afterEach(() => {
    // Clear the shared main-session state so tests cannot leak into each other.
    _resetForTesting()
  })

  test("fires reminder for hung gemini task", async () => {
    // #given
    setMainSession("main-1")
    const promptCalls: Array<{ input: unknown }> = []
    const ctx = createMockPluginInput({
      messagesBySession: {
        "main-1": [
          { info: { agent: "sisyphus", model: { providerID: "openai", modelID: "gpt-4" } } },
        ],
        "bg-1": [
          { info: { role: "assistant" }, parts: [{ type: "thinking", thinking: "deep thought" }] },
        ],
      },
      promptCalls,
    })
    const backgroundManager = createBackgroundManager([createTask()])
    const hook = createUnstableAgentBabysitterHook(ctx, {
      backgroundManager,
      config: { enabled: true, timeout_ms: 120000 },
    })

    // #when
    await hook.event({ event: { type: "session.idle", properties: { sessionID: "main-1" } } })

    // #then
    expect(promptCalls.length).toBe(1)
    const payload = promptCalls[0].input as { body?: { parts?: Array<{ text?: string }> } }
    const text = payload.body?.parts?.[0]?.text ?? ""
    expect(text).toContain("background_output")
    expect(text).toContain("background_cancel")
    expect(text).toContain("deep thought")
  })

  test("does not remind stable model tasks", async () => {
    // #given
    setMainSession("main-1")
    const promptCalls: Array<{ input: unknown }> = []
    const ctx = createMockPluginInput({
      messagesBySession: { "main-1": [] },
      promptCalls,
    })
    const backgroundManager = createBackgroundManager([
      createTask({ model: { providerID: "openai", modelID: "gpt-4" } }),
    ])
    const hook = createUnstableAgentBabysitterHook(ctx, {
      backgroundManager,
      config: { enabled: true, timeout_ms: 120000 },
    })

    // #when
    await hook.event({ event: { type: "session.idle", properties: { sessionID: "main-1" } } })

    // #then
    expect(promptCalls.length).toBe(0)
  })

  test("respects per-task cooldown", async () => {
    // #given
    setMainSession("main-1")
    const promptCalls: Array<{ input: unknown }> = []
    const ctx = createMockPluginInput({
      messagesBySession: { "main-1": [], "bg-1": [] },
      promptCalls,
    })
    const backgroundManager = createBackgroundManager([createTask()])
    const hook = createUnstableAgentBabysitterHook(ctx, {
      backgroundManager,
      config: { enabled: true, timeout_ms: 120000 },
    })
    // Freeze the clock so both idle events are evaluated at the same instant.
    const now = Date.now()
    const originalNow = Date.now
    Date.now = () => now

    try {
      // #when
      await hook.event({ event: { type: "session.idle", properties: { sessionID: "main-1" } } })
      await hook.event({ event: { type: "session.idle", properties: { sessionID: "main-1" } } })

      // #then
      expect(promptCalls.length).toBe(1)
    } finally {
      // Restore the real clock even when the assertion above throws; otherwise
      // the frozen Date.now would leak into every test that runs afterwards.
      Date.now = originalNow
    }
  })
})

View File

@@ -0,0 +1,243 @@
import type { BackgroundManager, BackgroundTask } from "../../features/background-agent"
import { getMainSessionID, getSessionAgent } from "../../features/claude-code-session-state"
import { log } from "../../shared/logger"
// Prefix used in this hook's log messages.
const HOOK_NAME = "unstable-agent-babysitter"
// Fallback idle threshold (2 minutes) used when the config supplies no timeout_ms.
const DEFAULT_TIMEOUT_MS = 120000
// Minimum gap (5 minutes) between two reminders for the same task.
const COOLDOWN_MS = 5 * 60 * 1000
// Maximum number of characters of thinking text quoted in a reminder.
const THINKING_SUMMARY_MAX_CHARS = 500
/** User-facing settings for the babysitter as this hook consumes them. */
type BabysittingConfig = {
// The event handler does nothing unless this is exactly `true`.
enabled?: boolean
// Idle threshold in milliseconds; DEFAULT_TIMEOUT_MS is used when omitted.
timeout_ms?: number
}
/** The slice of the plugin context this hook needs: a working directory and two session client calls. */
type BabysitterContext = {
// Forwarded as `query.directory` when a reminder prompt is sent.
directory: string
client: {
session: {
// Fetches a session's messages; may resolve to a bare array or to an object carrying the array in `data`.
messages: (args: { path: { id: string } }) => Promise<{ data?: unknown } | unknown[]>
// Sends a text prompt into a session, optionally pinned to an agent and model.
prompt: (args: {
path: { id: string }
body: {
parts: Array<{ type: "text"; text: string }>
agent?: string
model?: { providerID: string; modelID: string }
}
query?: { directory?: string }
}) => Promise<unknown>
}
}
}
/** Dependencies and settings passed to the hook factory. */
type BabysitterOptions = {
// Only the lookup of tasks by parent session is required from the manager.
backgroundManager: Pick<BackgroundManager, "getTasksByParentSession">
// When absent, `enabled` is never `true`, so the handler stays inactive.
config?: BabysittingConfig
}
/** Normalized view of a message's `info` record; every field is optional because the input is untyped. */
type MessageInfo = {
role?: string
agent?: string
// Model given as a nested object on the message.
model?: { providerID: string; modelID: string }
// Model given as flat fields; used as a fallback when `model` is missing.
providerID?: string
modelID?: string
}
/** Normalized view of one entry in a message's `parts` array. */
type MessagePart = {
type?: string
// Read for parts whose type is "reasoning".
text?: string
// Read for parts whose type is "thinking".
thinking?: string
}
/** Type guard: true for any non-null object that exposes a `data` property (own or inherited). */
function hasData(value: unknown): value is { data?: unknown } {
  if (value === null || typeof value !== "object") return false
  return "data" in value
}
/** Type guard: true for every non-null value whose `typeof` is "object" (arrays included). */
function isRecord(value: unknown): value is Record<string, unknown> {
  return value !== null && typeof value === "object"
}
/**
 * Reads the `info` record of an untyped message and keeps only the fields
 * this hook cares about, each validated to be a string.
 *
 * @returns `undefined` when `value` or `value.info` is not an object;
 * otherwise a MessageInfo whose invalid or missing fields are `undefined`.
 */
function getMessageInfo(value: unknown): MessageInfo | undefined {
  if (!isRecord(value) || !isRecord(value.info)) return undefined

  const { role, agent, model: rawModel, providerID, modelID } = value.info
  const stringOrUndefined = (candidate: unknown): string | undefined =>
    typeof candidate === "string" ? candidate : undefined

  // The nested model counts only when both of its identifiers are strings.
  let model: MessageInfo["model"]
  if (
    isRecord(rawModel) &&
    typeof rawModel.providerID === "string" &&
    typeof rawModel.modelID === "string"
  ) {
    model = { providerID: rawModel.providerID, modelID: rawModel.modelID }
  }

  return {
    role: stringOrUndefined(role),
    agent: stringOrUndefined(agent),
    model,
    providerID: stringOrUndefined(providerID),
    modelID: stringOrUndefined(modelID),
  }
}
/**
 * Extracts the `parts` array of an untyped message as MessagePart objects.
 * Non-object entries are dropped and non-string fields become `undefined`.
 *
 * @returns An empty array when `value` is not an object or has no `parts` array.
 */
function getMessageParts(value: unknown): MessagePart[] {
  if (!isRecord(value) || !Array.isArray(value.parts)) return []

  const normalized: MessagePart[] = []
  for (const candidate of value.parts) {
    if (!isRecord(candidate)) continue
    const { type, text, thinking } = candidate
    normalized.push({
      type: typeof type === "string" ? type : undefined,
      text: typeof text === "string" ? text : undefined,
      thinking: typeof thinking === "string" ? thinking : undefined,
    })
  }
  return normalized
}
/**
 * Unwraps a `session.messages` response into a plain array.
 * Accepts either the array itself or an object holding it under `data`;
 * anything else yields an empty array.
 */
function extractMessages(value: unknown): unknown[] {
  if (Array.isArray(value)) return value
  return hasData(value) && Array.isArray(value.data) ? value.data : []
}
/**
 * Decides whether a background task should be babysat: either it is
 * explicitly flagged with `isUnstableAgent`, or its model id contains
 * "gemini" (case-insensitive).
 */
function isUnstableTask(task: BackgroundTask): boolean {
  if (task.isUnstableAgent === true) return true

  const normalizedId = task.model?.modelID?.toLowerCase()
  if (!normalizedId) return false
  return normalizedId.includes("gemini")
}
/**
 * Works out which agent and model a reminder prompt should be addressed to.
 *
 * The agent recorded for the session takes precedence; otherwise the agent and
 * model are taken from the most recent message that carries an agent, a nested
 * model, or a flat providerID/modelID pair. A failed message fetch is logged
 * and whatever was resolved so far is returned.
 */
async function resolveMainSessionTarget(
  ctx: BabysitterContext,
  sessionID: string
): Promise<{ agent?: string; model?: { providerID: string; modelID: string } }> {
  let agent = getSessionAgent(sessionID)
  let model: { providerID: string; modelID: string } | undefined

  try {
    const response = await ctx.client.session.messages({ path: { id: sessionID } })
    // Walk a reversed copy so the newest message is inspected first.
    for (const entry of [...extractMessages(response)].reverse()) {
      const info = getMessageInfo(entry)
      if (!info) continue

      const flatModel =
        info.providerID && info.modelID
          ? { providerID: info.providerID, modelID: info.modelID }
          : undefined
      if (!info.agent && !info.model && !flatModel) continue

      agent = agent ?? info.agent
      model = info.model ?? flatModel
      break
    }
  } catch (error) {
    log(`[${HOOK_NAME}] Failed to resolve main session agent`, { sessionID, error: String(error) })
  }

  return { agent, model }
}
/**
 * Collects the thinking/reasoning text of a session's assistant messages and
 * returns at most THINKING_SUMMARY_MAX_CHARS characters of it.
 *
 * @returns The newline-joined text, suffixed with "..." when truncated, or
 * `null` when nothing was found or the messages could not be fetched.
 */
async function getThinkingSummary(ctx: BabysitterContext, sessionID: string): Promise<string | null> {
  try {
    const response = await ctx.client.session.messages({ path: { id: sessionID } })

    const traces: string[] = []
    for (const entry of extractMessages(response)) {
      if (getMessageInfo(entry)?.role !== "assistant") continue
      for (const { type, thinking, text } of getMessageParts(entry)) {
        // "thinking" parts carry their text in `thinking`, "reasoning" parts in `text`.
        const trace = type === "thinking" ? thinking : type === "reasoning" ? text : undefined
        if (trace) traces.push(trace)
      }
    }

    const joined = traces.join("\n").trim()
    if (joined.length === 0) return null
    return joined.length > THINKING_SUMMARY_MAX_CHARS
      ? joined.slice(0, THINKING_SUMMARY_MAX_CHARS) + "..."
      : joined
  } catch (error) {
    log(`[${HOOK_NAME}] Failed to fetch thinking summary`, { sessionID, error: String(error) })
    return null
  }
}
/**
 * Renders the reminder text injected into the main session for an idle task.
 *
 * @param task - The background task that appears hung.
 * @param summary - Thinking excerpt, or `null` to show a placeholder instead.
 * @param idleMs - Milliseconds since the task's last message; shown rounded to seconds.
 */
function buildReminder(task: BackgroundTask, summary: string | null, idleMs: number): string {
  const { id, description, agent, status, sessionID } = task
  const idleSeconds = Math.round(idleMs / 1000)

  const lines = [
    `Unstable background agent appears idle for ${idleSeconds}s.`,
    `Task ID: ${id}`,
    `Description: ${description}`,
    `Agent: ${agent}`,
    `Status: ${status}`,
    `Session ID: ${sessionID ?? "N/A"}`,
    `Thinking summary (first ${THINKING_SUMMARY_MAX_CHARS} chars):`,
    `${summary ?? "(No thinking trace available)"}`,
    `Suggested actions:`,
    `- background_output task_id="${id}" full_session=true include_thinking=true include_tool_results=true message_limit=50`,
    `- background_cancel taskId="${id}"`,
    `This is a reminder only. No automatic action was taken.`,
  ]
  return lines.join("\n")
}
/**
 * Creates the hook that reminds the main session about unstable background
 * agents that appear to be hung.
 *
 * On each `session.idle` event for the main session, the handler scans that
 * session's background tasks. For every running unstable task whose last
 * message is older than the configured timeout, it injects a text reminder
 * into the main session. Reminders are rate-limited per task by COOLDOWN_MS,
 * and the cooldown starts only after a reminder was sent successfully.
 *
 * @param ctx - Working directory plus the session client used to read messages and send prompts.
 * @param options - Background task lookup and the optional babysitting config.
 * @returns An object whose `event` handler should be fed plugin events.
 */
export function createUnstableAgentBabysitterHook(ctx: BabysitterContext, options: BabysitterOptions) {
  // Task id -> timestamp (ms) of the last reminder successfully injected for it.
  const reminderCooldowns = new Map<string, number>()

  const eventHandler = async ({ event }: { event: { type: string; properties?: unknown } }) => {
    if (event.type !== "session.idle") return
    if (options.config?.enabled !== true) return

    // The payload is untyped, so validate it at runtime instead of asserting its shape.
    const sessionID = isRecord(event.properties) ? event.properties.sessionID : undefined
    if (typeof sessionID !== "string" || !sessionID) return

    // Only idle events of the main session trigger a scan.
    const mainSessionID = getMainSessionID()
    if (!mainSessionID || sessionID !== mainSessionID) return

    const tasks = options.backgroundManager.getTasksByParentSession(mainSessionID)
    if (tasks.length === 0) return

    const timeoutMs = options.config?.timeout_ms ?? DEFAULT_TIMEOUT_MS
    const now = Date.now()

    // The prompt target is the same for every task in this scan, so it is
    // resolved lazily (only if a reminder is due) and at most once per event.
    let target: { agent?: string; model?: { providerID: string; modelID: string } } | undefined

    for (const task of tasks) {
      if (task.status !== "running") continue
      if (!isUnstableTask(task)) continue

      // Tasks that never produced a message have no idle time to measure.
      const lastMessageAt = task.progress?.lastMessageAt
      if (!lastMessageAt) continue
      const idleMs = now - lastMessageAt.getTime()
      if (idleMs < timeoutMs) continue

      // Compare against undefined so that a stored timestamp of 0 still counts.
      const lastReminderAt = reminderCooldowns.get(task.id)
      if (lastReminderAt !== undefined && now - lastReminderAt < COOLDOWN_MS) continue

      const summary = task.sessionID ? await getThinkingSummary(ctx, task.sessionID) : null
      const reminder = buildReminder(task, summary, idleMs)

      if (!target) target = await resolveMainSessionTarget(ctx, mainSessionID)
      const { agent, model } = target

      try {
        await ctx.client.session.prompt({
          path: { id: mainSessionID },
          body: {
            ...(agent ? { agent } : {}),
            ...(model ? { model } : {}),
            parts: [{ type: "text", text: reminder }],
          },
          query: { directory: ctx.directory },
        })
        // Start the cooldown only after a successful send, so a failed
        // injection is retried on the next idle event.
        reminderCooldowns.set(task.id, now)
        log(`[${HOOK_NAME}] Reminder injected`, { taskId: task.id, sessionID: mainSessionID })
      } catch (error) {
        log(`[${HOOK_NAME}] Reminder injection failed`, { taskId: task.id, error: String(error) })
      }
    }
  }

  return {
    event: eventHandler,
  }
}

View File

@@ -35,6 +35,7 @@ import {
createSubagentQuestionBlockerHook,
createStopContinuationGuardHook,
createCompactionContextInjector,
createUnstableAgentBabysitterHook,
} from "./hooks";
import {
contextCollector,
@@ -290,6 +291,35 @@ const OhMyOpenCodePlugin: Plugin = async (ctx) => {
})
: null;
// Build the babysitter only when the hook is enabled AND the babysitting config opts in; otherwise keep null.
const unstableAgentBabysitter =
isHookEnabled("unstable-agent-babysitter") && pluginConfig.babysitting?.enabled === true
? createUnstableAgentBabysitterHook(
{
directory: ctx.directory,
client: {
session: {
// Adapt the client's response to the `unknown[] | { data }` shape the hook accepts.
messages: async (args) => {
const result = await ctx.client.session.messages(args)
if (Array.isArray(result)) return result
if (typeof result === "object" && result !== null && "data" in result) {
const record = result as Record<string, unknown>
return { data: record.data }
}
// Anything else is treated as "no messages".
return []
},
// The client's return value is dropped here; the hook awaits the call but does not read a result.
prompt: async (args) => {
await ctx.client.session.prompt(args)
},
},
},
},
{
backgroundManager,
config: pluginConfig.babysitting,
}
)
: null;
if (sessionRecovery && todoContinuationEnforcer) {
sessionRecovery.setOnAbortCallback(todoContinuationEnforcer.markRecovering);
sessionRecovery.setOnRecoveryCompleteCallback(
@@ -520,6 +550,7 @@ const OhMyOpenCodePlugin: Plugin = async (ctx) => {
await backgroundNotificationHook?.event(input);
await sessionNotification?.(input);
await todoContinuationEnforcer?.handler(input);
await unstableAgentBabysitter?.event(input);
await contextWindowMonitor?.event(input);
await directoryAgentsInjector?.event(input);
await directoryReadmeInjector?.event(input);