diff --git a/src/config/schema.ts b/src/config/schema.ts
index 1598ed710..b35b1938e 100644
--- a/src/config/schema.ts
+++ b/src/config/schema.ts
@@ -88,6 +88,7 @@ export const HookNameSchema = z.enum([
   "sisyphus-junior-notepad",
   "start-work",
   "atlas",
+  "unstable-agent-babysitter",
   "stop-continuation-guard",
 ])
 
@@ -308,6 +309,13 @@ export const NotificationConfigSchema = z.object({
   force_enable: z.boolean().optional(),
 })
 
+export const BabysittingConfigSchema = z.object({
+  /** Enable idle reminders for unstable background agents (default: false) */
+  enabled: z.boolean().default(false),
+  /** Idle time in milliseconds before a reminder is sent; must be positive (default: 120000) */
+  timeout_ms: z.number().positive().default(120000),
+})
+
 export const GitMasterConfigSchema = z.object({
   /** Add "Ultraworked with Sisyphus" footer to commit messages (default: true) */
   commit_footer: z.boolean().default(true),
@@ -383,6 +391,7 @@ export const OhMyOpenCodeConfigSchema = z.object({
   ralph_loop: RalphLoopConfigSchema.optional(),
   background_task: BackgroundTaskConfigSchema.optional(),
   notification: NotificationConfigSchema.optional(),
+  babysitting: BabysittingConfigSchema.optional(),
   git_master: GitMasterConfigSchema.optional(),
   browser_automation_engine: BrowserAutomationConfigSchema.optional(),
   tmux: TmuxConfigSchema.optional(),
@@ -405,6 +414,7 @@ export type SkillsConfig = z.infer<typeof SkillsConfigSchema>
 export type SkillDefinition = z.infer<typeof SkillDefinitionSchema>
 export type RalphLoopConfig = z.infer<typeof RalphLoopConfigSchema>
 export type NotificationConfig = z.infer<typeof NotificationConfigSchema>
+export type BabysittingConfig = z.infer<typeof BabysittingConfigSchema>
 export type CategoryConfig = z.infer<typeof CategoryConfigSchema>
 export type CategoriesConfig = z.infer<typeof CategoriesConfigSchema>
 export type BuiltinCategoryName = z.infer<typeof BuiltinCategoryNameSchema>
diff --git a/src/hooks/index.ts b/src/hooks/index.ts
index 0b0f4bf3e..793f07323 100644
--- a/src/hooks/index.ts
+++ b/src/hooks/index.ts
@@ -35,3 +35,4 @@ export { createQuestionLabelTruncatorHook } from "./question-label-truncator";
 export { createSubagentQuestionBlockerHook } from "./subagent-question-blocker";
 export { createStopContinuationGuardHook, type StopContinuationGuard } from "./stop-continuation-guard";
 export { createCompactionContextInjector, type SummarizeContext } from "./compaction-context-injector";
+export { createUnstableAgentBabysitterHook } from "./unstable-agent-babysitter";
diff --git a/src/hooks/unstable-agent-babysitter/index.test.ts b/src/hooks/unstable-agent-babysitter/index.test.ts
new file mode 100644
index 000000000..a98f47100
--- /dev/null
+++ b/src/hooks/unstable-agent-babysitter/index.test.ts
@@ -0,0 +1,146 @@
+import { _resetForTesting, setMainSession } from "../../features/claude-code-session-state"
+import type { BackgroundTask } from "../../features/background-agent"
+import { createUnstableAgentBabysitterHook } from "./index"
+
+const projectDir = "/Users/yeongyu/local-workspaces/oh-my-opencode"
+
+type BabysitterContext = Parameters<typeof createUnstableAgentBabysitterHook>[0]
+
+function createMockPluginInput(options: {
+  messagesBySession: Record<string, unknown[]>
+  promptCalls: Array<{ input: unknown }>
+}): BabysitterContext {
+  const { messagesBySession, promptCalls } = options
+  return {
+    directory: projectDir,
+    client: {
+      session: {
+        messages: async ({ path }: { path: { id: string } }) => ({
+          data: messagesBySession[path.id] ?? [],
+        }),
+        prompt: async (input: unknown) => {
+          promptCalls.push({ input })
+        },
+      },
+    },
+  }
+}
+
+function createBackgroundManager(tasks: BackgroundTask[]) {
+  return {
+    getTasksByParentSession: () => tasks,
+  }
+}
+
+function createTask(overrides: Partial<BackgroundTask> = {}): BackgroundTask {
+  return {
+    id: "task-1",
+    sessionID: "bg-1",
+    parentSessionID: "main-1",
+    parentMessageID: "msg-1",
+    description: "unstable task",
+    prompt: "run work",
+    agent: "test-agent",
+    status: "running",
+    progress: {
+      toolCalls: 1,
+      lastUpdate: new Date(),
+      lastMessage: "still working",
+      lastMessageAt: new Date(Date.now() - 121000),
+    },
+    model: { providerID: "google", modelID: "gemini-1.5" },
+    ...overrides,
+  }
+}
+
+describe("unstable-agent-babysitter hook", () => {
+  afterEach(() => {
+    _resetForTesting()
+  })
+
+  test("fires reminder for hung gemini task", async () => {
+    // #given
+    setMainSession("main-1")
+    const promptCalls: Array<{ input: unknown }> = []
+    const ctx = createMockPluginInput({
+      messagesBySession: {
+        "main-1": [
+          { info: { agent: "sisyphus", model: { providerID: "openai", modelID: "gpt-4" } } },
+        ],
+        "bg-1": [
+          { info: { role: "assistant" }, parts: [{ type: "thinking", thinking: "deep thought" }] },
+        ],
+      },
+      promptCalls,
+    })
+    const backgroundManager = createBackgroundManager([createTask()])
+    const hook = createUnstableAgentBabysitterHook(ctx, {
+      backgroundManager,
+      config: { enabled: true, timeout_ms: 120000 },
+    })
+
+    // #when
+    await hook.event({ event: { type: "session.idle", properties: { sessionID: "main-1" } } })
+
+    // #then
+    expect(promptCalls.length).toBe(1)
+    const payload = promptCalls[0].input as { body?: { parts?: Array<{ text?: string }> } }
+    const text = payload.body?.parts?.[0]?.text ?? ""
+    expect(text).toContain("background_output")
+    expect(text).toContain("background_cancel")
+    expect(text).toContain("deep thought")
+  })
+
+  test("does not remind stable model tasks", async () => {
+    // #given
+    setMainSession("main-1")
+    const promptCalls: Array<{ input: unknown }> = []
+    const ctx = createMockPluginInput({
+      messagesBySession: { "main-1": [] },
+      promptCalls,
+    })
+    const backgroundManager = createBackgroundManager([
+      createTask({ model: { providerID: "openai", modelID: "gpt-4" } }),
+    ])
+    const hook = createUnstableAgentBabysitterHook(ctx, {
+      backgroundManager,
+      config: { enabled: true, timeout_ms: 120000 },
+    })
+
+    // #when
+    await hook.event({ event: { type: "session.idle", properties: { sessionID: "main-1" } } })
+
+    // #then
+    expect(promptCalls.length).toBe(0)
+  })
+
+  test("respects per-task cooldown", async () => {
+    // #given
+    setMainSession("main-1")
+    const promptCalls: Array<{ input: unknown }> = []
+    const ctx = createMockPluginInput({
+      messagesBySession: { "main-1": [], "bg-1": [] },
+      promptCalls,
+    })
+    const backgroundManager = createBackgroundManager([createTask()])
+    const hook = createUnstableAgentBabysitterHook(ctx, {
+      backgroundManager,
+      config: { enabled: true, timeout_ms: 120000 },
+    })
+    const now = Date.now()
+    const originalNow = Date.now
+    Date.now = () => now
+
+    try {
+      // #when
+      await hook.event({ event: { type: "session.idle", properties: { sessionID: "main-1" } } })
+      await hook.event({ event: { type: "session.idle", properties: { sessionID: "main-1" } } })
+
+      // #then
+      expect(promptCalls.length).toBe(1)
+    } finally {
+      // Restore the real clock even when the assertion above throws.
+      Date.now = originalNow
+    }
+  })
+})
diff --git a/src/hooks/unstable-agent-babysitter/index.ts b/src/hooks/unstable-agent-babysitter/index.ts
new file mode 100644
index 000000000..5b27889c0
--- /dev/null
+++ b/src/hooks/unstable-agent-babysitter/index.ts
@@ -0,0 +1,288 @@
+import type { BackgroundManager, BackgroundTask } from "../../features/background-agent"
+import { getMainSessionID, getSessionAgent } from "../../features/claude-code-session-state"
+import { log } from "../../shared/logger"
+
+const HOOK_NAME = "unstable-agent-babysitter"
+/** Idle threshold applied when the config supplies no usable timeout_ms. */
+const DEFAULT_TIMEOUT_MS = 120000
+/** Minimum gap between two reminders for the same task. */
+const COOLDOWN_MS = 5 * 60 * 1000
+/** Maximum length of the thinking excerpt embedded in a reminder. */
+const THINKING_SUMMARY_MAX_CHARS = 500
+
+type BabysittingConfig = {
+  enabled?: boolean
+  timeout_ms?: number
+}
+
+type BabysitterContext = {
+  directory: string
+  client: {
+    session: {
+      messages: (args: { path: { id: string } }) => Promise<{ data?: unknown } | unknown[]>
+      prompt: (args: {
+        path: { id: string }
+        body: {
+          parts: Array<{ type: "text"; text: string }>
+          agent?: string
+          model?: { providerID: string; modelID: string }
+        }
+        query?: { directory?: string }
+      }) => Promise<unknown>
+    }
+  }
+}
+
+type BabysitterOptions = {
+  backgroundManager: Pick<BackgroundManager, "getTasksByParentSession">
+  config?: BabysittingConfig
+}
+
+type MessageInfo = {
+  role?: string
+  agent?: string
+  model?: { providerID: string; modelID: string }
+  providerID?: string
+  modelID?: string
+}
+
+type MessagePart = {
+  type?: string
+  text?: string
+  thinking?: string
+}
+
+/** Agent/model pair attached to a prompt sent to the main session. */
+type SessionTarget = {
+  agent?: string
+  model?: { providerID: string; modelID: string }
+}
+
+/** True when the value is a non-null object that has a `data` key. */
+function hasData(value: unknown): value is { data?: unknown } {
+  return typeof value === "object" && value !== null && "data" in value
+}
+
+/** True for any non-null object, so its keys can be probed safely. */
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === "object" && value !== null
+}
+
+/** Reads the string-typed fields of `message.info`; undefined when `info` is not an object. */
+function getMessageInfo(value: unknown): MessageInfo | undefined {
+  if (!isRecord(value)) return undefined
+  if (!isRecord(value.info)) return undefined
+  const info = value.info
+  const modelValue = isRecord(info.model) ? info.model : undefined
+  const model =
+    modelValue && typeof modelValue.providerID === "string" && typeof modelValue.modelID === "string"
+      ? { providerID: modelValue.providerID, modelID: modelValue.modelID }
+      : undefined
+  return {
+    role: typeof info.role === "string" ? info.role : undefined,
+    agent: typeof info.agent === "string" ? info.agent : undefined,
+    model,
+    providerID: typeof info.providerID === "string" ? info.providerID : undefined,
+    modelID: typeof info.modelID === "string" ? info.modelID : undefined,
+  }
+}
+
+/** Normalizes `message.parts` to MessagePart objects, dropping non-object entries and non-string fields. */
+function getMessageParts(value: unknown): MessagePart[] {
+  if (!isRecord(value)) return []
+  if (!Array.isArray(value.parts)) return []
+  return value.parts.filter(isRecord).map((part) => ({
+    type: typeof part.type === "string" ? part.type : undefined,
+    text: typeof part.text === "string" ? part.text : undefined,
+    thinking: typeof part.thinking === "string" ? part.thinking : undefined,
+  }))
+}
+
+/** Accepts either a bare message array or a `{ data: [...] }` envelope; anything else yields []. */
+function extractMessages(value: unknown): unknown[] {
+  if (Array.isArray(value)) {
+    return value
+  }
+  if (hasData(value) && Array.isArray(value.data)) {
+    return value.data
+  }
+  return []
+}
+
+/** A task is unstable when flagged via `isUnstableAgent` or when its model ID contains "gemini". */
+function isUnstableTask(task: BackgroundTask): boolean {
+  if (task.isUnstableAgent === true) return true
+  const modelId = task.model?.modelID?.toLowerCase()
+  return modelId ? modelId.includes("gemini") : false
+}
+
+/** Returns the configured timeout when it is a finite positive number, else DEFAULT_TIMEOUT_MS. */
+function resolveTimeoutMs(configured: number | undefined): number {
+  if (configured === undefined || !Number.isFinite(configured) || configured <= 0) {
+    return DEFAULT_TIMEOUT_MS
+  }
+  return configured
+}
+
+/**
+ * Picks the agent and model for a prompt sent to `sessionID`: the agent from getSessionAgent,
+ * otherwise from the newest message carrying agent or model info. Lookup failures are logged.
+ */
+async function resolveMainSessionTarget(ctx: BabysitterContext, sessionID: string): Promise<SessionTarget> {
+  let agent = getSessionAgent(sessionID)
+  let model: { providerID: string; modelID: string } | undefined
+
+  try {
+    const messagesResp = await ctx.client.session.messages({
+      path: { id: sessionID },
+    })
+    const messages = extractMessages(messagesResp)
+    for (let i = messages.length - 1; i >= 0; i--) {
+      const info = getMessageInfo(messages[i])
+      if (info?.agent || info?.model || (info?.providerID && info?.modelID)) {
+        agent = agent ?? info?.agent
+        model =
+          info?.model ??
+          (info?.providerID && info?.modelID ? { providerID: info.providerID, modelID: info.modelID } : undefined)
+        break
+      }
+    }
+  } catch (error) {
+    log(`[${HOOK_NAME}] Failed to resolve main session agent`, { sessionID, error: String(error) })
+  }
+
+  return { agent, model }
+}
+
+/**
+ * Joins the thinking/reasoning parts of the session's assistant messages, truncated to
+ * THINKING_SUMMARY_MAX_CHARS. Returns null when there is none or the fetch fails.
+ */
+async function getThinkingSummary(ctx: BabysitterContext, sessionID: string): Promise<string | null> {
+  try {
+    const messagesResp = await ctx.client.session.messages({
+      path: { id: sessionID },
+    })
+    const messages = extractMessages(messagesResp)
+    const chunks: string[] = []
+
+    for (const message of messages) {
+      const info = getMessageInfo(message)
+      if (info?.role !== "assistant") continue
+      const parts = getMessageParts(message)
+      for (const part of parts) {
+        if (part.type === "thinking" && part.thinking) {
+          chunks.push(part.thinking)
+        }
+        if (part.type === "reasoning" && part.text) {
+          chunks.push(part.text)
+        }
+      }
+    }
+
+    const combined = chunks.join("\n").trim()
+    if (!combined) return null
+    if (combined.length <= THINKING_SUMMARY_MAX_CHARS) return combined
+    return combined.slice(0, THINKING_SUMMARY_MAX_CHARS) + "..."
+  } catch (error) {
+    log(`[${HOOK_NAME}] Failed to fetch thinking summary`, { sessionID, error: String(error) })
+    return null
+  }
+}
+
+/** Renders the reminder text: task details, thinking excerpt, and suggested follow-up commands. */
+function buildReminder(task: BackgroundTask, summary: string | null, idleMs: number): string {
+  const idleSeconds = Math.round(idleMs / 1000)
+  const summaryText = summary ?? "(No thinking trace available)"
+  return `Unstable background agent appears idle for ${idleSeconds}s.
+
+Task ID: ${task.id}
+Description: ${task.description}
+Agent: ${task.agent}
+Status: ${task.status}
+Session ID: ${task.sessionID ?? "N/A"}
+
+Thinking summary (first ${THINKING_SUMMARY_MAX_CHARS} chars):
+${summaryText}
+
+Suggested actions:
+- background_output task_id="${task.id}" full_session=true include_thinking=true include_tool_results=true message_limit=50
+- background_cancel taskId="${task.id}"
+
+This is a reminder only. No automatic action was taken.`
+}
+
+/**
+ * Creates a hook that, when the main session goes idle, prompts it about running unstable
+ * background tasks whose last message is older than the configured timeout. Each task is
+ * reminded at most once per COOLDOWN_MS; the hook only sends a prompt and takes no other action.
+ */
+export function createUnstableAgentBabysitterHook(ctx: BabysitterContext, options: BabysitterOptions) {
+  const reminderCooldowns = new Map<string, number>()
+
+  const eventHandler = async ({ event }: { event: { type: string; properties?: unknown } }) => {
+    if (event.type !== "session.idle") return
+    if (options.config?.enabled !== true) return
+
+    const props = event.properties as Record<string, unknown> | undefined
+    const sessionID = props?.sessionID as string | undefined
+    if (!sessionID) return
+
+    const mainSessionID = getMainSessionID()
+    if (!mainSessionID || sessionID !== mainSessionID) return
+
+    const tasks = options.backgroundManager.getTasksByParentSession(mainSessionID)
+
+    // Drop cooldowns of tasks that are gone or no longer running so the map cannot grow forever.
+    const runningTaskIds = new Set(tasks.filter((task) => task.status === "running").map((task) => task.id))
+    for (const taskId of reminderCooldowns.keys()) {
+      if (!runningTaskIds.has(taskId)) reminderCooldowns.delete(taskId)
+    }
+    if (runningTaskIds.size === 0) return
+
+    const timeoutMs = resolveTimeoutMs(options.config?.timeout_ms)
+    const now = Date.now()
+    // The target does not depend on the task: resolve it lazily, at most once per event.
+    let target: SessionTarget | undefined
+
+    for (const task of tasks) {
+      if (task.status !== "running") continue
+      if (!isUnstableTask(task)) continue
+
+      const lastMessageAt = task.progress?.lastMessageAt
+      if (!lastMessageAt) continue
+
+      const idleMs = now - lastMessageAt.getTime()
+      if (idleMs < timeoutMs) continue
+
+      const lastReminderAt = reminderCooldowns.get(task.id)
+      // Compare against undefined so a stored timestamp of 0 still counts as a reminder.
+      if (lastReminderAt !== undefined && now - lastReminderAt < COOLDOWN_MS) continue
+
+      const summary = task.sessionID ? await getThinkingSummary(ctx, task.sessionID) : null
+      const reminder = buildReminder(task, summary, idleMs)
+      if (!target) target = await resolveMainSessionTarget(ctx, mainSessionID)
+      const { agent, model } = target
+
+      try {
+        await ctx.client.session.prompt({
+          path: { id: mainSessionID },
+          body: {
+            ...(agent ? { agent } : {}),
+            ...(model ? { model } : {}),
+            parts: [{ type: "text", text: reminder }],
+          },
+          query: { directory: ctx.directory },
+        })
+        reminderCooldowns.set(task.id, now)
+        log(`[${HOOK_NAME}] Reminder injected`, { taskId: task.id, sessionID: mainSessionID })
+      } catch (error) {
+        log(`[${HOOK_NAME}] Reminder injection failed`, { taskId: task.id, error: String(error) })
+      }
+    }
+  }
+
+  return {
+    event: eventHandler,
+  }
+}
diff --git a/src/index.ts b/src/index.ts
index 845b682e9..cc7e2f5d3 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -35,6 +35,7 @@ import {
   createSubagentQuestionBlockerHook,
   createStopContinuationGuardHook,
   createCompactionContextInjector,
+  createUnstableAgentBabysitterHook,
 } from "./hooks";
 import {
   contextCollector,
@@ -290,6 +291,35 @@ const OhMyOpenCodePlugin: Plugin = async (ctx) => {
       })
     : null;
 
+  const unstableAgentBabysitter =
+    isHookEnabled("unstable-agent-babysitter") && pluginConfig.babysitting?.enabled === true
+      ? createUnstableAgentBabysitterHook(
+          {
+            directory: ctx.directory,
+            client: {
+              session: {
+                messages: async (args) => {
+                  const result = await ctx.client.session.messages(args)
+                  if (Array.isArray(result)) return result
+                  if (typeof result === "object" && result !== null && "data" in result) {
+                    const record = result as Record<string, unknown>
+                    return { data: record.data }
+                  }
+                  return []
+                },
+                prompt: async (args) => {
+                  await ctx.client.session.prompt(args)
+                },
+              },
+            },
+          },
+          {
+            backgroundManager,
+            config: pluginConfig.babysitting,
+          }
+        )
+      : null;
+
   if (sessionRecovery && todoContinuationEnforcer) {
     sessionRecovery.setOnAbortCallback(todoContinuationEnforcer.markRecovering);
     sessionRecovery.setOnRecoveryCompleteCallback(
@@ -520,6 +550,7 @@ const OhMyOpenCodePlugin: Plugin = async (ctx) => {
       await backgroundNotificationHook?.event(input);
       await sessionNotification?.(input);
       await todoContinuationEnforcer?.handler(input);
+      await unstableAgentBabysitter?.event(input);
       await contextWindowMonitor?.event(input);
       await directoryAgentsInjector?.event(input);
       await directoryReadmeInjector?.event(input);