feat(hooks): add unstable-agent-babysitter hook for monitoring unstable background agents
This commit is contained in:
@@ -88,6 +88,7 @@ export const HookNameSchema = z.enum([
|
||||
"sisyphus-junior-notepad",
|
||||
"start-work",
|
||||
"atlas",
|
||||
"unstable-agent-babysitter",
|
||||
"stop-continuation-guard",
|
||||
])
|
||||
|
||||
@@ -308,6 +309,11 @@ export const NotificationConfigSchema = z.object({
|
||||
force_enable: z.boolean().optional(),
|
||||
})
|
||||
|
||||
/** Settings for the "unstable-agent-babysitter" hook (reminders about hung background agents). */
export const BabysittingConfigSchema = z.object({
  /** Turn babysitting reminders on (default: false) */
  enabled: z.boolean().default(false),
  /**
   * Milliseconds without a new message before a running unstable task is treated as hung (default: 120000).
   * Must be positive: with zero or a negative value the hook's idle check would never skip a task,
   * so every running unstable task would be reported immediately.
   */
  timeout_ms: z.number().positive().default(120000),
})
|
||||
|
||||
export const GitMasterConfigSchema = z.object({
|
||||
/** Add "Ultraworked with Sisyphus" footer to commit messages (default: true) */
|
||||
commit_footer: z.boolean().default(true),
|
||||
@@ -383,6 +389,7 @@ export const OhMyOpenCodeConfigSchema = z.object({
|
||||
ralph_loop: RalphLoopConfigSchema.optional(),
|
||||
background_task: BackgroundTaskConfigSchema.optional(),
|
||||
notification: NotificationConfigSchema.optional(),
|
||||
babysitting: BabysittingConfigSchema.optional(),
|
||||
git_master: GitMasterConfigSchema.optional(),
|
||||
browser_automation_engine: BrowserAutomationConfigSchema.optional(),
|
||||
tmux: TmuxConfigSchema.optional(),
|
||||
@@ -405,6 +412,7 @@ export type SkillsConfig = z.infer<typeof SkillsConfigSchema>
|
||||
export type SkillDefinition = z.infer<typeof SkillDefinitionSchema>
|
||||
export type RalphLoopConfig = z.infer<typeof RalphLoopConfigSchema>
|
||||
export type NotificationConfig = z.infer<typeof NotificationConfigSchema>
|
||||
export type BabysittingConfig = z.infer<typeof BabysittingConfigSchema>
|
||||
export type CategoryConfig = z.infer<typeof CategoryConfigSchema>
|
||||
export type CategoriesConfig = z.infer<typeof CategoriesConfigSchema>
|
||||
export type BuiltinCategoryName = z.infer<typeof BuiltinCategoryNameSchema>
|
||||
|
||||
@@ -35,3 +35,4 @@ export { createQuestionLabelTruncatorHook } from "./question-label-truncator";
|
||||
export { createSubagentQuestionBlockerHook } from "./subagent-question-blocker";
|
||||
export { createStopContinuationGuardHook, type StopContinuationGuard } from "./stop-continuation-guard";
|
||||
export { createCompactionContextInjector, type SummarizeContext } from "./compaction-context-injector";
|
||||
export { createUnstableAgentBabysitterHook } from "./unstable-agent-babysitter";
|
||||
|
||||
142
src/hooks/unstable-agent-babysitter/index.test.ts
Normal file
142
src/hooks/unstable-agent-babysitter/index.test.ts
Normal file
@@ -0,0 +1,142 @@
|
||||
import { _resetForTesting, setMainSession } from "../../features/claude-code-session-state"
|
||||
import type { BackgroundTask } from "../../features/background-agent"
|
||||
import { createUnstableAgentBabysitterHook } from "./index"
|
||||
|
||||
// Placeholder project directory handed to the mocked plugin context as `directory`.
const projectDir = "/Users/yeongyu/local-workspaces/oh-my-opencode"
|
||||
|
||||
// Context type derived from the hook factory's first parameter, so the mock stays in sync with it.
type BabysitterContext = Parameters<typeof createUnstableAgentBabysitterHook>[0]
|
||||
|
||||
/**
 * Builds a fake plugin context for the babysitter hook.
 *
 * - `messagesBySession` supplies the canned message list returned for each session id
 *   (unknown ids yield an empty list).
 * - `promptCalls` receives one `{ input }` record per `session.prompt` call so tests can
 *   inspect what the hook tried to send.
 */
function createMockPluginInput(options: {
  messagesBySession: Record<string, unknown[]>
  promptCalls: Array<{ input: unknown }>
}): BabysitterContext {
  const { messagesBySession, promptCalls } = options

  const messages = async ({ path }: { path: { id: string } }) => {
    const stored = messagesBySession[path.id]
    return { data: stored ?? [] }
  }

  const prompt = async (input: unknown) => {
    promptCalls.push({ input })
  }

  return {
    directory: projectDir,
    client: { session: { messages, prompt } },
  }
}
|
||||
|
||||
/**
 * Minimal background-manager stub: whatever parent session is asked for,
 * it hands back the same `tasks` array it was created with.
 */
function createBackgroundManager(tasks: BackgroundTask[]) {
  function getTasksByParentSession() {
    return tasks
  }

  return { getTasksByParentSession }
}
|
||||
|
||||
/**
 * Produces a running background task whose model id contains "gemini" and whose last
 * message is 121 seconds old. Any field can be replaced through `overrides`.
 */
function createTask(overrides: Partial<BackgroundTask> = {}): BackgroundTask {
  const lastUpdate = new Date()
  // 121s in the past: just over the 120000ms timeout the tests configure.
  const lastMessageAt = new Date(Date.now() - 121000)

  const baseline: BackgroundTask = {
    id: "task-1",
    sessionID: "bg-1",
    parentSessionID: "main-1",
    parentMessageID: "msg-1",
    description: "unstable task",
    prompt: "run work",
    agent: "test-agent",
    status: "running",
    progress: {
      toolCalls: 1,
      lastUpdate,
      lastMessage: "still working",
      lastMessageAt,
    },
    model: { providerID: "google", modelID: "gemini-1.5" },
  }

  return { ...baseline, ...overrides }
}
|
||||
|
||||
// Behavioural tests for the babysitter hook: reminder content, model filtering, and cooldown.
describe("unstable-agent-babysitter hook", () => {
  afterEach(() => {
    // Clear the shared main-session state so tests do not leak into each other.
    _resetForTesting()
  })

  test("fires reminder for hung gemini task", async () => {
    // #given
    setMainSession("main-1")
    const promptCalls: Array<{ input: unknown }> = []
    const ctx = createMockPluginInput({
      messagesBySession: {
        "main-1": [
          { info: { agent: "sisyphus", model: { providerID: "openai", modelID: "gpt-4" } } },
        ],
        "bg-1": [
          { info: { role: "assistant" }, parts: [{ type: "thinking", thinking: "deep thought" }] },
        ],
      },
      promptCalls,
    })
    const backgroundManager = createBackgroundManager([createTask()])
    const hook = createUnstableAgentBabysitterHook(ctx, {
      backgroundManager,
      config: { enabled: true, timeout_ms: 120000 },
    })

    // #when
    await hook.event({ event: { type: "session.idle", properties: { sessionID: "main-1" } } })

    // #then
    expect(promptCalls.length).toBe(1)
    const payload = promptCalls[0].input as { body?: { parts?: Array<{ text?: string }> } }
    const text = payload.body?.parts?.[0]?.text ?? ""
    expect(text).toContain("background_output")
    expect(text).toContain("background_cancel")
    expect(text).toContain("deep thought")
  })

  test("does not remind stable model tasks", async () => {
    // #given
    setMainSession("main-1")
    const promptCalls: Array<{ input: unknown }> = []
    const ctx = createMockPluginInput({
      messagesBySession: { "main-1": [] },
      promptCalls,
    })
    const backgroundManager = createBackgroundManager([
      createTask({ model: { providerID: "openai", modelID: "gpt-4" } }),
    ])
    const hook = createUnstableAgentBabysitterHook(ctx, {
      backgroundManager,
      config: { enabled: true, timeout_ms: 120000 },
    })

    // #when
    await hook.event({ event: { type: "session.idle", properties: { sessionID: "main-1" } } })

    // #then
    expect(promptCalls.length).toBe(0)
  })

  test("respects per-task cooldown", async () => {
    // #given
    setMainSession("main-1")
    const promptCalls: Array<{ input: unknown }> = []
    const ctx = createMockPluginInput({
      messagesBySession: { "main-1": [], "bg-1": [] },
      promptCalls,
    })
    const backgroundManager = createBackgroundManager([createTask()])
    const hook = createUnstableAgentBabysitterHook(ctx, {
      backgroundManager,
      config: { enabled: true, timeout_ms: 120000 },
    })
    const now = Date.now()
    const originalNow = Date.now
    // Freeze the clock so both idle events happen at the same instant.
    Date.now = () => now

    try {
      // #when
      await hook.event({ event: { type: "session.idle", properties: { sessionID: "main-1" } } })
      await hook.event({ event: { type: "session.idle", properties: { sessionID: "main-1" } } })

      // #then
      expect(promptCalls.length).toBe(1)
    } finally {
      // Always restore the real clock, even when the hook throws or the assertion fails,
      // so a failure here cannot freeze time for later tests.
      Date.now = originalNow
    }
  })
})
|
||||
243
src/hooks/unstable-agent-babysitter/index.ts
Normal file
243
src/hooks/unstable-agent-babysitter/index.ts
Normal file
@@ -0,0 +1,243 @@
|
||||
import type { BackgroundManager, BackgroundTask } from "../../features/background-agent"
|
||||
import { getMainSessionID, getSessionAgent } from "../../features/claude-code-session-state"
|
||||
import { log } from "../../shared/logger"
|
||||
|
||||
/** Hook identifier, used as the prefix of every log line this module writes. */
const HOOK_NAME = "unstable-agent-babysitter"

/** Idle threshold in milliseconds applied when the config does not provide `timeout_ms` (2 minutes). */
const DEFAULT_TIMEOUT_MS = 2 * 60 * 1000

/** Minimum gap in milliseconds between two reminders for the same task (5 minutes). */
const COOLDOWN_MS = 300_000

/** Upper bound, in characters, on the thinking summary embedded in a reminder. */
const THINKING_SUMMARY_MAX_CHARS = 500
|
||||
|
||||
/**
 * Babysitting settings as consumed by this hook; both fields may be absent.
 * - `enabled`: the hook only acts when this is exactly `true`.
 * - `timeout_ms`: idle threshold in milliseconds; the hook falls back to its default when missing.
 */
type BabysittingConfig = Partial<{
  enabled: boolean
  timeout_ms: number
}>
|
||||
|
||||
/** Arguments accepted by `client.session.prompt` in the slice of the client this hook uses. */
type BabysitterPromptArgs = {
  path: { id: string }
  body: {
    parts: Array<{ type: "text"; text: string }>
    agent?: string
    model?: { providerID: string; modelID: string }
  }
  query?: { directory?: string }
}

/** The two session calls the hook relies on: reading a session's messages and prompting a session. */
type BabysitterSessionClient = {
  /** May resolve to a `{ data }` envelope or directly to a message array. */
  messages: (args: { path: { id: string } }) => Promise<{ data?: unknown } | unknown[]>
  prompt: (args: BabysitterPromptArgs) => Promise<unknown>
}

/** Narrow view of the plugin input that the babysitter hook needs. */
type BabysitterContext = {
  /** Forwarded as `query.directory` when a reminder prompt is sent. */
  directory: string
  client: {
    session: BabysitterSessionClient
  }
}
|
||||
|
||||
/** Second argument of the hook factory. */
type BabysitterOptions = {
  /** Babysitting settings; the hook does nothing unless `enabled` is `true`. */
  config?: BabysittingConfig
  /** Only the task lookup by parent session is required from the background manager. */
  backgroundManager: Pick<BackgroundManager, "getTasksByParentSession">
}
|
||||
|
||||
/**
 * Normalised subset of a message's `info` record.
 * The model may appear either as a nested `model` object or as flat
 * `providerID` / `modelID` fields, so both shapes are kept.
 */
type MessageInfo = Partial<{
  role: string
  agent: string
  model: { providerID: string; modelID: string }
  providerID: string
  modelID: string
}>
|
||||
|
||||
/** Normalised message part: an optional kind tag plus optional `text` / `thinking` payloads. */
type MessagePart = Partial<Record<"type" | "text" | "thinking", string>>
|
||||
|
||||
/** Type guard: true when `value` is a non-null object that carries a `data` key. */
function hasData(value: unknown): value is { data?: unknown } {
  if (value === null || typeof value !== "object") {
    return false
  }
  return "data" in value
}
|
||||
|
||||
/** Type guard: true for any non-null value whose `typeof` is "object" (arrays included). */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null) {
    return false
  }
  return typeof value === "object"
}
|
||||
|
||||
/**
 * Reads the `info` record of a raw message and returns its string fields in normalised form.
 *
 * @param value - A raw message of unknown shape.
 * @returns `undefined` when `value` or `value.info` is not an object; otherwise a `MessageInfo`
 *   in which every field that is missing or of the wrong type is `undefined`. `model` is only
 *   set when both `providerID` and `modelID` inside `info.model` are strings.
 */
function getMessageInfo(value: unknown): MessageInfo | undefined {
  if (!isRecord(value) || !isRecord(value.info)) return undefined

  const raw = value.info
  const asString = (candidate: unknown): string | undefined =>
    typeof candidate === "string" ? candidate : undefined

  let model: MessageInfo["model"]
  const rawModel = raw.model
  if (isRecord(rawModel)) {
    const providerID = rawModel.providerID
    const modelID = rawModel.modelID
    if (typeof providerID === "string" && typeof modelID === "string") {
      model = { providerID, modelID }
    }
  }

  return {
    role: asString(raw.role),
    agent: asString(raw.agent),
    model,
    providerID: asString(raw.providerID),
    modelID: asString(raw.modelID),
  }
}
|
||||
|
||||
/**
 * Extracts the `parts` array of a raw message in normalised form.
 *
 * @param value - A raw message of unknown shape.
 * @returns An empty array when `value` is not an object or has no `parts` array. Otherwise one
 *   `MessagePart` per object entry, with non-string `type` / `text` / `thinking` fields set to
 *   `undefined`; entries that are not objects are dropped.
 */
function getMessageParts(value: unknown): MessagePart[] {
  if (!isRecord(value) || !Array.isArray(value.parts)) return []

  const normalised: MessagePart[] = []
  for (const candidate of value.parts) {
    if (!isRecord(candidate)) continue
    const { type, text, thinking } = candidate
    normalised.push({
      type: typeof type === "string" ? type : undefined,
      text: typeof text === "string" ? text : undefined,
      thinking: typeof thinking === "string" ? thinking : undefined,
    })
  }
  return normalised
}
|
||||
|
||||
/**
 * Unwraps a `session.messages` response into a plain array.
 *
 * Accepts either the array itself or a `{ data: [...] }` envelope; anything else
 * (including an envelope whose `data` is not an array) yields an empty array.
 */
function extractMessages(value: unknown): unknown[] {
  const candidate = Array.isArray(value) ? value : hasData(value) ? value.data : undefined
  return Array.isArray(candidate) ? candidate : []
}
|
||||
|
||||
/**
 * Decides whether a background task should be babysat.
 *
 * A task qualifies when it is explicitly flagged (`isUnstableAgent === true`) or when its
 * model id contains "gemini" (case-insensitive). Tasks without a model id do not qualify
 * unless flagged.
 */
function isUnstableTask(task: BackgroundTask): boolean {
  return (
    task.isUnstableAgent === true ||
    (task.model?.modelID?.toLowerCase() ?? "").includes("gemini")
  )
}
|
||||
|
||||
/**
 * Works out which agent and model a reminder prompt to `sessionID` should carry.
 *
 * The agent recorded in session state wins when present. Otherwise the session's messages are
 * scanned from newest to oldest, and the first message carrying an agent or a model (nested
 * `model` object, or flat `providerID` + `modelID`) supplies the missing values.
 *
 * Failures while fetching messages are logged and swallowed; whatever was resolved so far
 * is returned.
 */
async function resolveMainSessionTarget(
  ctx: BabysitterContext,
  sessionID: string
): Promise<{ agent?: string; model?: { providerID: string; modelID: string } }> {
  let agent = getSessionAgent(sessionID)
  let model: { providerID: string; modelID: string } | undefined

  try {
    const response = await ctx.client.session.messages({ path: { id: sessionID } })
    const history = extractMessages(response)

    // Walk backwards so the most recent message that names an agent or model decides.
    let index = history.length
    while (index-- > 0) {
      const info = getMessageInfo(history[index])
      if (!info) continue

      const flatModel =
        info.providerID && info.modelID
          ? { providerID: info.providerID, modelID: info.modelID }
          : undefined
      if (!info.agent && !info.model && !flatModel) continue

      agent = agent ?? info.agent
      model = info.model ?? flatModel
      break
    }
  } catch (error) {
    log(`[${HOOK_NAME}] Failed to resolve main session agent`, { sessionID, error: String(error) })
  }

  return { agent, model }
}
|
||||
|
||||
/**
 * Builds a short excerpt of the thinking/reasoning an agent produced in `sessionID`.
 *
 * Collects, in message order, the `thinking` payload of "thinking" parts and the `text`
 * payload of "reasoning" parts from assistant messages, joins them with newlines and trims.
 * The result is cut to `THINKING_SUMMARY_MAX_CHARS` characters with "..." appended when longer.
 *
 * @returns The excerpt, or `null` when there is nothing to report or fetching messages failed
 *   (the failure is logged).
 */
async function getThinkingSummary(ctx: BabysitterContext, sessionID: string): Promise<string | null> {
  // At most one payload per part: a part has a single `type`.
  const pickThought = (part: MessagePart): string[] => {
    if (part.type === "thinking" && part.thinking) return [part.thinking]
    if (part.type === "reasoning" && part.text) return [part.text]
    return []
  }

  try {
    const response = await ctx.client.session.messages({ path: { id: sessionID } })
    const thoughts = extractMessages(response)
      .filter((message) => getMessageInfo(message)?.role === "assistant")
      .flatMap((message) => getMessageParts(message))
      .flatMap(pickThought)

    const joined = thoughts.join("\n").trim()
    if (joined.length === 0) return null

    return joined.length > THINKING_SUMMARY_MAX_CHARS
      ? `${joined.slice(0, THINKING_SUMMARY_MAX_CHARS)}...`
      : joined
  } catch (error) {
    log(`[${HOOK_NAME}] Failed to fetch thinking summary`, { sessionID, error: String(error) })
    return null
  }
}
|
||||
|
||||
/**
 * Renders the reminder text injected into the main session for a seemingly hung task.
 *
 * @param task - The background task being reported.
 * @param summary - Thinking excerpt, or `null` to show a "no trace" placeholder.
 * @param idleMs - How long the task has been silent, in milliseconds (shown rounded to seconds).
 * @returns A multi-line message with task details, the thinking excerpt and suggested
 *   `background_output` / `background_cancel` invocations.
 */
function buildReminder(task: BackgroundTask, summary: string | null, idleMs: number): string {
  const idleSeconds = Math.round(idleMs / 1000)
  const summaryText = summary ?? "(No thinking trace available)"

  const lines = [
    `Unstable background agent appears idle for ${idleSeconds}s.`,
    "",
    `Task ID: ${task.id}`,
    `Description: ${task.description}`,
    `Agent: ${task.agent}`,
    `Status: ${task.status}`,
    `Session ID: ${task.sessionID ?? "N/A"}`,
    "",
    `Thinking summary (first ${THINKING_SUMMARY_MAX_CHARS} chars):`,
    `${summaryText}`,
    "",
    "Suggested actions:",
    `- background_output task_id="${task.id}" full_session=true include_thinking=true include_tool_results=true message_limit=50`,
    `- background_cancel taskId="${task.id}"`,
    "",
    "This is a reminder only. No automatic action was taken.",
  ]

  return lines.join("\n")
}
|
||||
|
||||
/**
 * Creates the "unstable-agent-babysitter" hook.
 *
 * On every `session.idle` event of the main session, the hook inspects that session's background
 * tasks and, for each running unstable task (see `isUnstableTask`) whose last message is at least
 * `timeout_ms` old, injects a reminder prompt into the main session. Reminders are informational
 * only; no task is cancelled automatically. After a successful reminder the same task is not
 * reported again for `COOLDOWN_MS`.
 *
 * @param ctx - Plugin context slice: project directory plus `session.messages` / `session.prompt`.
 * @param options - Background manager used to look up tasks, and the babysitting config.
 *   The hook is inert unless `options.config.enabled === true`.
 * @returns An object whose `event` handler should be fed all plugin events.
 */
export function createUnstableAgentBabysitterHook(ctx: BabysitterContext, options: BabysitterOptions) {
  // task.id -> Date.now() value captured when the last reminder for that task was injected.
  const reminderCooldowns = new Map<string, number>()

  const eventHandler = async ({ event }: { event: { type: string; properties?: unknown } }) => {
    if (event.type !== "session.idle") return
    if (options.config?.enabled !== true) return

    // Narrow at runtime instead of asserting: `properties` arrives as `unknown`.
    const sessionID = isRecord(event.properties) ? event.properties.sessionID : undefined
    if (typeof sessionID !== "string" || sessionID === "") return

    // Only the main session's idle events trigger babysitting.
    const mainSessionID = getMainSessionID()
    if (!mainSessionID || sessionID !== mainSessionID) return

    const tasks = options.backgroundManager.getTasksByParentSession(mainSessionID)
    if (tasks.length === 0) return

    const timeoutMs = options.config?.timeout_ms ?? DEFAULT_TIMEOUT_MS
    const now = Date.now()

    // Agent/model of the main session. It does not depend on the task, so it is resolved
    // at most once per event, and only if some task actually needs a reminder.
    let target: { agent?: string; model?: { providerID: string; modelID: string } } | undefined

    for (const task of tasks) {
      if (task.status !== "running") continue
      if (!isUnstableTask(task)) continue

      // Without a last-message timestamp there is nothing to measure idleness against.
      const lastMessageAt = task.progress?.lastMessageAt
      if (!lastMessageAt) continue

      const idleMs = now - lastMessageAt.getTime()
      if (idleMs < timeoutMs) continue

      // Explicit undefined check so a stored timestamp of 0 still counts as "already reminded".
      const lastReminderAt = reminderCooldowns.get(task.id)
      if (lastReminderAt !== undefined && now - lastReminderAt < COOLDOWN_MS) continue

      const summary = task.sessionID ? await getThinkingSummary(ctx, task.sessionID) : null
      const reminder = buildReminder(task, summary, idleMs)

      if (!target) {
        target = await resolveMainSessionTarget(ctx, mainSessionID)
      }
      const { agent, model } = target

      try {
        await ctx.client.session.prompt({
          path: { id: mainSessionID },
          body: {
            ...(agent ? { agent } : {}),
            ...(model ? { model } : {}),
            parts: [{ type: "text", text: reminder }],
          },
          query: { directory: ctx.directory },
        })
        // Start the cooldown only after the prompt went through, so a failed injection is retried.
        reminderCooldowns.set(task.id, now)
        log(`[${HOOK_NAME}] Reminder injected`, { taskId: task.id, sessionID: mainSessionID })
      } catch (error) {
        log(`[${HOOK_NAME}] Reminder injection failed`, { taskId: task.id, error: String(error) })
      }
    }
  }

  return {
    event: eventHandler,
  }
}
|
||||
31
src/index.ts
31
src/index.ts
@@ -35,6 +35,7 @@ import {
|
||||
createSubagentQuestionBlockerHook,
|
||||
createStopContinuationGuardHook,
|
||||
createCompactionContextInjector,
|
||||
createUnstableAgentBabysitterHook,
|
||||
} from "./hooks";
|
||||
import {
|
||||
contextCollector,
|
||||
@@ -290,6 +291,35 @@ const OhMyOpenCodePlugin: Plugin = async (ctx) => {
|
||||
})
|
||||
: null;
|
||||
|
||||
const unstableAgentBabysitter =
|
||||
isHookEnabled("unstable-agent-babysitter") && pluginConfig.babysitting?.enabled === true
|
||||
? createUnstableAgentBabysitterHook(
|
||||
{
|
||||
directory: ctx.directory,
|
||||
client: {
|
||||
session: {
|
||||
messages: async (args) => {
|
||||
const result = await ctx.client.session.messages(args)
|
||||
if (Array.isArray(result)) return result
|
||||
if (typeof result === "object" && result !== null && "data" in result) {
|
||||
const record = result as Record<string, unknown>
|
||||
return { data: record.data }
|
||||
}
|
||||
return []
|
||||
},
|
||||
prompt: async (args) => {
|
||||
await ctx.client.session.prompt(args)
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
backgroundManager,
|
||||
config: pluginConfig.babysitting,
|
||||
}
|
||||
)
|
||||
: null;
|
||||
|
||||
if (sessionRecovery && todoContinuationEnforcer) {
|
||||
sessionRecovery.setOnAbortCallback(todoContinuationEnforcer.markRecovering);
|
||||
sessionRecovery.setOnRecoveryCompleteCallback(
|
||||
@@ -520,6 +550,7 @@ const OhMyOpenCodePlugin: Plugin = async (ctx) => {
|
||||
await backgroundNotificationHook?.event(input);
|
||||
await sessionNotification?.(input);
|
||||
await todoContinuationEnforcer?.handler(input);
|
||||
await unstableAgentBabysitter?.event(input);
|
||||
await contextWindowMonitor?.event(input);
|
||||
await directoryAgentsInjector?.event(input);
|
||||
await directoryReadmeInjector?.event(input);
|
||||
|
||||
Reference in New Issue
Block a user