fix(#2825): secondary agents no longer pruned after 30 min of total runtime

TTL (pruneStaleTasksAndNotifications) now resets on last activity:
- Uses task.progress.lastUpdate as TTL anchor for running tasks
  (was always using startedAt, causing 30-min hard deadline)
- Added taskTtlMs config option for user-adjustable TTL
- Error message shows actual TTL duration, not hardcoded '30 minutes'
- 4 new tests for the new behavior
This commit is contained in:
YeonGyu-Kim
2026-03-27 16:06:38 +09:00
parent 3b4420bc23
commit c41e59e9ab
5 changed files with 144 additions and 5 deletions

View File

@@ -4661,6 +4661,10 @@
"type": "number",
"minimum": 60000
},
"taskTtlMs": {
"type": "number",
"minimum": 300000
},
"sessionGoneTimeoutMs": {
"type": "number",
"minimum": 10000

View File

@@ -16,6 +16,8 @@ export const BackgroundTaskConfigSchema = z.object({
staleTimeoutMs: z.number().min(60000).optional(),
/** Timeout for tasks that never received any progress update, falling back to startedAt (default: 1800000 = 30 minutes, minimum: 60000 = 1 minute) */
messageStalenessTimeoutMs: z.number().min(60000).optional(),
/** Inactivity TTL for non-terminal tasks in milliseconds (default: 1800000 = 30 minutes, minimum: 300000 = 5 minutes). Running tasks are aged from their last progress update (or startedAt if no progress was recorded) and pending tasks from queuedAt; tasks exceeding this age are pruned. */
taskTtlMs: z.number().min(300000).optional(),
/** Timeout for tasks whose session has completely disappeared from the status registry (default: 60000 = 1 minute, minimum: 10000 = 10 seconds). When a session is gone (likely crashed), this shorter timeout is used instead of the normal stale timeout. */
sessionGoneTimeoutMs: z.number().min(10000).optional(),
syncPollTimeoutMs: z.number().min(60000).optional(),

View File

@@ -1751,6 +1751,7 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
pruneStaleTasksAndNotifications({
tasks: this.tasks,
notifications: this.notifications,
taskTtlMs: this.config?.taskTtlMs,
onTaskPruned: (taskId, task, errorMessage) => {
const wasPending = task.status === "pending"
log("[background-agent] Pruning stale task:", { taskId, status: task.status, age: Math.round(((wasPending ? task.queuedAt?.getTime() : task.startedAt?.getTime()) ? (Date.now() - (wasPending ? task.queuedAt!.getTime() : task.startedAt!.getTime())) : 0) / 1000) + "s" })

View File

@@ -597,6 +597,132 @@ describe("pruneStaleTasksAndNotifications", () => {
expect(pruned).toContain("old-task")
})
it("#given running task with recent progress #when startedAt exceeds TTL #then should NOT prune", () => {
  //#given
  // A running task that started 45 minutes ago but reported progress 5 minutes ago.
  const minuteMs = 60 * 1000
  const runningTask: BackgroundTask = {
    id: "active-task",
    parentSessionID: "parent",
    parentMessageID: "msg",
    description: "active",
    prompt: "active",
    agent: "oracle",
    status: "running",
    startedAt: new Date(Date.now() - 45 * minuteMs),
    progress: {
      toolCalls: 10,
      lastUpdate: new Date(Date.now() - 5 * minuteMs),
    },
  }
  const tasks = new Map<string, BackgroundTask>([["active-task", runningTask]])
  const notifications = new Map<string, BackgroundTask[]>()
  const prunedIds: string[] = []

  //#when
  // No taskTtlMs is passed, so the pruner falls back to its default TTL.
  pruneStaleTasksAndNotifications({
    tasks,
    notifications,
    onTaskPruned: (taskId) => {
      prunedIds.push(taskId)
    },
  })

  //#then
  // The prune callback must never have fired.
  expect(prunedIds).toEqual([])
})
it("#given running task with stale progress #when lastUpdate exceeds TTL #then should prune", () => {
  //#given
  // A running task started 60 minutes ago whose last progress update is 35 minutes old.
  const minuteMs = 60 * 1000
  const runningTask: BackgroundTask = {
    id: "stale-task",
    parentSessionID: "parent",
    parentMessageID: "msg",
    description: "stale",
    prompt: "stale",
    agent: "oracle",
    status: "running",
    startedAt: new Date(Date.now() - 60 * minuteMs),
    progress: {
      toolCalls: 10,
      lastUpdate: new Date(Date.now() - 35 * minuteMs),
    },
  }
  const tasks = new Map<string, BackgroundTask>([["stale-task", runningTask]])
  const notifications = new Map<string, BackgroundTask[]>()
  const prunedIds: string[] = []

  //#when
  // No taskTtlMs is passed, so the pruner falls back to its default TTL.
  pruneStaleTasksAndNotifications({
    tasks,
    notifications,
    onTaskPruned: (taskId) => {
      prunedIds.push(taskId)
    },
  })

  //#then
  // The prune callback must have been invoked for this task.
  expect(prunedIds).toContain("stale-task")
})
it("#given custom taskTtlMs #when task exceeds custom TTL #then should prune", () => {
  //#given
  // A running task with no progress record, started 61 minutes ago.
  const minuteMs = 60 * 1000
  const runningTask: BackgroundTask = {
    id: "custom-ttl-task",
    parentSessionID: "parent",
    parentMessageID: "msg",
    description: "custom",
    prompt: "custom",
    agent: "explore",
    status: "running",
    startedAt: new Date(Date.now() - 61 * minuteMs),
  }
  const tasks = new Map<string, BackgroundTask>([["custom-ttl-task", runningTask]])
  const notifications = new Map<string, BackgroundTask[]>()
  const prunedIds: string[] = []

  //#when
  // Override the TTL with 60 minutes; the task is one minute past it.
  pruneStaleTasksAndNotifications({
    tasks,
    notifications,
    taskTtlMs: 60 * minuteMs,
    onTaskPruned: (taskId) => {
      prunedIds.push(taskId)
    },
  })

  //#then
  // The prune callback must have been invoked for this task.
  expect(prunedIds).toContain("custom-ttl-task")
})
it("#given custom taskTtlMs #when task within custom TTL #then should NOT prune", () => {
  //#given
  // A running task with no progress record, started 45 minutes ago.
  const minuteMs = 60 * 1000
  const runningTask: BackgroundTask = {
    id: "within-ttl-task",
    parentSessionID: "parent",
    parentMessageID: "msg",
    description: "within",
    prompt: "within",
    agent: "explore",
    status: "running",
    startedAt: new Date(Date.now() - 45 * minuteMs),
  }
  const tasks = new Map<string, BackgroundTask>([["within-ttl-task", runningTask]])
  const notifications = new Map<string, BackgroundTask[]>()
  const prunedIds: string[] = []

  //#when
  // Override the TTL with 60 minutes; the task's age is still below it.
  pruneStaleTasksAndNotifications({
    tasks,
    notifications,
    taskTtlMs: 60 * minuteMs,
    onTaskPruned: (taskId) => {
      prunedIds.push(taskId)
    },
  })

  //#then
  // The prune callback must never have fired.
  expect(prunedIds).toEqual([])
})
it("should prune terminal tasks when completion time exceeds terminal TTL", () => {
//#given
const tasks = new Map<string, BackgroundTask>()

View File

@@ -27,8 +27,10 @@ export function pruneStaleTasksAndNotifications(args: {
tasks: Map<string, BackgroundTask>
notifications: Map<string, BackgroundTask[]>
onTaskPruned: (taskId: string, task: BackgroundTask, errorMessage: string) => void
taskTtlMs?: number
}): void {
const { tasks, notifications, onTaskPruned } = args
const effectiveTtl = args.taskTtlMs ?? TASK_TTL_MS
const now = Date.now()
const tasksWithPendingNotifications = new Set<string>()
@@ -53,18 +55,22 @@ export function pruneStaleTasksAndNotifications(args: {
continue
}
const lastActivity = task.status === "running" && task.progress?.lastUpdate
? task.progress.lastUpdate.getTime()
: undefined
const timestamp = task.status === "pending"
? task.queuedAt?.getTime()
: task.startedAt?.getTime()
: (lastActivity ?? task.startedAt?.getTime())
if (!timestamp) continue
const age = now - timestamp
if (age <= TASK_TTL_MS) continue
if (age <= effectiveTtl) continue
const ttlMinutes = Math.round(effectiveTtl / 60000)
const errorMessage = task.status === "pending"
? "Task timed out while queued (30 minutes)"
: "Task timed out after 30 minutes"
? `Task timed out while queued (${ttlMinutes} minutes)`
: `Task timed out after ${ttlMinutes} minutes of inactivity`
onTaskPruned(taskId, task, errorMessage)
}
@@ -78,7 +84,7 @@ export function pruneStaleTasksAndNotifications(args: {
const validNotifications = queued.filter((task) => {
if (!task.startedAt) return false
const age = now - task.startedAt.getTime()
return age <= TASK_TTL_MS
return age <= effectiveTtl
})
if (validNotifications.length === 0) {