refactor(background-agent): extract inline logic from manager.ts into focused modules
Extract 5 concerns from BackgroundManager into dedicated modules: - error-classifier.ts: enhance with extractErrorName, extractErrorMessage, getSessionErrorMessage, isRecord - fallback-retry-handler.ts: standalone tryFallbackRetry with full retry logic - process-cleanup.ts: registerManagerForCleanup/unregisterManagerForCleanup - compaction-aware-message-resolver.ts: isCompactionAgent/findNearestMessageExcludingCompaction - Delete notification-builder.ts (duplicate of background-task-notification-template.ts) Manager.ts method bodies now delegate to extracted modules. Wire duration-formatter.ts and task-poller.ts (existing but unused). manager.ts: 2036 -> 1647 LOC (19% reduction). All 161 existing tests pass unchanged.
This commit is contained in:
@@ -0,0 +1,57 @@
|
||||
import { readdirSync, readFileSync } from "node:fs"
|
||||
import { join } from "node:path"
|
||||
import type { StoredMessage } from "../hook-message-injector"
|
||||
|
||||
export function isCompactionAgent(agent: string | undefined): boolean {
|
||||
return agent?.trim().toLowerCase() === "compaction"
|
||||
}
|
||||
|
||||
function hasFullAgentAndModel(message: StoredMessage): boolean {
|
||||
return !!message.agent &&
|
||||
!isCompactionAgent(message.agent) &&
|
||||
!!message.model?.providerID &&
|
||||
!!message.model?.modelID
|
||||
}
|
||||
|
||||
function hasPartialAgentOrModel(message: StoredMessage): boolean {
|
||||
const hasAgent = !!message.agent && !isCompactionAgent(message.agent)
|
||||
const hasModel = !!message.model?.providerID && !!message.model?.modelID
|
||||
return hasAgent || hasModel
|
||||
}
|
||||
|
||||
export function findNearestMessageExcludingCompaction(messageDir: string): StoredMessage | null {
|
||||
try {
|
||||
const files = readdirSync(messageDir)
|
||||
.filter((name) => name.endsWith(".json"))
|
||||
.sort()
|
||||
.reverse()
|
||||
|
||||
for (const file of files) {
|
||||
try {
|
||||
const content = readFileSync(join(messageDir, file), "utf-8")
|
||||
const parsed = JSON.parse(content) as StoredMessage
|
||||
if (hasFullAgentAndModel(parsed)) {
|
||||
return parsed
|
||||
}
|
||||
} catch {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
for (const file of files) {
|
||||
try {
|
||||
const content = readFileSync(join(messageDir, file), "utf-8")
|
||||
const parsed = JSON.parse(content) as StoredMessage
|
||||
if (hasPartialAgentOrModel(parsed)) {
|
||||
return parsed
|
||||
}
|
||||
} catch {
|
||||
continue
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
return null
|
||||
}
|
||||
|
||||
return null
|
||||
}
|
||||
@@ -1,3 +1,7 @@
|
||||
export function isRecord(value: unknown): value is Record<string, unknown> {
|
||||
return typeof value === "object" && value !== null
|
||||
}
|
||||
|
||||
export function isAbortedSessionError(error: unknown): boolean {
|
||||
const message = getErrorText(error)
|
||||
return message.toLowerCase().includes("aborted")
|
||||
@@ -19,3 +23,61 @@ export function getErrorText(error: unknown): string {
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
export function extractErrorName(error: unknown): string | undefined {
|
||||
if (isRecord(error) && typeof error["name"] === "string") return error["name"]
|
||||
if (error instanceof Error) return error.name
|
||||
return undefined
|
||||
}
|
||||
|
||||
export function extractErrorMessage(error: unknown): string | undefined {
|
||||
if (!error) return undefined
|
||||
if (typeof error === "string") return error
|
||||
if (error instanceof Error) return error.message
|
||||
|
||||
if (isRecord(error)) {
|
||||
const dataRaw = error["data"]
|
||||
const candidates: unknown[] = [
|
||||
error,
|
||||
dataRaw,
|
||||
error["error"],
|
||||
isRecord(dataRaw) ? (dataRaw as Record<string, unknown>)["error"] : undefined,
|
||||
error["cause"],
|
||||
]
|
||||
|
||||
for (const candidate of candidates) {
|
||||
if (typeof candidate === "string" && candidate.length > 0) return candidate
|
||||
if (
|
||||
isRecord(candidate) &&
|
||||
typeof candidate["message"] === "string" &&
|
||||
candidate["message"].length > 0
|
||||
) {
|
||||
return candidate["message"]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
return JSON.stringify(error)
|
||||
} catch {
|
||||
return String(error)
|
||||
}
|
||||
}
|
||||
|
||||
interface EventPropertiesLike {
|
||||
[key: string]: unknown
|
||||
}
|
||||
|
||||
export function getSessionErrorMessage(properties: EventPropertiesLike): string | undefined {
|
||||
const errorRaw = properties["error"]
|
||||
if (!isRecord(errorRaw)) return undefined
|
||||
|
||||
const dataRaw = errorRaw["data"]
|
||||
if (isRecord(dataRaw)) {
|
||||
const message = dataRaw["message"]
|
||||
if (typeof message === "string") return message
|
||||
}
|
||||
|
||||
const message = errorRaw["message"]
|
||||
return typeof message === "string" ? message : undefined
|
||||
}
|
||||
|
||||
125
src/features/background-agent/fallback-retry-handler.ts
Normal file
125
src/features/background-agent/fallback-retry-handler.ts
Normal file
@@ -0,0 +1,125 @@
|
||||
import type { BackgroundTask, LaunchInput } from "./types"
|
||||
import type { FallbackEntry } from "../../shared/model-requirements"
|
||||
import type { ConcurrencyManager } from "./concurrency"
|
||||
import type { OpencodeClient, QueueItem } from "./constants"
|
||||
import { log, readConnectedProvidersCache, readProviderModelsCache } from "../../shared"
|
||||
import {
|
||||
shouldRetryError,
|
||||
getNextFallback,
|
||||
hasMoreFallbacks,
|
||||
selectFallbackProvider,
|
||||
} from "../../shared/model-error-classifier"
|
||||
import { transformModelForProvider } from "../../shared/provider-model-id-transform"
|
||||
|
||||
export function tryFallbackRetry(args: {
|
||||
task: BackgroundTask
|
||||
errorInfo: { name?: string; message?: string }
|
||||
source: string
|
||||
concurrencyManager: ConcurrencyManager
|
||||
client: OpencodeClient
|
||||
idleDeferralTimers: Map<string, ReturnType<typeof setTimeout>>
|
||||
queuesByKey: Map<string, QueueItem[]>
|
||||
processKey: (key: string) => void
|
||||
}): boolean {
|
||||
const { task, errorInfo, source, concurrencyManager, client, idleDeferralTimers, queuesByKey, processKey } = args
|
||||
const fallbackChain = task.fallbackChain
|
||||
const canRetry =
|
||||
shouldRetryError(errorInfo) &&
|
||||
fallbackChain &&
|
||||
fallbackChain.length > 0 &&
|
||||
hasMoreFallbacks(fallbackChain, task.attemptCount ?? 0)
|
||||
|
||||
if (!canRetry) return false
|
||||
|
||||
const attemptCount = task.attemptCount ?? 0
|
||||
const providerModelsCache = readProviderModelsCache()
|
||||
const connectedProviders = providerModelsCache?.connected ?? readConnectedProvidersCache()
|
||||
const connectedSet = connectedProviders ? new Set(connectedProviders.map(p => p.toLowerCase())) : null
|
||||
|
||||
const isReachable = (entry: FallbackEntry): boolean => {
|
||||
if (!connectedSet) return true
|
||||
return entry.providers.some((p) => connectedSet.has(p.toLowerCase()))
|
||||
}
|
||||
|
||||
let selectedAttemptCount = attemptCount
|
||||
let nextFallback: FallbackEntry | undefined
|
||||
while (fallbackChain && selectedAttemptCount < fallbackChain.length) {
|
||||
const candidate = getNextFallback(fallbackChain, selectedAttemptCount)
|
||||
if (!candidate) break
|
||||
selectedAttemptCount++
|
||||
if (!isReachable(candidate)) {
|
||||
log("[background-agent] Skipping unreachable fallback:", {
|
||||
taskId: task.id,
|
||||
source,
|
||||
model: candidate.model,
|
||||
providers: candidate.providers,
|
||||
})
|
||||
continue
|
||||
}
|
||||
nextFallback = candidate
|
||||
break
|
||||
}
|
||||
if (!nextFallback) return false
|
||||
|
||||
const providerID = selectFallbackProvider(
|
||||
nextFallback.providers,
|
||||
task.model?.providerID,
|
||||
)
|
||||
|
||||
log("[background-agent] Retryable error, attempting fallback:", {
|
||||
taskId: task.id,
|
||||
source,
|
||||
errorName: errorInfo.name,
|
||||
errorMessage: errorInfo.message?.slice(0, 100),
|
||||
attemptCount: selectedAttemptCount,
|
||||
nextModel: `${providerID}/${nextFallback.model}`,
|
||||
})
|
||||
|
||||
if (task.concurrencyKey) {
|
||||
concurrencyManager.release(task.concurrencyKey)
|
||||
task.concurrencyKey = undefined
|
||||
}
|
||||
|
||||
if (task.sessionID) {
|
||||
client.session.abort({ path: { id: task.sessionID } }).catch(() => {})
|
||||
}
|
||||
|
||||
const idleTimer = idleDeferralTimers.get(task.id)
|
||||
if (idleTimer) {
|
||||
clearTimeout(idleTimer)
|
||||
idleDeferralTimers.delete(task.id)
|
||||
}
|
||||
|
||||
task.attemptCount = selectedAttemptCount
|
||||
const transformedModelId = transformModelForProvider(providerID, nextFallback.model)
|
||||
task.model = {
|
||||
providerID,
|
||||
modelID: transformedModelId,
|
||||
variant: nextFallback.variant,
|
||||
}
|
||||
task.status = "pending"
|
||||
task.sessionID = undefined
|
||||
task.startedAt = undefined
|
||||
task.queuedAt = new Date()
|
||||
task.error = undefined
|
||||
|
||||
const key = task.model ? `${task.model.providerID}/${task.model.modelID}` : task.agent
|
||||
const queue = queuesByKey.get(key) ?? []
|
||||
const retryInput: LaunchInput = {
|
||||
description: task.description,
|
||||
prompt: task.prompt,
|
||||
agent: task.agent,
|
||||
parentSessionID: task.parentSessionID,
|
||||
parentMessageID: task.parentMessageID,
|
||||
parentModel: task.parentModel,
|
||||
parentAgent: task.parentAgent,
|
||||
parentTools: task.parentTools,
|
||||
model: task.model,
|
||||
fallbackChain: task.fallbackChain,
|
||||
category: task.category,
|
||||
}
|
||||
queue.push({ task, input: retryInput })
|
||||
queuesByKey.set(key, queue)
|
||||
processKey(key)
|
||||
return true
|
||||
}
|
||||
@@ -5,16 +5,14 @@ import type {
|
||||
LaunchInput,
|
||||
ResumeInput,
|
||||
} from "./types"
|
||||
import type { FallbackEntry } from "../../shared/model-requirements"
|
||||
import { TaskHistory } from "./task-history"
|
||||
import {
|
||||
log,
|
||||
getAgentToolRestrictions,
|
||||
getMessageDir,
|
||||
normalizePromptTools,
|
||||
normalizeSDKResponse,
|
||||
promptWithModelSuggestionRetry,
|
||||
readConnectedProvidersCache,
|
||||
readProviderModelsCache,
|
||||
resolveInheritedPromptTools,
|
||||
createInternalAgentTextPart,
|
||||
} from "../../shared"
|
||||
@@ -25,28 +23,29 @@ import type { BackgroundTaskConfig, TmuxConfig } from "../../config/schema"
|
||||
import { isInsideTmux } from "../../shared/tmux"
|
||||
import {
|
||||
shouldRetryError,
|
||||
getNextFallback,
|
||||
hasMoreFallbacks,
|
||||
selectFallbackProvider,
|
||||
} from "../../shared/model-error-classifier"
|
||||
import { transformModelForProvider } from "../../shared/provider-model-id-transform"
|
||||
import {
|
||||
DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS,
|
||||
DEFAULT_STALE_TIMEOUT_MS,
|
||||
MIN_IDLE_TIME_MS,
|
||||
MIN_RUNTIME_BEFORE_STALE_MS,
|
||||
POLLING_INTERVAL_MS,
|
||||
TASK_CLEANUP_DELAY_MS,
|
||||
TASK_TTL_MS,
|
||||
} from "./constants"
|
||||
|
||||
import { subagentSessions } from "../claude-code-session-state"
|
||||
import { getTaskToastManager } from "../task-toast-manager"
|
||||
import { MESSAGE_STORAGE, type StoredMessage } from "../hook-message-injector"
|
||||
import { existsSync, readFileSync, readdirSync } from "node:fs"
|
||||
import { join } from "node:path"
|
||||
|
||||
type ProcessCleanupEvent = NodeJS.Signals | "beforeExit" | "exit"
|
||||
import { formatDuration } from "./duration-formatter"
|
||||
import {
|
||||
isAbortedSessionError,
|
||||
extractErrorName,
|
||||
extractErrorMessage,
|
||||
getSessionErrorMessage,
|
||||
isRecord,
|
||||
} from "./error-classifier"
|
||||
import { tryFallbackRetry } from "./fallback-retry-handler"
|
||||
import { registerManagerForCleanup, unregisterManagerForCleanup } from "./process-cleanup"
|
||||
import { isCompactionAgent, findNearestMessageExcludingCompaction } from "./compaction-aware-message-resolver"
|
||||
import { pruneStaleTasksAndNotifications } from "./task-poller"
|
||||
import { checkAndInterruptStaleTasks } from "./task-poller"
|
||||
|
||||
type OpencodeClient = PluginInput["client"]
|
||||
|
||||
@@ -89,9 +88,7 @@ export interface SubagentSessionCreatedEvent {
|
||||
export type OnSubagentSessionCreated = (event: SubagentSessionCreatedEvent) => Promise<void>
|
||||
|
||||
export class BackgroundManager {
|
||||
private static cleanupManagers = new Set<BackgroundManager>()
|
||||
private static cleanupRegistered = false
|
||||
private static cleanupHandlers = new Map<ProcessCleanupEvent, () => void>()
|
||||
|
||||
|
||||
private tasks: Map<string, BackgroundTask>
|
||||
private notifications: Map<string, BackgroundTask[]>
|
||||
@@ -705,8 +702,8 @@ export class BackgroundManager {
|
||||
if (!assistantError) return
|
||||
|
||||
const errorInfo = {
|
||||
name: this.extractErrorName(assistantError),
|
||||
message: this.extractErrorMessage(assistantError),
|
||||
name: extractErrorName(assistantError),
|
||||
message: extractErrorMessage(assistantError),
|
||||
}
|
||||
this.tryFallbackRetry(task, errorInfo, "message.updated")
|
||||
}
|
||||
@@ -809,7 +806,7 @@ export class BackgroundManager {
|
||||
|
||||
const errorObj = props?.error as { name?: string; message?: string } | undefined
|
||||
const errorName = errorObj?.name
|
||||
const errorMessage = props ? this.getSessionErrorMessage(props) : undefined
|
||||
const errorMessage = props ? getSessionErrorMessage(props) : undefined
|
||||
|
||||
const errorInfo = { name: errorName, message: errorMessage }
|
||||
if (this.tryFallbackRetry(task, errorInfo, "session.error")) return
|
||||
@@ -934,110 +931,20 @@ export class BackgroundManager {
|
||||
errorInfo: { name?: string; message?: string },
|
||||
source: string,
|
||||
): boolean {
|
||||
const fallbackChain = task.fallbackChain
|
||||
const canRetry =
|
||||
shouldRetryError(errorInfo) &&
|
||||
fallbackChain &&
|
||||
fallbackChain.length > 0 &&
|
||||
hasMoreFallbacks(fallbackChain, task.attemptCount ?? 0)
|
||||
|
||||
if (!canRetry) return false
|
||||
|
||||
const attemptCount = task.attemptCount ?? 0
|
||||
const providerModelsCache = readProviderModelsCache()
|
||||
const connectedProviders = providerModelsCache?.connected ?? readConnectedProvidersCache()
|
||||
const connectedSet = connectedProviders ? new Set(connectedProviders.map(p => p.toLowerCase())) : null
|
||||
|
||||
const isReachable = (entry: FallbackEntry): boolean => {
|
||||
if (!connectedSet) return true
|
||||
|
||||
// Gate only on provider connectivity. Provider model lists can be stale/incomplete,
|
||||
// especially after users manually add models to opencode.json.
|
||||
return entry.providers.some((p) => connectedSet.has(p.toLowerCase()))
|
||||
}
|
||||
|
||||
let selectedAttemptCount = attemptCount
|
||||
let nextFallback: FallbackEntry | undefined
|
||||
while (fallbackChain && selectedAttemptCount < fallbackChain.length) {
|
||||
const candidate = getNextFallback(fallbackChain, selectedAttemptCount)
|
||||
if (!candidate) break
|
||||
selectedAttemptCount++
|
||||
if (!isReachable(candidate)) {
|
||||
log("[background-agent] Skipping unreachable fallback:", {
|
||||
taskId: task.id,
|
||||
const result = tryFallbackRetry({
|
||||
task,
|
||||
errorInfo,
|
||||
source,
|
||||
model: candidate.model,
|
||||
providers: candidate.providers,
|
||||
concurrencyManager: this.concurrencyManager,
|
||||
client: this.client,
|
||||
idleDeferralTimers: this.idleDeferralTimers,
|
||||
queuesByKey: this.queuesByKey,
|
||||
processKey: (key: string) => this.processKey(key),
|
||||
})
|
||||
continue
|
||||
}
|
||||
nextFallback = candidate
|
||||
break
|
||||
}
|
||||
if (!nextFallback) return false
|
||||
|
||||
const providerID = selectFallbackProvider(
|
||||
nextFallback.providers,
|
||||
task.model?.providerID,
|
||||
)
|
||||
|
||||
log("[background-agent] Retryable error, attempting fallback:", {
|
||||
taskId: task.id,
|
||||
source,
|
||||
errorName: errorInfo.name,
|
||||
errorMessage: errorInfo.message?.slice(0, 100),
|
||||
attemptCount: selectedAttemptCount,
|
||||
nextModel: `${providerID}/${nextFallback.model}`,
|
||||
})
|
||||
|
||||
if (task.concurrencyKey) {
|
||||
this.concurrencyManager.release(task.concurrencyKey)
|
||||
task.concurrencyKey = undefined
|
||||
}
|
||||
|
||||
if (task.sessionID) {
|
||||
this.client.session.abort({ path: { id: task.sessionID } }).catch(() => {})
|
||||
if (result && task.sessionID) {
|
||||
subagentSessions.delete(task.sessionID)
|
||||
}
|
||||
|
||||
const idleTimer = this.idleDeferralTimers.get(task.id)
|
||||
if (idleTimer) {
|
||||
clearTimeout(idleTimer)
|
||||
this.idleDeferralTimers.delete(task.id)
|
||||
}
|
||||
|
||||
task.attemptCount = selectedAttemptCount
|
||||
const transformedModelId = transformModelForProvider(providerID, nextFallback.model)
|
||||
task.model = {
|
||||
providerID,
|
||||
modelID: transformedModelId,
|
||||
variant: nextFallback.variant,
|
||||
}
|
||||
task.status = "pending"
|
||||
task.sessionID = undefined
|
||||
task.startedAt = undefined
|
||||
task.queuedAt = new Date()
|
||||
task.error = undefined
|
||||
|
||||
const key = task.model ? `${task.model.providerID}/${task.model.modelID}` : task.agent
|
||||
const queue = this.queuesByKey.get(key) ?? []
|
||||
const retryInput: LaunchInput = {
|
||||
description: task.description,
|
||||
prompt: task.prompt,
|
||||
agent: task.agent,
|
||||
parentSessionID: task.parentSessionID,
|
||||
parentMessageID: task.parentMessageID,
|
||||
parentModel: task.parentModel,
|
||||
parentAgent: task.parentAgent,
|
||||
parentTools: task.parentTools,
|
||||
model: task.model,
|
||||
fallbackChain: task.fallbackChain,
|
||||
category: task.category,
|
||||
}
|
||||
queue.push({ task, input: retryInput })
|
||||
this.queuesByKey.set(key, queue)
|
||||
this.processKey(key)
|
||||
return true
|
||||
return result
|
||||
}
|
||||
|
||||
markForNotification(task: BackgroundTask): void {
|
||||
@@ -1256,45 +1163,11 @@ export class BackgroundManager {
|
||||
}
|
||||
|
||||
private registerProcessCleanup(): void {
|
||||
BackgroundManager.cleanupManagers.add(this)
|
||||
|
||||
if (BackgroundManager.cleanupRegistered) return
|
||||
BackgroundManager.cleanupRegistered = true
|
||||
|
||||
const cleanupAll = () => {
|
||||
for (const manager of BackgroundManager.cleanupManagers) {
|
||||
try {
|
||||
manager.shutdown()
|
||||
} catch (error) {
|
||||
log("[background-agent] Error during shutdown cleanup:", error)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const registerSignal = (signal: ProcessCleanupEvent, exitAfter: boolean): void => {
|
||||
const listener = registerProcessSignal(signal, cleanupAll, exitAfter)
|
||||
BackgroundManager.cleanupHandlers.set(signal, listener)
|
||||
}
|
||||
|
||||
registerSignal("SIGINT", true)
|
||||
registerSignal("SIGTERM", true)
|
||||
if (process.platform === "win32") {
|
||||
registerSignal("SIGBREAK", true)
|
||||
}
|
||||
registerSignal("beforeExit", false)
|
||||
registerSignal("exit", false)
|
||||
registerManagerForCleanup(this)
|
||||
}
|
||||
|
||||
private unregisterProcessCleanup(): void {
|
||||
BackgroundManager.cleanupManagers.delete(this)
|
||||
|
||||
if (BackgroundManager.cleanupManagers.size > 0) return
|
||||
|
||||
for (const [signal, listener] of BackgroundManager.cleanupHandlers.entries()) {
|
||||
process.off(signal, listener)
|
||||
}
|
||||
BackgroundManager.cleanupHandlers.clear()
|
||||
BackgroundManager.cleanupRegistered = false
|
||||
unregisterManagerForCleanup(this)
|
||||
}
|
||||
|
||||
|
||||
@@ -1368,7 +1241,7 @@ export class BackgroundManager {
|
||||
// Note: Callers must release concurrency before calling this method
|
||||
// to ensure slots are freed even if notification fails
|
||||
|
||||
const duration = this.formatDuration(task.startedAt ?? new Date(), task.completedAt)
|
||||
const duration = formatDuration(task.startedAt ?? new Date(), task.completedAt)
|
||||
|
||||
log("[background-agent] notifyParentSession called for task:", task.id)
|
||||
|
||||
@@ -1455,7 +1328,7 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
|
||||
if (isCompactionAgent(info?.agent)) {
|
||||
continue
|
||||
}
|
||||
const normalizedTools = this.isRecord(info?.tools)
|
||||
const normalizedTools = isRecord(info?.tools)
|
||||
? normalizePromptTools(info.tools as Record<string, boolean | "allow" | "deny" | "ask">)
|
||||
: undefined
|
||||
if (info?.agent || info?.model || (info?.modelID && info?.providerID) || normalizedTools) {
|
||||
@@ -1466,7 +1339,7 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
if (this.isAbortedSessionError(error)) {
|
||||
if (isAbortedSessionError(error)) {
|
||||
log("[background-agent] Parent session aborted while loading messages; using messageDir fallback:", {
|
||||
taskId: task.id,
|
||||
parentSessionID: task.parentSessionID,
|
||||
@@ -1506,7 +1379,7 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
|
||||
noReply: !allComplete,
|
||||
})
|
||||
} catch (error) {
|
||||
if (this.isAbortedSessionError(error)) {
|
||||
if (isAbortedSessionError(error)) {
|
||||
log("[background-agent] Parent session aborted while sending notification; continuing cleanup:", {
|
||||
taskId: task.id,
|
||||
parentSessionID: task.parentSessionID,
|
||||
@@ -1544,97 +1417,11 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
|
||||
}
|
||||
|
||||
private formatDuration(start: Date, end?: Date): string {
|
||||
const duration = (end ?? new Date()).getTime() - start.getTime()
|
||||
const seconds = Math.floor(duration / 1000)
|
||||
const minutes = Math.floor(seconds / 60)
|
||||
const hours = Math.floor(minutes / 60)
|
||||
|
||||
if (hours > 0) {
|
||||
return `${hours}h ${minutes % 60}m ${seconds % 60}s`
|
||||
} else if (minutes > 0) {
|
||||
return `${minutes}m ${seconds % 60}s`
|
||||
}
|
||||
return `${seconds}s`
|
||||
return formatDuration(start, end)
|
||||
}
|
||||
|
||||
private isAbortedSessionError(error: unknown): boolean {
|
||||
const message = this.getErrorText(error)
|
||||
return message.toLowerCase().includes("aborted")
|
||||
}
|
||||
|
||||
private getErrorText(error: unknown): string {
|
||||
if (!error) return ""
|
||||
if (typeof error === "string") return error
|
||||
if (error instanceof Error) {
|
||||
return `${error.name}: ${error.message}`
|
||||
}
|
||||
if (typeof error === "object" && error !== null) {
|
||||
if ("message" in error && typeof error.message === "string") {
|
||||
return error.message
|
||||
}
|
||||
if ("name" in error && typeof error.name === "string") {
|
||||
return error.name
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
private extractErrorName(error: unknown): string | undefined {
|
||||
if (this.isRecord(error) && typeof error["name"] === "string") return error["name"]
|
||||
if (error instanceof Error) return error.name
|
||||
return undefined
|
||||
}
|
||||
|
||||
private extractErrorMessage(error: unknown): string | undefined {
|
||||
if (!error) return undefined
|
||||
if (typeof error === "string") return error
|
||||
if (error instanceof Error) return error.message
|
||||
|
||||
if (this.isRecord(error)) {
|
||||
const dataRaw = error["data"]
|
||||
const candidates: unknown[] = [
|
||||
error,
|
||||
dataRaw,
|
||||
error["error"],
|
||||
this.isRecord(dataRaw) ? (dataRaw as Record<string, unknown>)["error"] : undefined,
|
||||
error["cause"],
|
||||
]
|
||||
|
||||
for (const candidate of candidates) {
|
||||
if (typeof candidate === "string" && candidate.length > 0) return candidate
|
||||
if (
|
||||
this.isRecord(candidate) &&
|
||||
typeof candidate["message"] === "string" &&
|
||||
candidate["message"].length > 0
|
||||
) {
|
||||
return candidate["message"]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
return JSON.stringify(error)
|
||||
} catch {
|
||||
return String(error)
|
||||
}
|
||||
}
|
||||
|
||||
private isRecord(value: unknown): value is Record<string, unknown> {
|
||||
return typeof value === "object" && value !== null
|
||||
}
|
||||
|
||||
private getSessionErrorMessage(properties: EventProperties): string | undefined {
|
||||
const errorRaw = properties["error"]
|
||||
if (!this.isRecord(errorRaw)) return undefined
|
||||
|
||||
const dataRaw = errorRaw["data"]
|
||||
if (this.isRecord(dataRaw)) {
|
||||
const message = dataRaw["message"]
|
||||
if (typeof message === "string") return message
|
||||
}
|
||||
|
||||
const message = errorRaw["message"]
|
||||
return typeof message === "string" ? message : undefined
|
||||
return isAbortedSessionError(error)
|
||||
}
|
||||
|
||||
private hasRunningTasks(): boolean {
|
||||
@@ -1645,25 +1432,12 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
|
||||
}
|
||||
|
||||
private pruneStaleTasksAndNotifications(): void {
|
||||
const now = Date.now()
|
||||
|
||||
for (const [taskId, task] of this.tasks.entries()) {
|
||||
pruneStaleTasksAndNotifications({
|
||||
tasks: this.tasks,
|
||||
notifications: this.notifications,
|
||||
onTaskPruned: (taskId, task, errorMessage) => {
|
||||
const wasPending = task.status === "pending"
|
||||
const timestamp = task.status === "pending"
|
||||
? task.queuedAt?.getTime()
|
||||
: task.startedAt?.getTime()
|
||||
|
||||
if (!timestamp) {
|
||||
continue
|
||||
}
|
||||
|
||||
const age = now - timestamp
|
||||
if (age > TASK_TTL_MS) {
|
||||
const errorMessage = task.status === "pending"
|
||||
? "Task timed out while queued (30 minutes)"
|
||||
: "Task timed out after 30 minutes"
|
||||
|
||||
log("[background-agent] Pruning stale task:", { taskId, status: task.status, age: Math.round(age / 1000) + "s" })
|
||||
log("[background-agent] Pruning stale task:", { taskId, status: task.status, age: Math.round(((wasPending ? task.queuedAt?.getTime() : task.startedAt?.getTime()) ? (Date.now() - (wasPending ? task.queuedAt!.getTime() : task.startedAt!.getTime())) : 0) / 1000) + "s" })
|
||||
task.status = "error"
|
||||
task.error = errorMessage
|
||||
task.completedAt = new Date()
|
||||
@@ -1671,7 +1445,6 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
|
||||
this.concurrencyManager.release(task.concurrencyKey)
|
||||
task.concurrencyKey = undefined
|
||||
}
|
||||
// Clean up pendingByParent to prevent stale entries
|
||||
this.cleanupPendingByParent(task)
|
||||
if (wasPending) {
|
||||
const key = task.model
|
||||
@@ -1698,97 +1471,21 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
|
||||
subagentSessions.delete(task.sessionID)
|
||||
SessionCategoryRegistry.remove(task.sessionID)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (const [sessionID, notifications] of this.notifications.entries()) {
|
||||
if (notifications.length === 0) {
|
||||
this.notifications.delete(sessionID)
|
||||
continue
|
||||
}
|
||||
const validNotifications = notifications.filter((task) => {
|
||||
if (!task.startedAt) return false
|
||||
const age = now - task.startedAt.getTime()
|
||||
return age <= TASK_TTL_MS
|
||||
},
|
||||
})
|
||||
if (validNotifications.length === 0) {
|
||||
this.notifications.delete(sessionID)
|
||||
} else if (validNotifications.length !== notifications.length) {
|
||||
this.notifications.set(sessionID, validNotifications)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private async checkAndInterruptStaleTasks(
|
||||
allStatuses: Record<string, { type: string }> = {},
|
||||
): Promise<void> {
|
||||
const staleTimeoutMs = this.config?.staleTimeoutMs ?? DEFAULT_STALE_TIMEOUT_MS
|
||||
const messageStalenessMs = this.config?.messageStalenessTimeoutMs ?? DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS
|
||||
const now = Date.now()
|
||||
|
||||
for (const task of this.tasks.values()) {
|
||||
if (task.status !== "running") continue
|
||||
|
||||
const startedAt = task.startedAt
|
||||
const sessionID = task.sessionID
|
||||
if (!startedAt || !sessionID) continue
|
||||
|
||||
const sessionStatus = allStatuses[sessionID]?.type
|
||||
const sessionIsRunning = sessionStatus !== undefined && sessionStatus !== "idle"
|
||||
const runtime = now - startedAt.getTime()
|
||||
|
||||
if (!task.progress?.lastUpdate) {
|
||||
if (sessionIsRunning) continue
|
||||
if (runtime <= messageStalenessMs) continue
|
||||
|
||||
const staleMinutes = Math.round(runtime / 60000)
|
||||
task.status = "cancelled"
|
||||
task.error = `Stale timeout (no activity for ${staleMinutes}min since start)`
|
||||
task.completedAt = new Date()
|
||||
|
||||
if (task.concurrencyKey) {
|
||||
this.concurrencyManager.release(task.concurrencyKey)
|
||||
task.concurrencyKey = undefined
|
||||
}
|
||||
|
||||
this.client.session.abort({ path: { id: sessionID } }).catch(() => {})
|
||||
log(`[background-agent] Task ${task.id} interrupted: no progress since start`)
|
||||
|
||||
try {
|
||||
await this.enqueueNotificationForParent(task.parentSessionID, () => this.notifyParentSession(task))
|
||||
} catch (err) {
|
||||
log("[background-agent] Error in notifyParentSession for stale task:", { taskId: task.id, error: err })
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if (sessionIsRunning) continue
|
||||
|
||||
if (runtime < MIN_RUNTIME_BEFORE_STALE_MS) continue
|
||||
|
||||
const timeSinceLastUpdate = now - task.progress.lastUpdate.getTime()
|
||||
if (timeSinceLastUpdate <= staleTimeoutMs) continue
|
||||
if (task.status !== "running") continue
|
||||
|
||||
const staleMinutes = Math.round(timeSinceLastUpdate / 60000)
|
||||
task.status = "cancelled"
|
||||
task.error = `Stale timeout (no activity for ${staleMinutes}min)`
|
||||
task.completedAt = new Date()
|
||||
|
||||
if (task.concurrencyKey) {
|
||||
this.concurrencyManager.release(task.concurrencyKey)
|
||||
task.concurrencyKey = undefined
|
||||
}
|
||||
|
||||
this.client.session.abort({ path: { id: sessionID } }).catch(() => {})
|
||||
log(`[background-agent] Task ${task.id} interrupted: stale timeout`)
|
||||
|
||||
try {
|
||||
await this.enqueueNotificationForParent(task.parentSessionID, () => this.notifyParentSession(task))
|
||||
} catch (err) {
|
||||
log("[background-agent] Error in notifyParentSession for stale task:", { taskId: task.id, error: err })
|
||||
}
|
||||
}
|
||||
await checkAndInterruptStaleTasks({
|
||||
tasks: this.tasks.values(),
|
||||
client: this.client,
|
||||
config: this.config,
|
||||
concurrencyManager: this.concurrencyManager,
|
||||
notifyParentSession: (task) => this.enqueueNotificationForParent(task.parentSessionID, () => this.notifyParentSession(task)),
|
||||
sessionStatuses: allStatuses,
|
||||
})
|
||||
}
|
||||
|
||||
private async pollRunningTasks(): Promise<void> {
|
||||
@@ -1948,89 +1645,3 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
|
||||
return current
|
||||
}
|
||||
}
|
||||
|
||||
function registerProcessSignal(
|
||||
signal: ProcessCleanupEvent,
|
||||
handler: () => void,
|
||||
exitAfter: boolean
|
||||
): () => void {
|
||||
const listener = () => {
|
||||
handler()
|
||||
if (exitAfter) {
|
||||
// Set exitCode and schedule exit after delay to allow other handlers to complete async cleanup
|
||||
// Use 6s delay to accommodate LSP cleanup (5s timeout + 1s SIGKILL wait)
|
||||
process.exitCode = 0
|
||||
setTimeout(() => process.exit(), 6000)
|
||||
}
|
||||
}
|
||||
process.on(signal, listener)
|
||||
return listener
|
||||
}
|
||||
|
||||
|
||||
function getMessageDir(sessionID: string): string | null {
|
||||
if (!existsSync(MESSAGE_STORAGE)) return null
|
||||
|
||||
const directPath = join(MESSAGE_STORAGE, sessionID)
|
||||
if (existsSync(directPath)) return directPath
|
||||
|
||||
for (const dir of readdirSync(MESSAGE_STORAGE)) {
|
||||
const sessionPath = join(MESSAGE_STORAGE, dir, sessionID)
|
||||
if (existsSync(sessionPath)) return sessionPath
|
||||
}
|
||||
return null
|
||||
}
|
||||
|
||||
function isCompactionAgent(agent: string | undefined): boolean {
|
||||
return agent?.trim().toLowerCase() === "compaction"
|
||||
}
|
||||
|
||||
function hasFullAgentAndModel(message: StoredMessage): boolean {
|
||||
return !!message.agent &&
|
||||
!isCompactionAgent(message.agent) &&
|
||||
!!message.model?.providerID &&
|
||||
!!message.model?.modelID
|
||||
}
|
||||
|
||||
function hasPartialAgentOrModel(message: StoredMessage): boolean {
|
||||
const hasAgent = !!message.agent && !isCompactionAgent(message.agent)
|
||||
const hasModel = !!message.model?.providerID && !!message.model?.modelID
|
||||
return hasAgent || hasModel
|
||||
}
|
||||
|
||||
function findNearestMessageExcludingCompaction(messageDir: string): StoredMessage | null {
|
||||
try {
|
||||
const files = readdirSync(messageDir)
|
||||
.filter((name) => name.endsWith(".json"))
|
||||
.sort()
|
||||
.reverse()
|
||||
|
||||
for (const file of files) {
|
||||
try {
|
||||
const content = readFileSync(join(messageDir, file), "utf-8")
|
||||
const parsed = JSON.parse(content) as StoredMessage
|
||||
if (hasFullAgentAndModel(parsed)) {
|
||||
return parsed
|
||||
}
|
||||
} catch {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
for (const file of files) {
|
||||
try {
|
||||
const content = readFileSync(join(messageDir, file), "utf-8")
|
||||
const parsed = JSON.parse(content) as StoredMessage
|
||||
if (hasPartialAgentOrModel(parsed)) {
|
||||
return parsed
|
||||
}
|
||||
} catch {
|
||||
continue
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
return null
|
||||
}
|
||||
|
||||
return null
|
||||
}
|
||||
|
||||
@@ -1,41 +0,0 @@
|
||||
import type { BackgroundTask } from "./types"
|
||||
|
||||
export function buildBackgroundTaskNotificationText(args: {
|
||||
task: BackgroundTask
|
||||
duration: string
|
||||
allComplete: boolean
|
||||
remainingCount: number
|
||||
completedTasks: BackgroundTask[]
|
||||
}): string {
|
||||
const { task, duration, allComplete, remainingCount, completedTasks } = args
|
||||
const statusText =
|
||||
task.status === "completed" ? "COMPLETED" : task.status === "interrupt" ? "INTERRUPTED" : task.status === "error" ? "ERROR" : "CANCELLED"
|
||||
const errorInfo = task.error ? `\n**Error:** ${task.error}` : ""
|
||||
|
||||
if (allComplete) {
|
||||
const completedTasksText = completedTasks
|
||||
.map((t) => `- \`${t.id}\`: ${t.description}`)
|
||||
.join("\n")
|
||||
|
||||
return `<system-reminder>
|
||||
[ALL BACKGROUND TASKS COMPLETE]
|
||||
|
||||
**Completed:**
|
||||
${completedTasksText || `- \`${task.id}\`: ${task.description}`}
|
||||
|
||||
Use \`background_output(task_id="<id>")\` to retrieve each result.
|
||||
</system-reminder>`
|
||||
}
|
||||
|
||||
return `<system-reminder>
|
||||
[BACKGROUND TASK ${statusText}]
|
||||
**ID:** \`${task.id}\`
|
||||
**Description:** ${task.description}
|
||||
**Duration:** ${duration}${errorInfo}
|
||||
|
||||
**${remainingCount} task${remainingCount === 1 ? "" : "s"} still in progress.** You WILL be notified when ALL complete.
|
||||
Do NOT poll - continue productive work.
|
||||
|
||||
Use \`background_output(task_id="${task.id}")\` to retrieve this result when ready.
|
||||
</system-reminder>`
|
||||
}
|
||||
81
src/features/background-agent/process-cleanup.ts
Normal file
81
src/features/background-agent/process-cleanup.ts
Normal file
@@ -0,0 +1,81 @@
|
||||
import { log } from "../../shared"
|
||||
|
||||
type ProcessCleanupEvent = NodeJS.Signals | "beforeExit" | "exit"
|
||||
|
||||
function registerProcessSignal(
|
||||
signal: ProcessCleanupEvent,
|
||||
handler: () => void,
|
||||
exitAfter: boolean
|
||||
): () => void {
|
||||
const listener = () => {
|
||||
handler()
|
||||
if (exitAfter) {
|
||||
process.exitCode = 0
|
||||
setTimeout(() => process.exit(), 6000)
|
||||
}
|
||||
}
|
||||
process.on(signal, listener)
|
||||
return listener
|
||||
}
|
||||
|
||||
interface CleanupTarget {
|
||||
shutdown(): void
|
||||
}
|
||||
|
||||
const cleanupManagers = new Set<CleanupTarget>()
|
||||
let cleanupRegistered = false
|
||||
const cleanupHandlers = new Map<ProcessCleanupEvent, () => void>()
|
||||
|
||||
export function registerManagerForCleanup(manager: CleanupTarget): void {
|
||||
cleanupManagers.add(manager)
|
||||
|
||||
if (cleanupRegistered) return
|
||||
cleanupRegistered = true
|
||||
|
||||
const cleanupAll = () => {
|
||||
for (const m of cleanupManagers) {
|
||||
try {
|
||||
m.shutdown()
|
||||
} catch (error) {
|
||||
log("[background-agent] Error during shutdown cleanup:", error)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const registerSignal = (signal: ProcessCleanupEvent, exitAfter: boolean): void => {
|
||||
const listener = registerProcessSignal(signal, cleanupAll, exitAfter)
|
||||
cleanupHandlers.set(signal, listener)
|
||||
}
|
||||
|
||||
registerSignal("SIGINT", true)
|
||||
registerSignal("SIGTERM", true)
|
||||
if (process.platform === "win32") {
|
||||
registerSignal("SIGBREAK", true)
|
||||
}
|
||||
registerSignal("beforeExit", false)
|
||||
registerSignal("exit", false)
|
||||
}
|
||||
|
||||
export function unregisterManagerForCleanup(manager: CleanupTarget): void {
|
||||
cleanupManagers.delete(manager)
|
||||
|
||||
if (cleanupManagers.size > 0) return
|
||||
|
||||
for (const [signal, listener] of cleanupHandlers.entries()) {
|
||||
process.off(signal, listener)
|
||||
}
|
||||
cleanupHandlers.clear()
|
||||
cleanupRegistered = false
|
||||
}
|
||||
|
||||
/** @internal — test-only reset for module-level singleton state */
|
||||
export function _resetForTesting(): void {
|
||||
for (const manager of [...cleanupManagers]) {
|
||||
cleanupManagers.delete(manager)
|
||||
}
|
||||
for (const [signal, listener] of cleanupHandlers.entries()) {
|
||||
process.off(signal, listener)
|
||||
}
|
||||
cleanupHandlers.clear()
|
||||
cleanupRegistered = false
|
||||
}
|
||||
Reference in New Issue
Block a user