127 lines
3.5 KiB
TypeScript
127 lines
3.5 KiB
TypeScript
import { log } from "../shared/logger"
|
|
|
|
const DEFAULT_ACTUAL_LIMIT = 200_000
|
|
|
|
const ANTHROPIC_ACTUAL_LIMIT =
|
|
process.env.ANTHROPIC_1M_CONTEXT === "true" ||
|
|
process.env.VERTEX_ANTHROPIC_1M_CONTEXT === "true"
|
|
? 1_000_000
|
|
: DEFAULT_ACTUAL_LIMIT
|
|
|
|
const PREEMPTIVE_COMPACTION_THRESHOLD = 0.78
|
|
|
|
interface TokenInfo {
|
|
input: number
|
|
output: number
|
|
reasoning: number
|
|
cache: { read: number; write: number }
|
|
}
|
|
|
|
interface CachedCompactionState {
|
|
providerID: string
|
|
modelID: string
|
|
tokens: TokenInfo
|
|
}
|
|
|
|
type PluginInput = {
|
|
client: {
|
|
session: {
|
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
messages: (...args: any[]) => any
|
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
summarize: (...args: any[]) => any
|
|
}
|
|
tui: {
|
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
showToast: (...args: any[]) => any
|
|
}
|
|
}
|
|
directory: string
|
|
}
|
|
|
|
export function createPreemptiveCompactionHook(ctx: PluginInput) {
|
|
const compactionInProgress = new Set<string>()
|
|
const compactedSessions = new Set<string>()
|
|
const tokenCache = new Map<string, CachedCompactionState>()
|
|
|
|
const toolExecuteAfter = async (
|
|
input: { tool: string; sessionID: string; callID: string },
|
|
_output: { title: string; output: string; metadata: unknown }
|
|
) => {
|
|
const { sessionID } = input
|
|
if (compactedSessions.has(sessionID) || compactionInProgress.has(sessionID)) return
|
|
|
|
const cached = tokenCache.get(sessionID)
|
|
if (!cached) return
|
|
|
|
const actualLimit =
|
|
cached.providerID === "anthropic"
|
|
? ANTHROPIC_ACTUAL_LIMIT
|
|
: DEFAULT_ACTUAL_LIMIT
|
|
|
|
const lastTokens = cached.tokens
|
|
const totalInputTokens = (lastTokens?.input ?? 0) + (lastTokens?.cache?.read ?? 0)
|
|
const usageRatio = totalInputTokens / actualLimit
|
|
|
|
if (usageRatio < PREEMPTIVE_COMPACTION_THRESHOLD) return
|
|
|
|
const modelID = cached.modelID
|
|
if (!modelID) return
|
|
|
|
compactionInProgress.add(sessionID)
|
|
|
|
try {
|
|
await ctx.client.session.summarize({
|
|
path: { id: sessionID },
|
|
body: { providerID: cached.providerID, modelID, auto: true } as never,
|
|
query: { directory: ctx.directory },
|
|
})
|
|
|
|
compactedSessions.add(sessionID)
|
|
} catch (error) {
|
|
log("[preemptive-compaction] Compaction failed", { sessionID, error: String(error) })
|
|
} finally {
|
|
compactionInProgress.delete(sessionID)
|
|
}
|
|
}
|
|
|
|
const eventHandler = async ({ event }: { event: { type: string; properties?: unknown } }) => {
|
|
const props = event.properties as Record<string, unknown> | undefined
|
|
|
|
if (event.type === "session.deleted") {
|
|
const sessionInfo = props?.info as { id?: string } | undefined
|
|
if (sessionInfo?.id) {
|
|
compactionInProgress.delete(sessionInfo.id)
|
|
compactedSessions.delete(sessionInfo.id)
|
|
tokenCache.delete(sessionInfo.id)
|
|
}
|
|
return
|
|
}
|
|
|
|
if (event.type === "message.updated") {
|
|
const info = props?.info as {
|
|
role?: string
|
|
sessionID?: string
|
|
providerID?: string
|
|
modelID?: string
|
|
finish?: boolean
|
|
tokens?: TokenInfo
|
|
} | undefined
|
|
|
|
if (!info || info.role !== "assistant" || !info.finish) return
|
|
if (!info.sessionID || !info.providerID || !info.tokens) return
|
|
|
|
tokenCache.set(info.sessionID, {
|
|
providerID: info.providerID,
|
|
modelID: info.modelID ?? "",
|
|
tokens: info.tokens,
|
|
})
|
|
}
|
|
}
|
|
|
|
return {
|
|
"tool.execute.after": toolExecuteAfter,
|
|
event: eventHandler,
|
|
}
|
|
}
|