From 1c2caa09df6468efd9d17cf7dae6125781305d88 Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Mon, 2 Mar 2026 23:07:39 +0900 Subject: [PATCH] fix(preemptive-compaction): allow re-compaction after context grows and use model-specific limits compactedSessions permanently blocked re-compaction after first success, causing unbounded context growth (e.g. 500k on Kimi K2.5 with 256k limit). - Clear compactedSessions flag on new message.updated so compaction can re-trigger when context exceeds threshold again - Use modelContextLimitsCache for model-specific context limits instead of always falling back to 200k for non-Anthropic providers --- src/hooks/preemptive-compaction.test.ts | 153 ++++++++++++++++++++++++ src/hooks/preemptive-compaction.ts | 12 +- 2 files changed, 161 insertions(+), 4 deletions(-) diff --git a/src/hooks/preemptive-compaction.test.ts b/src/hooks/preemptive-compaction.test.ts index 279562aa6..e5d266c3d 100644 --- a/src/hooks/preemptive-compaction.test.ts +++ b/src/hooks/preemptive-compaction.test.ts @@ -414,4 +414,157 @@ describe("preemptive-compaction", () => { restoreTimeouts() } }) + + // #given first compaction succeeded and context grew again + // #when tool.execute.after runs after new high-token message + // #then should trigger compaction again (re-compaction) + it("should allow re-compaction when context grows after successful compaction", async () => { + const hook = createPreemptiveCompactionHook(ctx as never, {} as never) + const sessionID = "ses_recompact" + + // given - first compaction cycle + await hook.event({ + event: { + type: "message.updated", + properties: { + info: { + role: "assistant", + sessionID, + providerID: "anthropic", + modelID: "claude-sonnet-4-6", + finish: true, + tokens: { + input: 170000, + output: 0, + reasoning: 0, + cache: { read: 10000, write: 0 }, + }, + }, + }, + }, + }) + + await hook["tool.execute.after"]( + { tool: "bash", sessionID, callID: "call_1" }, + { title: "", output: "test", metadata: null } + ) 
+ + expect(ctx.client.session.summarize).toHaveBeenCalledTimes(1) + + // when - new message with high tokens (context grew after compaction) + await hook.event({ + event: { + type: "message.updated", + properties: { + info: { + role: "assistant", + sessionID, + providerID: "anthropic", + modelID: "claude-sonnet-4-6", + finish: true, + tokens: { + input: 170000, + output: 0, + reasoning: 0, + cache: { read: 10000, write: 0 }, + }, + }, + }, + }, + }) + + await hook["tool.execute.after"]( + { tool: "bash", sessionID, callID: "call_2" }, + { title: "", output: "test", metadata: null } + ) + + // then - summarize should fire again + expect(ctx.client.session.summarize).toHaveBeenCalledTimes(2) + }) + + // #given modelContextLimitsCache has model-specific limit (256k) + // #when tokens are above default 78% of 200k but below 78% of 256k + // #then should NOT trigger compaction + it("should use model-specific context limit from modelContextLimitsCache", async () => { + const modelContextLimitsCache = new Map() + modelContextLimitsCache.set("opencode/kimi-k2.5-free", 262144) + + const hook = createPreemptiveCompactionHook(ctx as never, {} as never, { + anthropicContext1MEnabled: false, + modelContextLimitsCache, + }) + const sessionID = "ses_kimi_limit" + + // 180k total tokens — above 78% of 200k (156k) but below 78% of 256k (204k) + await hook.event({ + event: { + type: "message.updated", + properties: { + info: { + role: "assistant", + sessionID, + providerID: "opencode", + modelID: "kimi-k2.5-free", + finish: true, + tokens: { + input: 170000, + output: 0, + reasoning: 0, + cache: { read: 10000, write: 0 }, + }, + }, + }, + }, + }) + + await hook["tool.execute.after"]( + { tool: "bash", sessionID, callID: "call_1" }, + { title: "", output: "test", metadata: null } + ) + + expect(ctx.client.session.summarize).not.toHaveBeenCalled() + }) + + // #given modelContextLimitsCache has model-specific limit (256k) + // #when tokens exceed 78% of model-specific limit + // #then 
should trigger compaction + it("should trigger compaction at model-specific threshold", async () => { + const modelContextLimitsCache = new Map() + modelContextLimitsCache.set("opencode/kimi-k2.5-free", 262144) + + const hook = createPreemptiveCompactionHook(ctx as never, {} as never, { + anthropicContext1MEnabled: false, + modelContextLimitsCache, + }) + const sessionID = "ses_kimi_trigger" + + // 210k total — above 78% of 256k (≈204k) + await hook.event({ + event: { + type: "message.updated", + properties: { + info: { + role: "assistant", + sessionID, + providerID: "opencode", + modelID: "kimi-k2.5-free", + finish: true, + tokens: { + input: 200000, + output: 0, + reasoning: 0, + cache: { read: 10000, write: 0 }, + }, + }, + }, + }, + }) + + await hook["tool.execute.after"]( + { tool: "bash", sessionID, callID: "call_1" }, + { title: "", output: "test", metadata: null } + ) + + expect(ctx.client.session.summarize).toHaveBeenCalled() + }) }) diff --git a/src/hooks/preemptive-compaction.ts b/src/hooks/preemptive-compaction.ts index d6c9bf130..d93211fd1 100644 --- a/src/hooks/preemptive-compaction.ts +++ b/src/hooks/preemptive-compaction.ts @@ -7,6 +7,7 @@ const PREEMPTIVE_COMPACTION_TIMEOUT_MS = 120_000 type ModelCacheStateLike = { anthropicContext1MEnabled: boolean + modelContextLimitsCache?: Map<string, number> } function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number { @@ -91,10 +92,12 @@ export function createPreemptiveCompactionHook( const cached = tokenCache.get(sessionID) if (!cached) return - const actualLimit = - isAnthropicProvider(cached.providerID) - ? getAnthropicActualLimit(modelCacheState) - : DEFAULT_ACTUAL_LIMIT + const modelSpecificLimit = !isAnthropicProvider(cached.providerID) + ? modelCacheState?.modelContextLimitsCache?.get(`${cached.providerID}/${cached.modelID}`) + : undefined + const actualLimit = isAnthropicProvider(cached.providerID) + ? getAnthropicActualLimit(modelCacheState) + : modelSpecificLimit ?? 
DEFAULT_ACTUAL_LIMIT const lastTokens = cached.tokens const totalInputTokens = (lastTokens?.input ?? 0) + (lastTokens?.cache?.read ?? 0) @@ -164,6 +167,7 @@ export function createPreemptiveCompactionHook( modelID: info.modelID ?? "", tokens: info.tokens, }) + compactedSessions.delete(info.sessionID) } }