fix(preemptive-compaction): allow re-compaction after context grows and use model-specific limits

compactedSessions permanently blocked re-compaction after first success,
causing unbounded context growth (e.g. 500k on Kimi K2.5 with 256k limit).

- Clear compactedSessions flag on new message.updated so compaction can
  re-trigger when context exceeds threshold again
- Use modelContextLimitsCache for model-specific context limits instead
  of always falling back to 200k for non-Anthropic providers
This commit is contained in:
YeonGyu-Kim
2026-03-02 23:07:39 +09:00
parent f27fd9a6de
commit 1c2caa09df
2 changed files with 161 additions and 4 deletions

View File

@@ -414,4 +414,157 @@ describe("preemptive-compaction", () => {
restoreTimeouts()
}
})
// #given first compaction succeeded and context grew again
// #when tool.execute.after runs after new high-token message
// #then should trigger compaction again (re-compaction)
it("should allow re-compaction when context grows after successful compaction", async () => {
const hook = createPreemptiveCompactionHook(ctx as never, {} as never)
const sessionID = "ses_recompact"
// Emits an assistant message whose effective input (170k input + 10k cache read = 180k)
// sits above the compaction threshold. Literal kept inline so contextual typing applies.
const emitHighTokenMessage = async () =>
hook.event({
event: {
type: "message.updated",
properties: {
info: {
role: "assistant",
sessionID,
providerID: "anthropic",
modelID: "claude-sonnet-4-6",
finish: true,
tokens: {
input: 170000,
output: 0,
reasoning: 0,
cache: { read: 10000, write: 0 },
},
},
},
},
})
// Simulates a tool call finishing, which is the trigger point for compaction.
const runTool = async (callID: string) =>
hook["tool.execute.after"](
{ tool: "bash", sessionID, callID },
{ title: "", output: "test", metadata: null }
)
// given - first high-token message plus tool call → first compaction fires
await emitHighTokenMessage()
await runTool("call_1")
expect(ctx.client.session.summarize).toHaveBeenCalledTimes(1)
// when - a new high-token message arrives (context grew back after compaction)
await emitHighTokenMessage()
await runTool("call_2")
// then - compaction must fire a second time
expect(ctx.client.session.summarize).toHaveBeenCalledTimes(2)
})
// #given modelContextLimitsCache has model-specific limit (256k)
// #when tokens are above default 78% of 200k but below 78% of 256k
// #then should NOT trigger compaction
it("should use model-specific context limit from modelContextLimitsCache", async () => {
// limit keyed by "providerID/modelID", matching the lookup in the hook
const modelContextLimitsCache = new Map<string, number>([["opencode/kimi-k2.5-free", 262144]])
const hook = createPreemptiveCompactionHook(ctx as never, {} as never, {
anthropicContext1MEnabled: false,
modelContextLimitsCache,
})
const sessionID = "ses_kimi_limit"
// 180k total tokens — above 78% of 200k (156k) but below 78% of 256k (204k)
await hook.event({
event: {
type: "message.updated",
properties: {
info: {
role: "assistant",
sessionID,
providerID: "opencode",
modelID: "kimi-k2.5-free",
finish: true,
tokens: { input: 170000, output: 0, reasoning: 0, cache: { read: 10000, write: 0 } },
},
},
},
})
await hook["tool.execute.after"](
{ tool: "bash", sessionID, callID: "call_1" },
{ title: "", output: "test", metadata: null }
)
// then - below the model-specific threshold, so no compaction
expect(ctx.client.session.summarize).not.toHaveBeenCalled()
})
// #given modelContextLimitsCache has model-specific limit (256k)
// #when tokens exceed 78% of model-specific limit
// #then should trigger compaction
it("should trigger compaction at model-specific threshold", async () => {
// limit keyed by "providerID/modelID", matching the lookup in the hook
const modelContextLimitsCache = new Map<string, number>([["opencode/kimi-k2.5-free", 262144]])
const hook = createPreemptiveCompactionHook(ctx as never, {} as never, {
anthropicContext1MEnabled: false,
modelContextLimitsCache,
})
const sessionID = "ses_kimi_trigger"
// 210k total — above 78% of 256k (≈204k)
await hook.event({
event: {
type: "message.updated",
properties: {
info: {
role: "assistant",
sessionID,
providerID: "opencode",
modelID: "kimi-k2.5-free",
finish: true,
tokens: { input: 200000, output: 0, reasoning: 0, cache: { read: 10000, write: 0 } },
},
},
},
})
await hook["tool.execute.after"](
{ tool: "bash", sessionID, callID: "call_1" },
{ title: "", output: "test", metadata: null }
)
// then - past the model-specific threshold, so compaction fires
expect(ctx.client.session.summarize).toHaveBeenCalled()
})
})

View File

@@ -7,6 +7,7 @@ const PREEMPTIVE_COMPACTION_TIMEOUT_MS = 120_000
/**
 * Structural stand-in for the model-cache state this hook reads.
 * Only the fields the compaction logic consumes are declared here.
 */
type ModelCacheStateLike = {
// Whether the Anthropic 1M-token context option is enabled; consumed by getAnthropicActualLimit.
anthropicContext1MEnabled: boolean
// Optional per-model context limits keyed by "providerID/modelID".
// A missing entry falls back to the default context limit.
modelContextLimitsCache?: Map<string, number>
}
function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number {
@@ -91,10 +92,12 @@ export function createPreemptiveCompactionHook(
const cached = tokenCache.get(sessionID)
if (!cached) return
const actualLimit =
isAnthropicProvider(cached.providerID)
? getAnthropicActualLimit(modelCacheState)
: DEFAULT_ACTUAL_LIMIT
const modelSpecificLimit = !isAnthropicProvider(cached.providerID)
? modelCacheState?.modelContextLimitsCache?.get(`${cached.providerID}/${cached.modelID}`)
: undefined
const actualLimit = isAnthropicProvider(cached.providerID)
? getAnthropicActualLimit(modelCacheState)
: modelSpecificLimit ?? DEFAULT_ACTUAL_LIMIT
const lastTokens = cached.tokens
const totalInputTokens = (lastTokens?.input ?? 0) + (lastTokens?.cache?.read ?? 0)
@@ -164,6 +167,7 @@ export function createPreemptiveCompactionHook(
modelID: info.modelID ?? "",
tokens: info.tokens,
})
compactedSessions.delete(info.sessionID)
}
}