From 1c2caa09df6468efd9d17cf7dae6125781305d88 Mon Sep 17 00:00:00 2001
From: YeonGyu-Kim <code.yeon.gyu@gmail.com>
Date: Mon, 2 Mar 2026 23:07:39 +0900
Subject: [PATCH] fix(preemptive-compaction): allow re-compaction after context
 grows and use model-specific limits

compactedSessions permanently blocked re-compaction after first success,
causing unbounded context growth (e.g. 500k on Kimi K2.5 with 256k limit).

- Clear compactedSessions flag on new message.updated so compaction can
  re-trigger when context exceeds threshold again
- Use modelContextLimitsCache for model-specific context limits instead
  of always falling back to 200k for non-Anthropic providers
---
 src/hooks/preemptive-compaction.test.ts | 153 ++++++++++++++++++++++++
 src/hooks/preemptive-compaction.ts      |  12 +-
 2 files changed, 161 insertions(+), 4 deletions(-)

diff --git a/src/hooks/preemptive-compaction.test.ts b/src/hooks/preemptive-compaction.test.ts
index 279562aa6..e5d266c3d 100644
--- a/src/hooks/preemptive-compaction.test.ts
+++ b/src/hooks/preemptive-compaction.test.ts
@@ -414,4 +414,157 @@ describe("preemptive-compaction", () => {
       restoreTimeouts()
     }
   })
+
+  // #given a first compaction already fired for the session (summarize called once)
+  // #when another high-token message.updated arrives and tool.execute.after runs again
+  // #then summarize should be called a second time (re-compaction)
+  it("should allow re-compaction when context grows after successful compaction", async () => {
+    const hook = createPreemptiveCompactionHook(ctx as never, {} as never)
+    const sessionID = "ses_recompact"
+
+    // given - first compaction cycle: 180k total (170k input + 10k cache read) on an Anthropic model
+    await hook.event({
+      event: {
+        type: "message.updated",
+        properties: {
+          info: {
+            role: "assistant",
+            sessionID,
+            providerID: "anthropic",
+            modelID: "claude-sonnet-4-6",
+            finish: true,
+            tokens: {
+              input: 170000,
+              output: 0,
+              reasoning: 0,
+              cache: { read: 10000, write: 0 },
+            },
+          },
+        },
+      },
+    })
+
+    await hook["tool.execute.after"](
+      { tool: "bash", sessionID, callID: "call_1" },
+      { title: "", output: "test", metadata: null }
+    )
+
+    expect(ctx.client.session.summarize).toHaveBeenCalledTimes(1)
+
+    // when - a second finished assistant message reports the same 180k total (context grew back after compaction)
+    await hook.event({
+      event: {
+        type: "message.updated",
+        properties: {
+          info: {
+            role: "assistant",
+            sessionID,
+            providerID: "anthropic",
+            modelID: "claude-sonnet-4-6",
+            finish: true,
+            tokens: {
+              input: 170000,
+              output: 0,
+              reasoning: 0,
+              cache: { read: 10000, write: 0 },
+            },
+          },
+        },
+      },
+    })
+
+    await hook["tool.execute.after"](
+      { tool: "bash", sessionID, callID: "call_2" },
+      { title: "", output: "test", metadata: null }
+    )
+
+    // then - summarize fires again, bringing the total call count to 2
+    expect(ctx.client.session.summarize).toHaveBeenCalledTimes(2)
+  })
+
+  // #given modelContextLimitsCache maps "opencode/kimi-k2.5-free" to 262144 tokens (256k)
+  // #when total tokens (180k) are above 78% of the default 200k but below 78% of 256k
+  // #then should NOT trigger compaction
+  it("should use model-specific context limit from modelContextLimitsCache", async () => {
+    const modelContextLimitsCache = new Map<string, number>()
+    modelContextLimitsCache.set("opencode/kimi-k2.5-free", 262144)
+
+    const hook = createPreemptiveCompactionHook(ctx as never, {} as never, {
+      anthropicContext1MEnabled: false,
+      modelContextLimitsCache,
+    })
+    const sessionID = "ses_kimi_limit"
+
+    // 180k total (170k input + 10k cache read) — above 78% of 200k (156k) but below 78% of 262144 (≈204k)
+    await hook.event({
+      event: {
+        type: "message.updated",
+        properties: {
+          info: {
+            role: "assistant",
+            sessionID,
+            providerID: "opencode",
+            modelID: "kimi-k2.5-free",
+            finish: true,
+            tokens: {
+              input: 170000,
+              output: 0,
+              reasoning: 0,
+              cache: { read: 10000, write: 0 },
+            },
+          },
+        },
+      },
+    })
+
+    await hook["tool.execute.after"](
+      { tool: "bash", sessionID, callID: "call_1" },
+      { title: "", output: "test", metadata: null }
+    )
+
+    expect(ctx.client.session.summarize).not.toHaveBeenCalled()
+  })
+
+  // #given modelContextLimitsCache maps "opencode/kimi-k2.5-free" to 262144 tokens (256k)
+  // #when total tokens (210k) exceed 78% of that model-specific limit
+  // #then should trigger compaction
+  it("should trigger compaction at model-specific threshold", async () => {
+    const modelContextLimitsCache = new Map<string, number>()
+    modelContextLimitsCache.set("opencode/kimi-k2.5-free", 262144)
+
+    const hook = createPreemptiveCompactionHook(ctx as never, {} as never, {
+      anthropicContext1MEnabled: false,
+      modelContextLimitsCache,
+    })
+    const sessionID = "ses_kimi_trigger"
+
+    // 210k total (200k input + 10k cache read) — above 78% of 262144 (≈204k)
+    await hook.event({
+      event: {
+        type: "message.updated",
+        properties: {
+          info: {
+            role: "assistant",
+            sessionID,
+            providerID: "opencode",
+            modelID: "kimi-k2.5-free",
+            finish: true,
+            tokens: {
+              input: 200000,
+              output: 0,
+              reasoning: 0,
+              cache: { read: 10000, write: 0 },
+            },
+          },
+        },
+      },
+    })
+
+    await hook["tool.execute.after"](
+      { tool: "bash", sessionID, callID: "call_1" },
+      { title: "", output: "test", metadata: null }
+    )
+
+    expect(ctx.client.session.summarize).toHaveBeenCalled()
+  })
 })
diff --git a/src/hooks/preemptive-compaction.ts b/src/hooks/preemptive-compaction.ts
index d6c9bf130..d93211fd1 100644
--- a/src/hooks/preemptive-compaction.ts
+++ b/src/hooks/preemptive-compaction.ts
@@ -7,6 +7,7 @@ const PREEMPTIVE_COMPACTION_TIMEOUT_MS = 120_000
 
 type ModelCacheStateLike = {
   anthropicContext1MEnabled: boolean
+  modelContextLimitsCache?: Map<string, number> // keyed by `${providerID}/${modelID}`; value is that model's context limit in tokens
 }
 
 function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number {
@@ -91,10 +92,12 @@ export function createPreemptiveCompactionHook(
     const cached = tokenCache.get(sessionID)
     if (!cached) return
 
-    const actualLimit =
-      isAnthropicProvider(cached.providerID)
-        ? getAnthropicActualLimit(modelCacheState)
-        : DEFAULT_ACTUAL_LIMIT
+    const modelSpecificLimit = !isAnthropicProvider(cached.providerID)
+      ? modelCacheState?.modelContextLimitsCache?.get(`${cached.providerID}/${cached.modelID}`)
+      : undefined
+    const actualLimit = isAnthropicProvider(cached.providerID)
+      ? getAnthropicActualLimit(modelCacheState)
+      : modelSpecificLimit ?? DEFAULT_ACTUAL_LIMIT
 
     const lastTokens = cached.tokens
     const totalInputTokens = (lastTokens?.input ?? 0) + (lastTokens?.cache?.read ?? 0)
@@ -164,6 +167,7 @@ export function createPreemptiveCompactionHook(
         modelID: info.modelID ?? "",
         tokens: info.tokens,
       })
+      compactedSessions.delete(info.sessionID)
     }
   }