fix(preemptive-compaction): allow re-compaction after context grows and use model-specific limits
compactedSessions permanently blocked re-compaction after the first success, causing unbounded context growth (e.g. 500k tokens on Kimi K2.5, which has a 256k limit).

- Clear the compactedSessions flag on each new message.updated event so compaction can re-trigger when context exceeds the threshold again.
- Use modelContextLimitsCache for model-specific context limits instead of always falling back to 200k for non-Anthropic providers.
This commit is contained in:
@@ -414,4 +414,157 @@ describe("preemptive-compaction", () => {
|
||||
restoreTimeouts()
|
||||
}
|
||||
})
|
||||
|
||||
// #given first compaction succeeded and context grew again
|
||||
// #when tool.execute.after runs after new high-token message
|
||||
// #then should trigger compaction again (re-compaction)
|
||||
it("should allow re-compaction when context grows after successful compaction", async () => {
|
||||
const hook = createPreemptiveCompactionHook(ctx as never, {} as never)
|
||||
const sessionID = "ses_recompact"
|
||||
|
||||
// given - first compaction cycle
|
||||
await hook.event({
|
||||
event: {
|
||||
type: "message.updated",
|
||||
properties: {
|
||||
info: {
|
||||
role: "assistant",
|
||||
sessionID,
|
||||
providerID: "anthropic",
|
||||
modelID: "claude-sonnet-4-6",
|
||||
finish: true,
|
||||
tokens: {
|
||||
input: 170000,
|
||||
output: 0,
|
||||
reasoning: 0,
|
||||
cache: { read: 10000, write: 0 },
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
await hook["tool.execute.after"](
|
||||
{ tool: "bash", sessionID, callID: "call_1" },
|
||||
{ title: "", output: "test", metadata: null }
|
||||
)
|
||||
|
||||
expect(ctx.client.session.summarize).toHaveBeenCalledTimes(1)
|
||||
|
||||
// when - new message with high tokens (context grew after compaction)
|
||||
await hook.event({
|
||||
event: {
|
||||
type: "message.updated",
|
||||
properties: {
|
||||
info: {
|
||||
role: "assistant",
|
||||
sessionID,
|
||||
providerID: "anthropic",
|
||||
modelID: "claude-sonnet-4-6",
|
||||
finish: true,
|
||||
tokens: {
|
||||
input: 170000,
|
||||
output: 0,
|
||||
reasoning: 0,
|
||||
cache: { read: 10000, write: 0 },
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
await hook["tool.execute.after"](
|
||||
{ tool: "bash", sessionID, callID: "call_2" },
|
||||
{ title: "", output: "test", metadata: null }
|
||||
)
|
||||
|
||||
// then - summarize should fire again
|
||||
expect(ctx.client.session.summarize).toHaveBeenCalledTimes(2)
|
||||
})
|
||||
|
||||
// #given modelContextLimitsCache has model-specific limit (256k)
|
||||
// #when tokens are above default 78% of 200k but below 78% of 256k
|
||||
// #then should NOT trigger compaction
|
||||
it("should use model-specific context limit from modelContextLimitsCache", async () => {
|
||||
const modelContextLimitsCache = new Map<string, number>()
|
||||
modelContextLimitsCache.set("opencode/kimi-k2.5-free", 262144)
|
||||
|
||||
const hook = createPreemptiveCompactionHook(ctx as never, {} as never, {
|
||||
anthropicContext1MEnabled: false,
|
||||
modelContextLimitsCache,
|
||||
})
|
||||
const sessionID = "ses_kimi_limit"
|
||||
|
||||
// 180k total tokens — above 78% of 200k (156k) but below 78% of 256k (204k)
|
||||
await hook.event({
|
||||
event: {
|
||||
type: "message.updated",
|
||||
properties: {
|
||||
info: {
|
||||
role: "assistant",
|
||||
sessionID,
|
||||
providerID: "opencode",
|
||||
modelID: "kimi-k2.5-free",
|
||||
finish: true,
|
||||
tokens: {
|
||||
input: 170000,
|
||||
output: 0,
|
||||
reasoning: 0,
|
||||
cache: { read: 10000, write: 0 },
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
await hook["tool.execute.after"](
|
||||
{ tool: "bash", sessionID, callID: "call_1" },
|
||||
{ title: "", output: "test", metadata: null }
|
||||
)
|
||||
|
||||
expect(ctx.client.session.summarize).not.toHaveBeenCalled()
|
||||
})
|
||||
|
||||
// #given modelContextLimitsCache has model-specific limit (256k)
|
||||
// #when tokens exceed 78% of model-specific limit
|
||||
// #then should trigger compaction
|
||||
it("should trigger compaction at model-specific threshold", async () => {
|
||||
const modelContextLimitsCache = new Map<string, number>()
|
||||
modelContextLimitsCache.set("opencode/kimi-k2.5-free", 262144)
|
||||
|
||||
const hook = createPreemptiveCompactionHook(ctx as never, {} as never, {
|
||||
anthropicContext1MEnabled: false,
|
||||
modelContextLimitsCache,
|
||||
})
|
||||
const sessionID = "ses_kimi_trigger"
|
||||
|
||||
// 210k total — above 78% of 256k (≈204k)
|
||||
await hook.event({
|
||||
event: {
|
||||
type: "message.updated",
|
||||
properties: {
|
||||
info: {
|
||||
role: "assistant",
|
||||
sessionID,
|
||||
providerID: "opencode",
|
||||
modelID: "kimi-k2.5-free",
|
||||
finish: true,
|
||||
tokens: {
|
||||
input: 200000,
|
||||
output: 0,
|
||||
reasoning: 0,
|
||||
cache: { read: 10000, write: 0 },
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
await hook["tool.execute.after"](
|
||||
{ tool: "bash", sessionID, callID: "call_1" },
|
||||
{ title: "", output: "test", metadata: null }
|
||||
)
|
||||
|
||||
expect(ctx.client.session.summarize).toHaveBeenCalled()
|
||||
})
|
||||
})
|
||||
|
||||
@@ -7,6 +7,7 @@ const PREEMPTIVE_COMPACTION_TIMEOUT_MS = 120_000
|
||||
|
||||
type ModelCacheStateLike = {
|
||||
anthropicContext1MEnabled: boolean
|
||||
modelContextLimitsCache?: Map<string, number>
|
||||
}
|
||||
|
||||
function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number {
|
||||
@@ -91,10 +92,12 @@ export function createPreemptiveCompactionHook(
|
||||
const cached = tokenCache.get(sessionID)
|
||||
if (!cached) return
|
||||
|
||||
const actualLimit =
|
||||
isAnthropicProvider(cached.providerID)
|
||||
? getAnthropicActualLimit(modelCacheState)
|
||||
: DEFAULT_ACTUAL_LIMIT
|
||||
const modelSpecificLimit = !isAnthropicProvider(cached.providerID)
|
||||
? modelCacheState?.modelContextLimitsCache?.get(`${cached.providerID}/${cached.modelID}`)
|
||||
: undefined
|
||||
const actualLimit = isAnthropicProvider(cached.providerID)
|
||||
? getAnthropicActualLimit(modelCacheState)
|
||||
: modelSpecificLimit ?? DEFAULT_ACTUAL_LIMIT
|
||||
|
||||
const lastTokens = cached.tokens
|
||||
const totalInputTokens = (lastTokens?.input ?? 0) + (lastTokens?.cache?.read ?? 0)
|
||||
@@ -164,6 +167,7 @@ export function createPreemptiveCompactionHook(
|
||||
modelID: info.modelID ?? "",
|
||||
tokens: info.tokens,
|
||||
})
|
||||
compactedSessions.delete(info.sessionID)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user