fix: prioritize Anthropic 1M limits over cached context limits

This commit is contained in:
YeonGyu-Kim
2026-03-11 20:38:44 +09:00
parent d40d686014
commit 4516b2e484
4 changed files with 84 additions and 11 deletions

View File

@@ -90,4 +90,48 @@ describe("context-window-monitor modelContextLimitsCache", () => {
expect(output.output).toContain("[Context Status: 72.5% used (190,000/262,144 tokens), 27.5% remaining]")
expect(output.output).not.toContain("1,000,000")
})
describe("#given Anthropic provider with cached context limit and 1M mode enabled", () => {
describe("#when cached usage would exceed 200K but stay below 1M", () => {
it("#then should ignore the cached limit and skip the reminder", async () => {
// given
const modelContextLimitsCache = new Map<string, number>()
modelContextLimitsCache.set("anthropic/claude-sonnet-4-5", 200000)
const hook = createContextWindowMonitorHook({} as never, {
anthropicContext1MEnabled: true,
modelContextLimitsCache,
})
const sessionID = "ses_anthropic_1m_overrides_cached_limit"
await hook.event({
event: {
type: "message.updated",
properties: {
info: {
role: "assistant",
sessionID,
providerID: "anthropic",
modelID: "claude-sonnet-4-5",
finish: true,
tokens: {
input: 300000,
output: 0,
reasoning: 0,
cache: { read: 0, write: 0 },
},
},
},
},
})
// when
const output = createOutput()
await hook["tool.execute.after"]({ tool: "bash", sessionID, callID: "call_1" }, output)
// then
expect(output.output).toBe("original")
})
})
})
})

View File

@@ -62,12 +62,12 @@ export function createContextWindowMonitorHook(
 const cached = tokenCache.get(sessionID)
 if (!cached) return
-const cachedLimit = modelCacheState?.modelContextLimitsCache?.get(
-  `${cached.providerID}/${cached.modelID}`
-)
-const actualLimit =
-  cachedLimit ??
-  (isAnthropicProvider(cached.providerID) ? getAnthropicActualLimit(modelCacheState) : null)
+const modelSpecificLimit = !isAnthropicProvider(cached.providerID)
+  ? modelCacheState?.modelContextLimitsCache?.get(`${cached.providerID}/${cached.modelID}`)
+  : undefined
+const actualLimit = isAnthropicProvider(cached.providerID)
+  ? getAnthropicActualLimit(modelCacheState)
+  : modelSpecificLimit
 if (!actualLimit) return

View File

@@ -124,4 +124,32 @@ describe("getContextWindowUsage", () => {
expect(usage?.usagePercentage).toBeCloseTo(180000 / 262144)
expect(usage?.remainingTokens).toBe(82144)
})
describe("#given Anthropic provider with cached context limit and 1M mode enabled", () => {
describe("#when context usage is resolved", () => {
it("#then should ignore the cached limit and use the 1M Anthropic limit", async () => {
// given
delete process.env[ANTHROPIC_CONTEXT_ENV_KEY]
delete process.env[VERTEX_CONTEXT_ENV_KEY]
const modelContextLimitsCache = new Map<string, number>()
modelContextLimitsCache.set("anthropic/claude-sonnet-4-5", 200000)
const ctx = createContextUsageMockContext(300000, {
providerID: "anthropic",
modelID: "claude-sonnet-4-5",
})
// when
const usage = await getContextWindowUsage(ctx as never, "ses_cached_anthropic_1m", {
anthropicContext1MEnabled: true,
modelContextLimitsCache,
})
// then
expect(usage?.usagePercentage).toBe(0.3)
expect(usage?.remainingTokens).toBe(700000)
})
})
})
})

View File

@@ -148,17 +148,18 @@ export async function getContextWindowUsage(
 const lastTokens = lastAssistant?.tokens;
 if (!lastAssistant || !lastTokens) return null;
-const cachedLimit =
-  lastAssistant.providerID !== undefined && lastAssistant.modelID !== undefined
+const modelSpecificLimit =
+  lastAssistant.providerID !== undefined &&
+  lastAssistant.modelID !== undefined &&
+  !isAnthropicProvider(lastAssistant.providerID)
     ? modelCacheState?.modelContextLimitsCache?.get(
         `${lastAssistant.providerID}/${lastAssistant.modelID}`,
       )
     : undefined;
 const actualLimit =
-  cachedLimit ??
-  (lastAssistant.providerID !== undefined && isAnthropicProvider(lastAssistant.providerID)
+  lastAssistant.providerID !== undefined && isAnthropicProvider(lastAssistant.providerID)
     ? getAnthropicActualLimit(modelCacheState)
-    : null);
+    : modelSpecificLimit ?? null;
 if (!actualLimit) return null;