Merge pull request #2664 from kilhyeonjun/fix/anthropic-1m-ga-context-limit

fix(shared): respect cached model context limits for Anthropic providers post-GA
2026-03-26 08:55:04 +09:00
parent 32f2c688e7 bf804b0626
commit 90919bf359
3 changed files with 176 additions and 10 deletions
--- a/src/hooks/context-window-monitor.model-context-limits.test.ts
+++ b/src/hooks/context-window-monitor.model-context-limits.test.ts
@@ -135,9 +135,96 @@ describe("context-window-monitor modelContextLimitsCache", () => {
    })
  })

-  describe("#given Anthropic provider with cached context limit and 1M mode disabled", () => {
-    describe("#when cached usage exceeds the Anthropic default limit", () => {
-      it("#then should ignore the cached limit and append the reminder from the default Anthropic limit", async () => {
+  describe("#given Anthropic 4.6 provider with cached context limit and 1M mode disabled", () => {
+    describe("#when cached usage is below threshold of cached limit", () => {
+      it("#then should respect the cached limit and skip the reminder", async () => {
+        // given
+        const modelContextLimitsCache = new Map<string, number>()
+        modelContextLimitsCache.set("anthropic/claude-sonnet-4-6", 500000)
+
+        const hook = createContextWindowMonitorHook({} as never, {
+          anthropicContext1MEnabled: false,
+          modelContextLimitsCache,
+        })
+        const sessionID = "ses_anthropic_cached_limit_respected"
+
+        await hook.event({
+          event: {
+            type: "message.updated",
+            properties: {
+              info: {
+                role: "assistant",
+                sessionID,
+                providerID: "anthropic",
+                modelID: "claude-sonnet-4-6",
+                finish: true,
+                tokens: {
+                  input: 150000,
+                  output: 0,
+                  reasoning: 0,
+                  cache: { read: 10000, write: 0 },
+                },
+              },
+            },
+          },
+        })
+
+        // when
+        const output = createOutput()
+        await hook["tool.execute.after"]({ tool: "bash", sessionID, callID: "call_1" }, output)
+
+        // then — 160K/500K = 32%, well below 70% threshold
+        expect(output.output).toBe("original")
+      })
+    })
+
+    describe("#when cached usage exceeds threshold of cached limit", () => {
+      it("#then should use the cached limit for the reminder", async () => {
+        // given
+        const modelContextLimitsCache = new Map<string, number>()
+        modelContextLimitsCache.set("anthropic/claude-sonnet-4-6", 500000)
+
+        const hook = createContextWindowMonitorHook({} as never, {
+          anthropicContext1MEnabled: false,
+          modelContextLimitsCache,
+        })
+        const sessionID = "ses_anthropic_cached_limit_exceeded"
+
+        await hook.event({
+          event: {
+            type: "message.updated",
+            properties: {
+              info: {
+                role: "assistant",
+                sessionID,
+                providerID: "anthropic",
+                modelID: "claude-sonnet-4-6",
+                finish: true,
+                tokens: {
+                  input: 350000,
+                  output: 0,
+                  reasoning: 0,
+                  cache: { read: 10000, write: 0 },
+                },
+              },
+            },
+          },
+        })
+
+        // when
+        const output = createOutput()
+        await hook["tool.execute.after"]({ tool: "bash", sessionID, callID: "call_1" }, output)
+
+        // then — 360K/500K = 72%, above 70% threshold, uses cached 500K limit
+        expect(output.output).toContain("context remaining")
+        expect(output.output).toContain("500,000-token context window")
+      })
+    })
+  })
+
+  describe("#given older Anthropic provider with cached context limit and 1M mode disabled", () => {
+    describe("#when cached usage would only exceed the incorrect cached limit", () => {
+      it("#then should ignore the cached limit and use the 200K default", async () => {
        // given
        const modelContextLimitsCache = new Map<string, number>()
        modelContextLimitsCache.set("anthropic/claude-sonnet-4-5", 500000)
@@ -146,7 +233,7 @@ describe("context-window-monitor modelContextLimitsCache", () => {
          anthropicContext1MEnabled: false,
          modelContextLimitsCache,
        })
-        const sessionID = "ses_anthropic_default_overrides_cached_limit"
+        const sessionID = "ses_anthropic_older_model_ignores_cached_limit"

        await hook.event({
          event: {
@@ -176,8 +263,6 @@ describe("context-window-monitor modelContextLimitsCache", () => {
        // then
        expect(output.output).toContain("context remaining")
        expect(output.output).toContain("200,000-token context window")
-        expect(output.output).not.toContain("500,000-token context window")
-        expect(output.output).not.toContain("1,000,000-token context window")
      })
    })
  })
--- a/src/shared/context-limit-resolver.test.ts
+++ b/src/shared/context-limit-resolver.test.ts
@@ -28,12 +28,29 @@ describe("resolveActualContextLimit", () => {
    resetContextLimitEnv()
  })

-  it("returns the default Anthropic limit when 1M mode is disabled despite a cached limit", () => {
+  it("returns cached limit for Anthropic 4.6 models when 1M mode is disabled (GA support)", () => {
    // given
    delete process.env[ANTHROPIC_CONTEXT_ENV_KEY]
    delete process.env[VERTEX_CONTEXT_ENV_KEY]
    const modelContextLimitsCache = new Map<string, number>()
-    modelContextLimitsCache.set("anthropic/claude-sonnet-4-5", 123456)
+    modelContextLimitsCache.set("anthropic/claude-opus-4-6", 1_000_000)
+
+    // when
+    const actualLimit = resolveActualContextLimit("anthropic", "claude-opus-4-6", {
+      anthropicContext1MEnabled: false,
+      modelContextLimitsCache,
+    })
+
+    // then — models.dev reports 1M for GA models, resolver should respect it
+    expect(actualLimit).toBe(1_000_000)
+  })
+
+  it("returns default 200K for older Anthropic models even when cached limit is higher", () => {
+    // given
+    delete process.env[ANTHROPIC_CONTEXT_ENV_KEY]
+    delete process.env[VERTEX_CONTEXT_ENV_KEY]
+    const modelContextLimitsCache = new Map<string, number>()
+    modelContextLimitsCache.set("anthropic/claude-sonnet-4-5", 500_000)

    // when
    const actualLimit = resolveActualContextLimit("anthropic", "claude-sonnet-4-5", {
@@ -42,7 +59,38 @@ describe("resolveActualContextLimit", () => {
    })

    // then
-    expect(actualLimit).toBe(200000)
+    expect(actualLimit).toBe(200_000)
+  })
+
+  it("returns default 200K for Anthropic models without cached limit and 1M mode disabled", () => {
+    // given
+    delete process.env[ANTHROPIC_CONTEXT_ENV_KEY]
+    delete process.env[VERTEX_CONTEXT_ENV_KEY]
+
+    // when
+    const actualLimit = resolveActualContextLimit("anthropic", "claude-sonnet-4-5", {
+      anthropicContext1MEnabled: false,
+    })
+
+    // then
+    expect(actualLimit).toBe(200_000)
+  })
+
+  it("explicit 1M mode takes priority over cached limit", () => {
+    // given
+    delete process.env[ANTHROPIC_CONTEXT_ENV_KEY]
+    delete process.env[VERTEX_CONTEXT_ENV_KEY]
+    const modelContextLimitsCache = new Map<string, number>()
+    modelContextLimitsCache.set("anthropic/claude-sonnet-4-5", 200_000)
+
+    // when
+    const actualLimit = resolveActualContextLimit("anthropic", "claude-sonnet-4-5", {
+      anthropicContext1MEnabled: true,
+      modelContextLimitsCache,
+    })
+
+    // then — explicit 1M flag overrides cached 200K
+    expect(actualLimit).toBe(1_000_000)
  })

  it("treats Anthropics aliases as Anthropic providers", () => {
@@ -61,6 +109,23 @@ describe("resolveActualContextLimit", () => {
    expect(actualLimit).toBe(200000)
  })

+  it("supports Anthropic 4.6 dot-version model IDs without explicit 1M mode", () => {
+    // given
+    delete process.env[ANTHROPIC_CONTEXT_ENV_KEY]
+    delete process.env[VERTEX_CONTEXT_ENV_KEY]
+    const modelContextLimitsCache = new Map<string, number>()
+    modelContextLimitsCache.set("anthropic/claude-opus-4.6", 1_000_000)
+
+    // when
+    const actualLimit = resolveActualContextLimit("anthropic", "claude-opus-4.6", {
+      anthropicContext1MEnabled: false,
+      modelContextLimitsCache,
+    })
+
+    // then
+    expect(actualLimit).toBe(1_000_000)
+  })
+
  it("returns null for non-Anthropic providers without a cached limit", () => {
    // given
    delete process.env[ANTHROPIC_CONTEXT_ENV_KEY]
--- a/src/shared/context-limit-resolver.ts
+++ b/src/shared/context-limit-resolver.ts
@@ -1,6 +1,12 @@
 import process from "node:process"

 const DEFAULT_ANTHROPIC_ACTUAL_LIMIT = 200_000
+const ANTHROPIC_NO_HEADER_GA_MODEL_IDS = new Set([
+  "claude-opus-4-6",
+  "claude-opus-4.6",
+  "claude-sonnet-4-6",
+  "claude-sonnet-4.6",
+])

 export type ContextLimitModelCacheState = {
  anthropicContext1MEnabled: boolean
@@ -20,13 +26,23 @@ function getAnthropicActualLimit(modelCacheState?: ContextLimitModelCacheState):
    : DEFAULT_ANTHROPIC_ACTUAL_LIMIT
 }

+function isAnthropicNoHeaderGaModel(modelID: string): boolean {
+  return ANTHROPIC_NO_HEADER_GA_MODEL_IDS.has(modelID.toLowerCase())
+}
+
 export function resolveActualContextLimit(
  providerID: string,
  modelID: string,
  modelCacheState?: ContextLimitModelCacheState,
 ): number | null {
  if (isAnthropicProvider(providerID)) {
-    return getAnthropicActualLimit(modelCacheState)
+    const explicit1M = getAnthropicActualLimit(modelCacheState)
+    if (explicit1M === 1_000_000) return explicit1M
+
+    const cachedLimit = modelCacheState?.modelContextLimitsCache?.get(`${providerID}/${modelID}`)
+    if (cachedLimit && isAnthropicNoHeaderGaModel(modelID)) return cachedLimit
+
+    return DEFAULT_ANTHROPIC_ACTUAL_LIMIT
  }

  return modelCacheState?.modelContextLimitsCache?.get(`${providerID}/${modelID}`) ?? null