Merge pull request #2664 from kilhyeonjun/fix/anthropic-1m-ga-context-limit

fix(shared): respect cached model context limits for Anthropic providers post-GA
This commit is contained in:
YeonGyu-Kim
2026-03-26 08:55:04 +09:00
committed by GitHub
3 changed files with 176 additions and 10 deletions

View File

@@ -135,9 +135,96 @@ describe("context-window-monitor modelContextLimitsCache", () => {
}) })
}) })
describe("#given Anthropic provider with cached context limit and 1M mode disabled", () => { describe("#given Anthropic 4.6 provider with cached context limit and 1M mode disabled", () => {
describe("#when cached usage exceeds the Anthropic default limit", () => { describe("#when cached usage is below threshold of cached limit", () => {
it("#then should ignore the cached limit and append the reminder from the default Anthropic limit", async () => { it("#then should respect the cached limit and skip the reminder", async () => {
// given
const modelContextLimitsCache = new Map<string, number>()
modelContextLimitsCache.set("anthropic/claude-sonnet-4-6", 500000)
const hook = createContextWindowMonitorHook({} as never, {
anthropicContext1MEnabled: false,
modelContextLimitsCache,
})
const sessionID = "ses_anthropic_cached_limit_respected"
await hook.event({
event: {
type: "message.updated",
properties: {
info: {
role: "assistant",
sessionID,
providerID: "anthropic",
modelID: "claude-sonnet-4-6",
finish: true,
tokens: {
input: 150000,
output: 0,
reasoning: 0,
cache: { read: 10000, write: 0 },
},
},
},
},
})
// when
const output = createOutput()
await hook["tool.execute.after"]({ tool: "bash", sessionID, callID: "call_1" }, output)
// then — 160K/500K = 32%, well below 70% threshold
expect(output.output).toBe("original")
})
})
describe("#when cached usage exceeds threshold of cached limit", () => {
  it("#then should use the cached limit for the reminder", async () => {
    // given — seed the cache with a 500K limit for a GA 4.6 model
    const cachedLimits = new Map<string, number>()
    cachedLimits.set("anthropic/claude-sonnet-4-6", 500000)
    const hook = createContextWindowMonitorHook({} as never, {
      anthropicContext1MEnabled: false,
      modelContextLimitsCache: cachedLimits,
    })
    const sessionID = "ses_anthropic_cached_limit_exceeded"
    // 350K input + 10K cache-read = 360K tokens of usage
    const tokens = {
      input: 350000,
      output: 0,
      reasoning: 0,
      cache: { read: 10000, write: 0 },
    }
    await hook.event({
      event: {
        type: "message.updated",
        properties: {
          info: {
            role: "assistant",
            sessionID,
            providerID: "anthropic",
            modelID: "claude-sonnet-4-6",
            finish: true,
            tokens,
          },
        },
      },
    })
    // when
    const output = createOutput()
    await hook["tool.execute.after"]({ tool: "bash", sessionID, callID: "call_1" }, output)
    // then — 360K of 500K (72%) crosses the 70% threshold, so the cached 500K limit drives the reminder
    expect(output.output).toContain("context remaining")
    expect(output.output).toContain("500,000-token context window")
  })
})
})
describe("#given older Anthropic provider with cached context limit and 1M mode disabled", () => {
describe("#when cached usage would only exceed the incorrect cached limit", () => {
it("#then should ignore the cached limit and use the 200K default", async () => {
// given // given
const modelContextLimitsCache = new Map<string, number>() const modelContextLimitsCache = new Map<string, number>()
modelContextLimitsCache.set("anthropic/claude-sonnet-4-5", 500000) modelContextLimitsCache.set("anthropic/claude-sonnet-4-5", 500000)
@@ -146,7 +233,7 @@ describe("context-window-monitor modelContextLimitsCache", () => {
anthropicContext1MEnabled: false, anthropicContext1MEnabled: false,
modelContextLimitsCache, modelContextLimitsCache,
}) })
const sessionID = "ses_anthropic_default_overrides_cached_limit" const sessionID = "ses_anthropic_older_model_ignores_cached_limit"
await hook.event({ await hook.event({
event: { event: {
@@ -176,8 +263,6 @@ describe("context-window-monitor modelContextLimitsCache", () => {
// then // then
expect(output.output).toContain("context remaining") expect(output.output).toContain("context remaining")
expect(output.output).toContain("200,000-token context window") expect(output.output).toContain("200,000-token context window")
expect(output.output).not.toContain("500,000-token context window")
expect(output.output).not.toContain("1,000,000-token context window")
}) })
}) })
}) })

View File

@@ -28,12 +28,29 @@ describe("resolveActualContextLimit", () => {
resetContextLimitEnv() resetContextLimitEnv()
}) })
it("returns the default Anthropic limit when 1M mode is disabled despite a cached limit", () => { it("returns cached limit for Anthropic 4.6 models when 1M mode is disabled (GA support)", () => {
// given // given
delete process.env[ANTHROPIC_CONTEXT_ENV_KEY] delete process.env[ANTHROPIC_CONTEXT_ENV_KEY]
delete process.env[VERTEX_CONTEXT_ENV_KEY] delete process.env[VERTEX_CONTEXT_ENV_KEY]
const modelContextLimitsCache = new Map<string, number>() const modelContextLimitsCache = new Map<string, number>()
modelContextLimitsCache.set("anthropic/claude-sonnet-4-5", 123456) modelContextLimitsCache.set("anthropic/claude-opus-4-6", 1_000_000)
// when
const actualLimit = resolveActualContextLimit("anthropic", "claude-opus-4-6", {
anthropicContext1MEnabled: false,
modelContextLimitsCache,
})
// then — models.dev reports 1M for GA models, resolver should respect it
expect(actualLimit).toBe(1_000_000)
})
it("returns default 200K for older Anthropic models even when cached limit is higher", () => {
// given
delete process.env[ANTHROPIC_CONTEXT_ENV_KEY]
delete process.env[VERTEX_CONTEXT_ENV_KEY]
const modelContextLimitsCache = new Map<string, number>()
modelContextLimitsCache.set("anthropic/claude-sonnet-4-5", 500_000)
// when // when
const actualLimit = resolveActualContextLimit("anthropic", "claude-sonnet-4-5", { const actualLimit = resolveActualContextLimit("anthropic", "claude-sonnet-4-5", {
@@ -42,7 +59,38 @@ describe("resolveActualContextLimit", () => {
}) })
// then // then
expect(actualLimit).toBe(200000) expect(actualLimit).toBe(200_000)
})
it("returns default 200K for Anthropic models without cached limit and 1M mode disabled", () => {
  // given — no env overrides and no cached limit entry for the model
  delete process.env[ANTHROPIC_CONTEXT_ENV_KEY]
  delete process.env[VERTEX_CONTEXT_ENV_KEY]
  const cacheState = { anthropicContext1MEnabled: false }
  // when
  const actualLimit = resolveActualContextLimit("anthropic", "claude-sonnet-4-5", cacheState)
  // then — falls back to the 200K Anthropic default
  expect(actualLimit).toBe(200_000)
})
it("explicit 1M mode takes priority over cached limit", () => {
// given
delete process.env[ANTHROPIC_CONTEXT_ENV_KEY]
delete process.env[VERTEX_CONTEXT_ENV_KEY]
const modelContextLimitsCache = new Map<string, number>()
modelContextLimitsCache.set("anthropic/claude-sonnet-4-5", 200_000)
// when
const actualLimit = resolveActualContextLimit("anthropic", "claude-sonnet-4-5", {
anthropicContext1MEnabled: true,
modelContextLimitsCache,
})
// then — explicit 1M flag overrides cached 200K
expect(actualLimit).toBe(1_000_000)
}) })
it("treats Anthropics aliases as Anthropic providers", () => { it("treats Anthropics aliases as Anthropic providers", () => {
@@ -61,6 +109,23 @@ describe("resolveActualContextLimit", () => {
expect(actualLimit).toBe(200000) expect(actualLimit).toBe(200000)
}) })
it("supports Anthropic 4.6 dot-version model IDs without explicit 1M mode", () => {
  // given — the cache (fed from models.dev) reports 1M for the dot-versioned GA model ID
  delete process.env[ANTHROPIC_CONTEXT_ENV_KEY]
  delete process.env[VERTEX_CONTEXT_ENV_KEY]
  const modelContextLimitsCache = new Map<string, number>([
    ["anthropic/claude-opus-4.6", 1_000_000],
  ])
  // when
  const actualLimit = resolveActualContextLimit("anthropic", "claude-opus-4.6", {
    anthropicContext1MEnabled: false,
    modelContextLimitsCache,
  })
  // then — the cached GA limit is respected even though the 1M flag is off
  expect(actualLimit).toBe(1_000_000)
})
it("returns null for non-Anthropic providers without a cached limit", () => { it("returns null for non-Anthropic providers without a cached limit", () => {
// given // given
delete process.env[ANTHROPIC_CONTEXT_ENV_KEY] delete process.env[ANTHROPIC_CONTEXT_ENV_KEY]

View File

@@ -1,6 +1,12 @@
import process from "node:process" import process from "node:process"
const DEFAULT_ANTHROPIC_ACTUAL_LIMIT = 200_000 const DEFAULT_ANTHROPIC_ACTUAL_LIMIT = 200_000
// Model IDs (lowercase) of GA Anthropic models whose cached context limit can be
// trusted without the 1M-context beta header being enabled. Both hyphenated and
// dot-separated version spellings are listed since the ID may arrive either way.
// NOTE(review): presumably future GA models must be appended here manually — confirm.
const ANTHROPIC_NO_HEADER_GA_MODEL_IDS = new Set([
"claude-opus-4-6",
"claude-opus-4.6",
"claude-sonnet-4-6",
"claude-sonnet-4.6",
])
export type ContextLimitModelCacheState = { export type ContextLimitModelCacheState = {
anthropicContext1MEnabled: boolean anthropicContext1MEnabled: boolean
@@ -20,13 +26,23 @@ function getAnthropicActualLimit(modelCacheState?: ContextLimitModelCacheState):
: DEFAULT_ANTHROPIC_ACTUAL_LIMIT : DEFAULT_ANTHROPIC_ACTUAL_LIMIT
} }
/**
 * Returns true when the given model ID belongs to the set of GA Anthropic
 * models whose cached context limit is honored without the 1M beta header.
 * The comparison is case-insensitive.
 */
function isAnthropicNoHeaderGaModel(modelID: string): boolean {
  const normalizedID = modelID.toLowerCase()
  return ANTHROPIC_NO_HEADER_GA_MODEL_IDS.has(normalizedID)
}
export function resolveActualContextLimit( export function resolveActualContextLimit(
providerID: string, providerID: string,
modelID: string, modelID: string,
modelCacheState?: ContextLimitModelCacheState, modelCacheState?: ContextLimitModelCacheState,
): number | null { ): number | null {
if (isAnthropicProvider(providerID)) { if (isAnthropicProvider(providerID)) {
return getAnthropicActualLimit(modelCacheState) const explicit1M = getAnthropicActualLimit(modelCacheState)
if (explicit1M === 1_000_000) return explicit1M
const cachedLimit = modelCacheState?.modelContextLimitsCache?.get(`${providerID}/${modelID}`)
if (cachedLimit && isAnthropicNoHeaderGaModel(modelID)) return cachedLimit
return DEFAULT_ANTHROPIC_ACTUAL_LIMIT
} }
return modelCacheState?.modelContextLimitsCache?.get(`${providerID}/${modelID}`) ?? null return modelCacheState?.modelContextLimitsCache?.get(`${providerID}/${modelID}`) ?? null