Merge pull request #2664 from kilhyeonjun/fix/anthropic-1m-ga-context-limit

fix(shared): respect cached model context limits for Anthropic providers post-GA
This commit is contained in:
YeonGyu-Kim
2026-03-26 08:55:04 +09:00
committed by GitHub
3 changed files with 176 additions and 10 deletions

View File

@@ -135,9 +135,96 @@ describe("context-window-monitor modelContextLimitsCache", () => {
}) })
}) })
describe("#given Anthropic provider with cached context limit and 1M mode disabled", () => { describe("#given Anthropic 4.6 provider with cached context limit and 1M mode disabled", () => {
describe("#when cached usage exceeds the Anthropic default limit", () => { describe("#when cached usage is below threshold of cached limit", () => {
it("#then should ignore the cached limit and append the reminder from the default Anthropic limit", async () => { it("#then should respect the cached limit and skip the reminder", async () => {
// given
const modelContextLimitsCache = new Map<string, number>()
modelContextLimitsCache.set("anthropic/claude-sonnet-4-6", 500000)
const hook = createContextWindowMonitorHook({} as never, {
anthropicContext1MEnabled: false,
modelContextLimitsCache,
})
const sessionID = "ses_anthropic_cached_limit_respected"
await hook.event({
event: {
type: "message.updated",
properties: {
info: {
role: "assistant",
sessionID,
providerID: "anthropic",
modelID: "claude-sonnet-4-6",
finish: true,
tokens: {
input: 150000,
output: 0,
reasoning: 0,
cache: { read: 10000, write: 0 },
},
},
},
},
})
// when
const output = createOutput()
await hook["tool.execute.after"]({ tool: "bash", sessionID, callID: "call_1" }, output)
// then — 160K/500K = 32%, well below 70% threshold
expect(output.output).toBe("original")
})
})
describe("#when cached usage exceeds threshold of cached limit", () => {
  it("#then should use the cached limit for the reminder", async () => {
    // given — seed the cache with a 500K limit for a GA 4.6 model
    const cachedLimits = new Map<string, number>()
    cachedLimits.set("anthropic/claude-sonnet-4-6", 500000)
    const hook = createContextWindowMonitorHook({} as never, {
      anthropicContext1MEnabled: false,
      modelContextLimitsCache: cachedLimits,
    })
    const sessionID = "ses_anthropic_cached_limit_exceeded"
    // 350K input + 10K cache-read = 360K tokens of usage
    const tokens = {
      input: 350000,
      output: 0,
      reasoning: 0,
      cache: { read: 10000, write: 0 },
    }
    await hook.event({
      event: {
        type: "message.updated",
        properties: {
          info: {
            role: "assistant",
            sessionID,
            providerID: "anthropic",
            modelID: "claude-sonnet-4-6",
            finish: true,
            tokens,
          },
        },
      },
    })
    // when
    const output = createOutput()
    await hook["tool.execute.after"]({ tool: "bash", sessionID, callID: "call_1" }, output)
    // then — 360K of 500K (72%) crosses the 70% threshold, so the cached 500K limit drives the reminder
    expect(output.output).toContain("context remaining")
    expect(output.output).toContain("500,000-token context window")
  })
})
})
describe("#given older Anthropic provider with cached context limit and 1M mode disabled", () => {
describe("#when cached usage would only exceed the incorrect cached limit", () => {
it("#then should ignore the cached limit and use the 200K default", async () => {
// given // given
const modelContextLimitsCache = new Map<string, number>() const modelContextLimitsCache = new Map<string, number>()
modelContextLimitsCache.set("anthropic/claude-sonnet-4-5", 500000) modelContextLimitsCache.set("anthropic/claude-sonnet-4-5", 500000)
@@ -146,7 +233,7 @@ describe("context-window-monitor modelContextLimitsCache", () => {
anthropicContext1MEnabled: false, anthropicContext1MEnabled: false,
modelContextLimitsCache, modelContextLimitsCache,
}) })
const sessionID = "ses_anthropic_default_overrides_cached_limit" const sessionID = "ses_anthropic_older_model_ignores_cached_limit"
await hook.event({ await hook.event({
event: { event: {
@@ -176,8 +263,6 @@ describe("context-window-monitor modelContextLimitsCache", () => {
// then // then
expect(output.output).toContain("context remaining") expect(output.output).toContain("context remaining")
expect(output.output).toContain("200,000-token context window") expect(output.output).toContain("200,000-token context window")
expect(output.output).not.toContain("500,000-token context window")
expect(output.output).not.toContain("1,000,000-token context window")
}) })
}) })
}) })

View File

@@ -28,12 +28,29 @@ describe("resolveActualContextLimit", () => {
resetContextLimitEnv() resetContextLimitEnv()
}) })
it("returns the default Anthropic limit when 1M mode is disabled despite a cached limit", () => { it("returns cached limit for Anthropic 4.6 models when 1M mode is disabled (GA support)", () => {
// given // given
delete process.env[ANTHROPIC_CONTEXT_ENV_KEY] delete process.env[ANTHROPIC_CONTEXT_ENV_KEY]
delete process.env[VERTEX_CONTEXT_ENV_KEY] delete process.env[VERTEX_CONTEXT_ENV_KEY]
const modelContextLimitsCache = new Map<string, number>() const modelContextLimitsCache = new Map<string, number>()
modelContextLimitsCache.set("anthropic/claude-sonnet-4-5", 123456) modelContextLimitsCache.set("anthropic/claude-opus-4-6", 1_000_000)
// when
const actualLimit = resolveActualContextLimit("anthropic", "claude-opus-4-6", {
anthropicContext1MEnabled: false,
modelContextLimitsCache,
})
// then — models.dev reports 1M for GA models, resolver should respect it
expect(actualLimit).toBe(1_000_000)
})
it("returns default 200K for older Anthropic models even when cached limit is higher", () => {
// given
delete process.env[ANTHROPIC_CONTEXT_ENV_KEY]
delete process.env[VERTEX_CONTEXT_ENV_KEY]
const modelContextLimitsCache = new Map<string, number>()
modelContextLimitsCache.set("anthropic/claude-sonnet-4-5", 500_000)
// when // when
const actualLimit = resolveActualContextLimit("anthropic", "claude-sonnet-4-5", { const actualLimit = resolveActualContextLimit("anthropic", "claude-sonnet-4-5", {
@@ -42,7 +59,38 @@ describe("resolveActualContextLimit", () => {
}) })
// then // then
expect(actualLimit).toBe(200000) expect(actualLimit).toBe(200_000)
})
it("returns default 200K for Anthropic models without cached limit and 1M mode disabled", () => {
  // given — no env overrides and no cached limit entry for the model
  delete process.env[ANTHROPIC_CONTEXT_ENV_KEY]
  delete process.env[VERTEX_CONTEXT_ENV_KEY]
  const cacheState = { anthropicContext1MEnabled: false }
  // when
  const actualLimit = resolveActualContextLimit("anthropic", "claude-sonnet-4-5", cacheState)
  // then — falls back to the 200K Anthropic default
  expect(actualLimit).toBe(200_000)
})
it("explicit 1M mode takes priority over cached limit", () => {
// given
delete process.env[ANTHROPIC_CONTEXT_ENV_KEY]
delete process.env[VERTEX_CONTEXT_ENV_KEY]
const modelContextLimitsCache = new Map<string, number>()
modelContextLimitsCache.set("anthropic/claude-sonnet-4-5", 200_000)
// when
const actualLimit = resolveActualContextLimit("anthropic", "claude-sonnet-4-5", {
anthropicContext1MEnabled: true,
modelContextLimitsCache,
})
// then — explicit 1M flag overrides cached 200K
expect(actualLimit).toBe(1_000_000)
}) })
it("treats Anthropics aliases as Anthropic providers", () => { it("treats Anthropics aliases as Anthropic providers", () => {
@@ -61,6 +109,23 @@ describe("resolveActualContextLimit", () => {
expect(actualLimit).toBe(200000) expect(actualLimit).toBe(200000)
}) })
it("supports Anthropic 4.6 dot-version model IDs without explicit 1M mode", () => {
  // given — the cache (fed from models.dev) reports 1M for the dot-versioned GA model ID
  delete process.env[ANTHROPIC_CONTEXT_ENV_KEY]
  delete process.env[VERTEX_CONTEXT_ENV_KEY]
  const modelContextLimitsCache = new Map<string, number>([
    ["anthropic/claude-opus-4.6", 1_000_000],
  ])
  // when
  const actualLimit = resolveActualContextLimit("anthropic", "claude-opus-4.6", {
    anthropicContext1MEnabled: false,
    modelContextLimitsCache,
  })
  // then — the cached GA limit is respected even though the 1M flag is off
  expect(actualLimit).toBe(1_000_000)
})
it("returns null for non-Anthropic providers without a cached limit", () => { it("returns null for non-Anthropic providers without a cached limit", () => {
// given // given
delete process.env[ANTHROPIC_CONTEXT_ENV_KEY] delete process.env[ANTHROPIC_CONTEXT_ENV_KEY]

View File

@@ -1,6 +1,12 @@
import process from "node:process" import process from "node:process"
const DEFAULT_ANTHROPIC_ACTUAL_LIMIT = 200_000 const DEFAULT_ANTHROPIC_ACTUAL_LIMIT = 200_000
// Model IDs (lowercase) of GA Anthropic models whose cached context limit can be
// trusted without the 1M-context beta header being enabled. Both hyphenated and
// dot-separated version spellings are listed since the ID may arrive either way.
// NOTE(review): presumably future GA models must be appended here manually — confirm.
const ANTHROPIC_NO_HEADER_GA_MODEL_IDS = new Set([
"claude-opus-4-6",
"claude-opus-4.6",
"claude-sonnet-4-6",
"claude-sonnet-4.6",
])
export type ContextLimitModelCacheState = { export type ContextLimitModelCacheState = {
anthropicContext1MEnabled: boolean anthropicContext1MEnabled: boolean
@@ -20,13 +26,23 @@ function getAnthropicActualLimit(modelCacheState?: ContextLimitModelCacheState):
: DEFAULT_ANTHROPIC_ACTUAL_LIMIT : DEFAULT_ANTHROPIC_ACTUAL_LIMIT
} }
/**
 * Returns true when the given model ID belongs to the set of GA Anthropic
 * models whose cached context limit is honored without the 1M beta header.
 * The comparison is case-insensitive.
 */
function isAnthropicNoHeaderGaModel(modelID: string): boolean {
  const normalizedID = modelID.toLowerCase()
  return ANTHROPIC_NO_HEADER_GA_MODEL_IDS.has(normalizedID)
}
export function resolveActualContextLimit( export function resolveActualContextLimit(
providerID: string, providerID: string,
modelID: string, modelID: string,
modelCacheState?: ContextLimitModelCacheState, modelCacheState?: ContextLimitModelCacheState,
): number | null { ): number | null {
if (isAnthropicProvider(providerID)) { if (isAnthropicProvider(providerID)) {
return getAnthropicActualLimit(modelCacheState) const explicit1M = getAnthropicActualLimit(modelCacheState)
if (explicit1M === 1_000_000) return explicit1M
const cachedLimit = modelCacheState?.modelContextLimitsCache?.get(`${providerID}/${modelID}`)
if (cachedLimit && isAnthropicNoHeaderGaModel(modelID)) return cachedLimit
return DEFAULT_ANTHROPIC_ACTUAL_LIMIT
} }
return modelCacheState?.modelContextLimitsCache?.get(`${providerID}/${modelID}`) ?? null return modelCacheState?.modelContextLimitsCache?.get(`${providerID}/${modelID}`) ?? null