Merge pull request #2664 from kilhyeonjun/fix/anthropic-1m-ga-context-limit
fix(shared): respect cached model context limits for Anthropic providers post-GA
This commit is contained in:
@@ -135,9 +135,96 @@ describe("context-window-monitor modelContextLimitsCache", () => {
|
||||
})
|
||||
})
|
||||
|
||||
describe("#given Anthropic provider with cached context limit and 1M mode disabled", () => {
|
||||
describe("#when cached usage exceeds the Anthropic default limit", () => {
|
||||
it("#then should ignore the cached limit and append the reminder from the default Anthropic limit", async () => {
|
||||
// Anthropic 4.6 model with a cached 500,000-token limit while the explicit
// 1M flag is off. Both cases expect the reminder decision and wording to be
// based on the cached limit.
describe("#given Anthropic 4.6 provider with cached context limit and 1M mode disabled", () => {
  describe("#when cached usage is below threshold of cached limit", () => {
    it("#then should respect the cached limit and skip the reminder", async () => {
      // given
      const modelContextLimitsCache = new Map<string, number>()
      modelContextLimitsCache.set("anthropic/claude-sonnet-4-6", 500000)

      const hook = createContextWindowMonitorHook({} as never, {
        anthropicContext1MEnabled: false,
        modelContextLimitsCache,
      })
      const sessionID = "ses_anthropic_cached_limit_respected"

      // Feed the hook a finished assistant message carrying the token usage.
      await hook.event({
        event: {
          type: "message.updated",
          properties: {
            info: {
              role: "assistant",
              sessionID,
              providerID: "anthropic",
              modelID: "claude-sonnet-4-6",
              finish: true,
              tokens: {
                input: 150000,
                output: 0,
                reasoning: 0,
                cache: { read: 10000, write: 0 },
              },
            },
          },
        },
      })

      // when
      const output = createOutput()
      await hook["tool.execute.after"]({ tool: "bash", sessionID, callID: "call_1" }, output)

      // then — 160K/500K = 32%, well below 70% threshold
      expect(output.output).toBe("original")
    })
  })

  describe("#when cached usage exceeds threshold of cached limit", () => {
    it("#then should use the cached limit for the reminder", async () => {
      // given
      const modelContextLimitsCache = new Map<string, number>()
      modelContextLimitsCache.set("anthropic/claude-sonnet-4-6", 500000)

      const hook = createContextWindowMonitorHook({} as never, {
        anthropicContext1MEnabled: false,
        modelContextLimitsCache,
      })
      const sessionID = "ses_anthropic_cached_limit_exceeded"

      // Same message shape as the case above, with a larger input token count.
      await hook.event({
        event: {
          type: "message.updated",
          properties: {
            info: {
              role: "assistant",
              sessionID,
              providerID: "anthropic",
              modelID: "claude-sonnet-4-6",
              finish: true,
              tokens: {
                input: 350000,
                output: 0,
                reasoning: 0,
                cache: { read: 10000, write: 0 },
              },
            },
          },
        },
      })

      // when
      const output = createOutput()
      await hook["tool.execute.after"]({ tool: "bash", sessionID, callID: "call_1" }, output)

      // then — 360K/500K = 72%, above 70% threshold, uses cached 500K limit
      expect(output.output).toContain("context remaining")
      expect(output.output).toContain("500,000-token context window")
    })
  })
})
|
||||
|
||||
describe("#given older Anthropic provider with cached context limit and 1M mode disabled", () => {
|
||||
describe("#when cached usage would only exceed the incorrect cached limit", () => {
|
||||
it("#then should ignore the cached limit and use the 200K default", async () => {
|
||||
// given
|
||||
const modelContextLimitsCache = new Map<string, number>()
|
||||
modelContextLimitsCache.set("anthropic/claude-sonnet-4-5", 500000)
|
||||
@@ -146,7 +233,7 @@ describe("context-window-monitor modelContextLimitsCache", () => {
|
||||
anthropicContext1MEnabled: false,
|
||||
modelContextLimitsCache,
|
||||
})
|
||||
const sessionID = "ses_anthropic_default_overrides_cached_limit"
|
||||
const sessionID = "ses_anthropic_older_model_ignores_cached_limit"
|
||||
|
||||
await hook.event({
|
||||
event: {
|
||||
@@ -176,8 +263,6 @@ describe("context-window-monitor modelContextLimitsCache", () => {
|
||||
// then
|
||||
expect(output.output).toContain("context remaining")
|
||||
expect(output.output).toContain("200,000-token context window")
|
||||
expect(output.output).not.toContain("500,000-token context window")
|
||||
expect(output.output).not.toContain("1,000,000-token context window")
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
@@ -28,12 +28,29 @@ describe("resolveActualContextLimit", () => {
|
||||
resetContextLimitEnv()
|
||||
})
|
||||
|
||||
it("returns the default Anthropic limit when 1M mode is disabled despite a cached limit", () => {
|
||||
// With the 1M flag off, a 4.6 model ID is expected to resolve to its cached limit.
it("returns cached limit for Anthropic 4.6 models when 1M mode is disabled (GA support)", () => {
  // given
  delete process.env[ANTHROPIC_CONTEXT_ENV_KEY]
  delete process.env[VERTEX_CONTEXT_ENV_KEY]
  const modelContextLimitsCache = new Map<string, number>()
  // NOTE(review): this sonnet-4-5 entry is never looked up by this test; it looks
  // like a leftover from the previous version of the test — confirm and remove.
  modelContextLimitsCache.set("anthropic/claude-sonnet-4-5", 123456)
  modelContextLimitsCache.set("anthropic/claude-opus-4-6", 1_000_000)

  // when
  const actualLimit = resolveActualContextLimit("anthropic", "claude-opus-4-6", {
    anthropicContext1MEnabled: false,
    modelContextLimitsCache,
  })

  // then — models.dev reports 1M for GA models, resolver should respect it
  expect(actualLimit).toBe(1_000_000)
})
|
||||
|
||||
it("returns default 200K for older Anthropic models even when cached limit is higher", () => {
|
||||
// given
|
||||
delete process.env[ANTHROPIC_CONTEXT_ENV_KEY]
|
||||
delete process.env[VERTEX_CONTEXT_ENV_KEY]
|
||||
const modelContextLimitsCache = new Map<string, number>()
|
||||
modelContextLimitsCache.set("anthropic/claude-sonnet-4-5", 500_000)
|
||||
|
||||
// when
|
||||
const actualLimit = resolveActualContextLimit("anthropic", "claude-sonnet-4-5", {
|
||||
@@ -42,7 +59,38 @@ describe("resolveActualContextLimit", () => {
|
||||
})
|
||||
|
||||
// then
|
||||
expect(actualLimit).toBe(200000)
|
||||
expect(actualLimit).toBe(200_000)
|
||||
})
|
||||
|
||||
// No cache is supplied at all, so only the 200K default can apply.
it("returns default 200K for Anthropic models without cached limit and 1M mode disabled", () => {
  // given
  delete process.env[ANTHROPIC_CONTEXT_ENV_KEY]
  delete process.env[VERTEX_CONTEXT_ENV_KEY]

  // when
  const actualLimit = resolveActualContextLimit("anthropic", "claude-sonnet-4-5", {
    anthropicContext1MEnabled: false,
  })

  // then
  expect(actualLimit).toBe(200_000)
})
|
||||
|
||||
// The cache says 200K but the explicit flag is on; the flag is expected to win.
it("explicit 1M mode takes priority over cached limit", () => {
  // given
  delete process.env[ANTHROPIC_CONTEXT_ENV_KEY]
  delete process.env[VERTEX_CONTEXT_ENV_KEY]
  const modelContextLimitsCache = new Map<string, number>()
  modelContextLimitsCache.set("anthropic/claude-sonnet-4-5", 200_000)

  // when
  const actualLimit = resolveActualContextLimit("anthropic", "claude-sonnet-4-5", {
    anthropicContext1MEnabled: true,
    modelContextLimitsCache,
  })

  // then — explicit 1M flag overrides cached 200K
  expect(actualLimit).toBe(1_000_000)
})
|
||||
|
||||
it("treats Anthropics aliases as Anthropic providers", () => {
|
||||
@@ -61,6 +109,23 @@ describe("resolveActualContextLimit", () => {
|
||||
expect(actualLimit).toBe(200000)
|
||||
})
|
||||
|
||||
// Same expectation as the hyphenated 4-6 ID, using the dotted "4.6" spelling.
it("supports Anthropic 4.6 dot-version model IDs without explicit 1M mode", () => {
  // given
  delete process.env[ANTHROPIC_CONTEXT_ENV_KEY]
  delete process.env[VERTEX_CONTEXT_ENV_KEY]
  const modelContextLimitsCache = new Map<string, number>()
  modelContextLimitsCache.set("anthropic/claude-opus-4.6", 1_000_000)

  // when
  const actualLimit = resolveActualContextLimit("anthropic", "claude-opus-4.6", {
    anthropicContext1MEnabled: false,
    modelContextLimitsCache,
  })

  // then
  expect(actualLimit).toBe(1_000_000)
})
|
||||
|
||||
it("returns null for non-Anthropic providers without a cached limit", () => {
|
||||
// given
|
||||
delete process.env[ANTHROPIC_CONTEXT_ENV_KEY]
|
||||
|
||||
@@ -1,6 +1,12 @@
|
||||
import process from "node:process"
|
||||
|
||||
const DEFAULT_ANTHROPIC_ACTUAL_LIMIT = 200_000
|
||||
/**
 * Anthropic model IDs for which a cached context limit is honoured even when
 * the explicit 1M flag is off.
 *
 * Each model is listed in both hyphenated ("4-6") and dotted ("4.6") form.
 * Entries are lowercase because the lookup lowercases the model ID first.
 *
 * NOTE(review): the name suggests these are models whose larger context is
 * generally available without an opt-in header — confirm against provider docs.
 */
const ANTHROPIC_NO_HEADER_GA_MODEL_IDS = new Set([
  "claude-opus-4-6",
  "claude-opus-4.6",
  "claude-sonnet-4-6",
  "claude-sonnet-4.6",
])
|
||||
|
||||
export type ContextLimitModelCacheState = {
|
||||
anthropicContext1MEnabled: boolean
|
||||
@@ -20,13 +26,23 @@ function getAnthropicActualLimit(modelCacheState?: ContextLimitModelCacheState):
|
||||
: DEFAULT_ANTHROPIC_ACTUAL_LIMIT
|
||||
}
|
||||
|
||||
/**
 * Reports whether `modelID` is listed in ANTHROPIC_NO_HEADER_GA_MODEL_IDS.
 *
 * The ID is lowercased before the lookup, so the match is case-insensitive.
 * It is an exact match: IDs with extra prefixes or suffixes are not recognised.
 *
 * @param modelID - Model identifier without the provider prefix.
 * @returns `true` when the lowercased ID is in the set, otherwise `false`.
 */
function isAnthropicNoHeaderGaModel(modelID: string): boolean {
  return ANTHROPIC_NO_HEADER_GA_MODEL_IDS.has(modelID.toLowerCase())
}
|
||||
|
||||
export function resolveActualContextLimit(
|
||||
providerID: string,
|
||||
modelID: string,
|
||||
modelCacheState?: ContextLimitModelCacheState,
|
||||
): number | null {
|
||||
if (isAnthropicProvider(providerID)) {
|
||||
return getAnthropicActualLimit(modelCacheState)
|
||||
const explicit1M = getAnthropicActualLimit(modelCacheState)
|
||||
if (explicit1M === 1_000_000) return explicit1M
|
||||
|
||||
const cachedLimit = modelCacheState?.modelContextLimitsCache?.get(`${providerID}/${modelID}`)
|
||||
if (cachedLimit && isAnthropicNoHeaderGaModel(modelID)) return cachedLimit
|
||||
|
||||
return DEFAULT_ANTHROPIC_ACTUAL_LIMIT
|
||||
}
|
||||
|
||||
return modelCacheState?.modelContextLimitsCache?.get(`${providerID}/${modelID}`) ?? null
|
||||
|
||||
Reference in New Issue
Block a user