Merge pull request #2664 from kilhyeonjun/fix/anthropic-1m-ga-context-limit
fix(shared): respect cached model context limits for Anthropic providers post-GA
This commit is contained in:
@@ -135,9 +135,96 @@ describe("context-window-monitor modelContextLimitsCache", () => {
|
|||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
describe("#given Anthropic provider with cached context limit and 1M mode disabled", () => {
|
describe("#given Anthropic 4.6 provider with cached context limit and 1M mode disabled", () => {
|
||||||
describe("#when cached usage exceeds the Anthropic default limit", () => {
|
describe("#when cached usage is below threshold of cached limit", () => {
|
||||||
it("#then should ignore the cached limit and append the reminder from the default Anthropic limit", async () => {
|
it("#then should respect the cached limit and skip the reminder", async () => {
|
||||||
|
// given
|
||||||
|
const modelContextLimitsCache = new Map<string, number>()
|
||||||
|
modelContextLimitsCache.set("anthropic/claude-sonnet-4-6", 500000)
|
||||||
|
|
||||||
|
const hook = createContextWindowMonitorHook({} as never, {
|
||||||
|
anthropicContext1MEnabled: false,
|
||||||
|
modelContextLimitsCache,
|
||||||
|
})
|
||||||
|
const sessionID = "ses_anthropic_cached_limit_respected"
|
||||||
|
|
||||||
|
await hook.event({
|
||||||
|
event: {
|
||||||
|
type: "message.updated",
|
||||||
|
properties: {
|
||||||
|
info: {
|
||||||
|
role: "assistant",
|
||||||
|
sessionID,
|
||||||
|
providerID: "anthropic",
|
||||||
|
modelID: "claude-sonnet-4-6",
|
||||||
|
finish: true,
|
||||||
|
tokens: {
|
||||||
|
input: 150000,
|
||||||
|
output: 0,
|
||||||
|
reasoning: 0,
|
||||||
|
cache: { read: 10000, write: 0 },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
// when
|
||||||
|
const output = createOutput()
|
||||||
|
await hook["tool.execute.after"]({ tool: "bash", sessionID, callID: "call_1" }, output)
|
||||||
|
|
||||||
|
// then — 160K/500K = 32%, well below 70% threshold
|
||||||
|
expect(output.output).toBe("original")
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe("#when cached usage exceeds threshold of cached limit", () => {
|
||||||
|
it("#then should use the cached limit for the reminder", async () => {
|
||||||
|
// given
|
||||||
|
const modelContextLimitsCache = new Map<string, number>()
|
||||||
|
modelContextLimitsCache.set("anthropic/claude-sonnet-4-6", 500000)
|
||||||
|
|
||||||
|
const hook = createContextWindowMonitorHook({} as never, {
|
||||||
|
anthropicContext1MEnabled: false,
|
||||||
|
modelContextLimitsCache,
|
||||||
|
})
|
||||||
|
const sessionID = "ses_anthropic_cached_limit_exceeded"
|
||||||
|
|
||||||
|
await hook.event({
|
||||||
|
event: {
|
||||||
|
type: "message.updated",
|
||||||
|
properties: {
|
||||||
|
info: {
|
||||||
|
role: "assistant",
|
||||||
|
sessionID,
|
||||||
|
providerID: "anthropic",
|
||||||
|
modelID: "claude-sonnet-4-6",
|
||||||
|
finish: true,
|
||||||
|
tokens: {
|
||||||
|
input: 350000,
|
||||||
|
output: 0,
|
||||||
|
reasoning: 0,
|
||||||
|
cache: { read: 10000, write: 0 },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
// when
|
||||||
|
const output = createOutput()
|
||||||
|
await hook["tool.execute.after"]({ tool: "bash", sessionID, callID: "call_1" }, output)
|
||||||
|
|
||||||
|
// then — 360K/500K = 72%, above 70% threshold, uses cached 500K limit
|
||||||
|
expect(output.output).toContain("context remaining")
|
||||||
|
expect(output.output).toContain("500,000-token context window")
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe("#given older Anthropic provider with cached context limit and 1M mode disabled", () => {
|
||||||
|
describe("#when cached usage would only exceed the incorrect cached limit", () => {
|
||||||
|
it("#then should ignore the cached limit and use the 200K default", async () => {
|
||||||
// given
|
// given
|
||||||
const modelContextLimitsCache = new Map<string, number>()
|
const modelContextLimitsCache = new Map<string, number>()
|
||||||
modelContextLimitsCache.set("anthropic/claude-sonnet-4-5", 500000)
|
modelContextLimitsCache.set("anthropic/claude-sonnet-4-5", 500000)
|
||||||
@@ -146,7 +233,7 @@ describe("context-window-monitor modelContextLimitsCache", () => {
|
|||||||
anthropicContext1MEnabled: false,
|
anthropicContext1MEnabled: false,
|
||||||
modelContextLimitsCache,
|
modelContextLimitsCache,
|
||||||
})
|
})
|
||||||
const sessionID = "ses_anthropic_default_overrides_cached_limit"
|
const sessionID = "ses_anthropic_older_model_ignores_cached_limit"
|
||||||
|
|
||||||
await hook.event({
|
await hook.event({
|
||||||
event: {
|
event: {
|
||||||
@@ -176,8 +263,6 @@ describe("context-window-monitor modelContextLimitsCache", () => {
|
|||||||
// then
|
// then
|
||||||
expect(output.output).toContain("context remaining")
|
expect(output.output).toContain("context remaining")
|
||||||
expect(output.output).toContain("200,000-token context window")
|
expect(output.output).toContain("200,000-token context window")
|
||||||
expect(output.output).not.toContain("500,000-token context window")
|
|
||||||
expect(output.output).not.toContain("1,000,000-token context window")
|
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -28,12 +28,29 @@ describe("resolveActualContextLimit", () => {
|
|||||||
resetContextLimitEnv()
|
resetContextLimitEnv()
|
||||||
})
|
})
|
||||||
|
|
||||||
it("returns the default Anthropic limit when 1M mode is disabled despite a cached limit", () => {
|
it("returns cached limit for Anthropic 4.6 models when 1M mode is disabled (GA support)", () => {
|
||||||
// given
|
// given
|
||||||
delete process.env[ANTHROPIC_CONTEXT_ENV_KEY]
|
delete process.env[ANTHROPIC_CONTEXT_ENV_KEY]
|
||||||
delete process.env[VERTEX_CONTEXT_ENV_KEY]
|
delete process.env[VERTEX_CONTEXT_ENV_KEY]
|
||||||
const modelContextLimitsCache = new Map<string, number>()
|
const modelContextLimitsCache = new Map<string, number>()
|
||||||
modelContextLimitsCache.set("anthropic/claude-sonnet-4-5", 123456)
|
modelContextLimitsCache.set("anthropic/claude-opus-4-6", 1_000_000)
|
||||||
|
|
||||||
|
// when
|
||||||
|
const actualLimit = resolveActualContextLimit("anthropic", "claude-opus-4-6", {
|
||||||
|
anthropicContext1MEnabled: false,
|
||||||
|
modelContextLimitsCache,
|
||||||
|
})
|
||||||
|
|
||||||
|
// then — models.dev reports 1M for GA models, resolver should respect it
|
||||||
|
expect(actualLimit).toBe(1_000_000)
|
||||||
|
})
|
||||||
|
|
||||||
|
it("returns default 200K for older Anthropic models even when cached limit is higher", () => {
|
||||||
|
// given
|
||||||
|
delete process.env[ANTHROPIC_CONTEXT_ENV_KEY]
|
||||||
|
delete process.env[VERTEX_CONTEXT_ENV_KEY]
|
||||||
|
const modelContextLimitsCache = new Map<string, number>()
|
||||||
|
modelContextLimitsCache.set("anthropic/claude-sonnet-4-5", 500_000)
|
||||||
|
|
||||||
// when
|
// when
|
||||||
const actualLimit = resolveActualContextLimit("anthropic", "claude-sonnet-4-5", {
|
const actualLimit = resolveActualContextLimit("anthropic", "claude-sonnet-4-5", {
|
||||||
@@ -42,7 +59,38 @@ describe("resolveActualContextLimit", () => {
|
|||||||
})
|
})
|
||||||
|
|
||||||
// then
|
// then
|
||||||
expect(actualLimit).toBe(200000)
|
expect(actualLimit).toBe(200_000)
|
||||||
|
})
|
||||||
|
|
||||||
|
it("returns default 200K for Anthropic models without cached limit and 1M mode disabled", () => {
|
||||||
|
// given
|
||||||
|
delete process.env[ANTHROPIC_CONTEXT_ENV_KEY]
|
||||||
|
delete process.env[VERTEX_CONTEXT_ENV_KEY]
|
||||||
|
|
||||||
|
// when
|
||||||
|
const actualLimit = resolveActualContextLimit("anthropic", "claude-sonnet-4-5", {
|
||||||
|
anthropicContext1MEnabled: false,
|
||||||
|
})
|
||||||
|
|
||||||
|
// then
|
||||||
|
expect(actualLimit).toBe(200_000)
|
||||||
|
})
|
||||||
|
|
||||||
|
it("explicit 1M mode takes priority over cached limit", () => {
|
||||||
|
// given
|
||||||
|
delete process.env[ANTHROPIC_CONTEXT_ENV_KEY]
|
||||||
|
delete process.env[VERTEX_CONTEXT_ENV_KEY]
|
||||||
|
const modelContextLimitsCache = new Map<string, number>()
|
||||||
|
modelContextLimitsCache.set("anthropic/claude-sonnet-4-5", 200_000)
|
||||||
|
|
||||||
|
// when
|
||||||
|
const actualLimit = resolveActualContextLimit("anthropic", "claude-sonnet-4-5", {
|
||||||
|
anthropicContext1MEnabled: true,
|
||||||
|
modelContextLimitsCache,
|
||||||
|
})
|
||||||
|
|
||||||
|
// then — explicit 1M flag overrides cached 200K
|
||||||
|
expect(actualLimit).toBe(1_000_000)
|
||||||
})
|
})
|
||||||
|
|
||||||
it("treats Anthropics aliases as Anthropic providers", () => {
|
it("treats Anthropics aliases as Anthropic providers", () => {
|
||||||
@@ -61,6 +109,23 @@ describe("resolveActualContextLimit", () => {
|
|||||||
expect(actualLimit).toBe(200000)
|
expect(actualLimit).toBe(200000)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
it("supports Anthropic 4.6 dot-version model IDs without explicit 1M mode", () => {
|
||||||
|
// given
|
||||||
|
delete process.env[ANTHROPIC_CONTEXT_ENV_KEY]
|
||||||
|
delete process.env[VERTEX_CONTEXT_ENV_KEY]
|
||||||
|
const modelContextLimitsCache = new Map<string, number>()
|
||||||
|
modelContextLimitsCache.set("anthropic/claude-opus-4.6", 1_000_000)
|
||||||
|
|
||||||
|
// when
|
||||||
|
const actualLimit = resolveActualContextLimit("anthropic", "claude-opus-4.6", {
|
||||||
|
anthropicContext1MEnabled: false,
|
||||||
|
modelContextLimitsCache,
|
||||||
|
})
|
||||||
|
|
||||||
|
// then
|
||||||
|
expect(actualLimit).toBe(1_000_000)
|
||||||
|
})
|
||||||
|
|
||||||
it("returns null for non-Anthropic providers without a cached limit", () => {
|
it("returns null for non-Anthropic providers without a cached limit", () => {
|
||||||
// given
|
// given
|
||||||
delete process.env[ANTHROPIC_CONTEXT_ENV_KEY]
|
delete process.env[ANTHROPIC_CONTEXT_ENV_KEY]
|
||||||
|
|||||||
@@ -1,6 +1,12 @@
|
|||||||
import process from "node:process"
|
import process from "node:process"
|
||||||
|
|
||||||
const DEFAULT_ANTHROPIC_ACTUAL_LIMIT = 200_000
|
const DEFAULT_ANTHROPIC_ACTUAL_LIMIT = 200_000
|
||||||
|
// Anthropic model IDs that isAnthropicNoHeaderGaModel matches against.
// Each model is listed in both its hyphenated ("4-6") and dotted ("4.6")
// version spelling. Entries are lowercase because the lookup lowercases the
// incoming model ID before testing membership.
// NOTE(review): the name suggests these are models whose larger context window
// is generally available without an opt-in header — confirm against provider docs.
const ANTHROPIC_NO_HEADER_GA_MODEL_IDS = new Set([
|
||||||
|
"claude-opus-4-6",
|
||||||
|
"claude-opus-4.6",
|
||||||
|
"claude-sonnet-4-6",
|
||||||
|
"claude-sonnet-4.6",
|
||||||
|
])
|
||||||
|
|
||||||
export type ContextLimitModelCacheState = {
|
export type ContextLimitModelCacheState = {
|
||||||
anthropicContext1MEnabled: boolean
|
anthropicContext1MEnabled: boolean
|
||||||
@@ -20,13 +26,23 @@ function getAnthropicActualLimit(modelCacheState?: ContextLimitModelCacheState):
|
|||||||
: DEFAULT_ANTHROPIC_ACTUAL_LIMIT
|
: DEFAULT_ANTHROPIC_ACTUAL_LIMIT
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Reports whether a model ID is listed in ANTHROPIC_NO_HEADER_GA_MODEL_IDS.
 *
 * The comparison is case-insensitive: the ID is lowercased before the lookup.
 * Only exact matches against the listed IDs succeed.
 *
 * @param modelID - Model identifier without the provider prefix.
 * @returns `true` when the lowercased ID is in the set, otherwise `false`.
 */
function isAnthropicNoHeaderGaModel(modelID: string): boolean {
|
||||||
|
return ANTHROPIC_NO_HEADER_GA_MODEL_IDS.has(modelID.toLowerCase())
|
||||||
|
}
|
||||||
|
|
||||||
export function resolveActualContextLimit(
|
export function resolveActualContextLimit(
|
||||||
providerID: string,
|
providerID: string,
|
||||||
modelID: string,
|
modelID: string,
|
||||||
modelCacheState?: ContextLimitModelCacheState,
|
modelCacheState?: ContextLimitModelCacheState,
|
||||||
): number | null {
|
): number | null {
|
||||||
if (isAnthropicProvider(providerID)) {
|
if (isAnthropicProvider(providerID)) {
|
||||||
return getAnthropicActualLimit(modelCacheState)
|
const explicit1M = getAnthropicActualLimit(modelCacheState)
|
||||||
|
if (explicit1M === 1_000_000) return explicit1M
|
||||||
|
|
||||||
|
const cachedLimit = modelCacheState?.modelContextLimitsCache?.get(`${providerID}/${modelID}`)
|
||||||
|
if (cachedLimit && isAnthropicNoHeaderGaModel(modelID)) return cachedLimit
|
||||||
|
|
||||||
|
return DEFAULT_ANTHROPIC_ACTUAL_LIMIT
|
||||||
}
|
}
|
||||||
|
|
||||||
return modelCacheState?.modelContextLimitsCache?.get(`${providerID}/${modelID}`) ?? null
|
return modelCacheState?.modelContextLimitsCache?.get(`${providerID}/${modelID}`) ?? null
|
||||||
|
|||||||
Reference in New Issue
Block a user