fix: prioritize Anthropic 1M limits over cached context limits
This commit is contained in:
@@ -90,4 +90,48 @@ describe("context-window-monitor modelContextLimitsCache", () => {
|
||||
expect(output.output).toContain("[Context Status: 72.5% used (190,000/262,144 tokens), 27.5% remaining]")
|
||||
expect(output.output).not.toContain("1,000,000")
|
||||
})
|
||||
|
||||
describe("#given Anthropic provider with cached context limit and 1M mode enabled", () => {
|
||||
describe("#when cached usage would exceed 200K but stay below 1M", () => {
|
||||
it("#then should ignore the cached limit and skip the reminder", async () => {
|
||||
// given
|
||||
const modelContextLimitsCache = new Map<string, number>()
|
||||
modelContextLimitsCache.set("anthropic/claude-sonnet-4-5", 200000)
|
||||
|
||||
const hook = createContextWindowMonitorHook({} as never, {
|
||||
anthropicContext1MEnabled: true,
|
||||
modelContextLimitsCache,
|
||||
})
|
||||
const sessionID = "ses_anthropic_1m_overrides_cached_limit"
|
||||
|
||||
await hook.event({
|
||||
event: {
|
||||
type: "message.updated",
|
||||
properties: {
|
||||
info: {
|
||||
role: "assistant",
|
||||
sessionID,
|
||||
providerID: "anthropic",
|
||||
modelID: "claude-sonnet-4-5",
|
||||
finish: true,
|
||||
tokens: {
|
||||
input: 300000,
|
||||
output: 0,
|
||||
reasoning: 0,
|
||||
cache: { read: 0, write: 0 },
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
// when
|
||||
const output = createOutput()
|
||||
await hook["tool.execute.after"]({ tool: "bash", sessionID, callID: "call_1" }, output)
|
||||
|
||||
// then
|
||||
expect(output.output).toBe("original")
|
||||
})
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
@@ -62,12 +62,12 @@ export function createContextWindowMonitorHook(
|
||||
const cached = tokenCache.get(sessionID)
|
||||
if (!cached) return
|
||||
|
||||
- const cachedLimit = modelCacheState?.modelContextLimitsCache?.get(
|
||||
- `${cached.providerID}/${cached.modelID}`
|
||||
- )
|
||||
- const actualLimit =
|
||||
- cachedLimit ??
|
||||
- (isAnthropicProvider(cached.providerID) ? getAnthropicActualLimit(modelCacheState) : null)
|
||||
+ const modelSpecificLimit = !isAnthropicProvider(cached.providerID)
|
||||
+ ? modelCacheState?.modelContextLimitsCache?.get(`${cached.providerID}/${cached.modelID}`)
|
||||
+ : undefined
|
||||
+ const actualLimit = isAnthropicProvider(cached.providerID)
|
||||
+ ? getAnthropicActualLimit(modelCacheState)
|
||||
+ : modelSpecificLimit
|
||||
|
||||
if (!actualLimit) return
|
||||
|
||||
|
||||
@@ -124,4 +124,32 @@ describe("getContextWindowUsage", () => {
|
||||
expect(usage?.usagePercentage).toBeCloseTo(180000 / 262144)
|
||||
expect(usage?.remainingTokens).toBe(82144)
|
||||
})
|
||||
|
||||
describe("#given Anthropic provider with cached context limit and 1M mode enabled", () => {
|
||||
describe("#when context usage is resolved", () => {
|
||||
it("#then should ignore the cached limit and use the 1M Anthropic limit", async () => {
|
||||
// given
|
||||
delete process.env[ANTHROPIC_CONTEXT_ENV_KEY]
|
||||
delete process.env[VERTEX_CONTEXT_ENV_KEY]
|
||||
|
||||
const modelContextLimitsCache = new Map<string, number>()
|
||||
modelContextLimitsCache.set("anthropic/claude-sonnet-4-5", 200000)
|
||||
|
||||
const ctx = createContextUsageMockContext(300000, {
|
||||
providerID: "anthropic",
|
||||
modelID: "claude-sonnet-4-5",
|
||||
})
|
||||
|
||||
// when
|
||||
const usage = await getContextWindowUsage(ctx as never, "ses_cached_anthropic_1m", {
|
||||
anthropicContext1MEnabled: true,
|
||||
modelContextLimitsCache,
|
||||
})
|
||||
|
||||
// then
|
||||
expect(usage?.usagePercentage).toBe(0.3)
|
||||
expect(usage?.remainingTokens).toBe(700000)
|
||||
})
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
@@ -148,17 +148,18 @@ export async function getContextWindowUsage(
|
||||
const lastTokens = lastAssistant?.tokens;
|
||||
if (!lastAssistant || !lastTokens) return null;
|
||||
|
||||
- const cachedLimit =
|
||||
- lastAssistant.providerID !== undefined && lastAssistant.modelID !== undefined
|
||||
+ const modelSpecificLimit =
|
||||
+ lastAssistant.providerID !== undefined &&
|
||||
+ lastAssistant.modelID !== undefined &&
|
||||
+ !isAnthropicProvider(lastAssistant.providerID)
|
||||
? modelCacheState?.modelContextLimitsCache?.get(
|
||||
`${lastAssistant.providerID}/${lastAssistant.modelID}`,
|
||||
)
|
||||
: undefined;
|
||||
const actualLimit =
|
||||
- cachedLimit ??
|
||||
- (lastAssistant.providerID !== undefined && isAnthropicProvider(lastAssistant.providerID)
|
||||
+ lastAssistant.providerID !== undefined && isAnthropicProvider(lastAssistant.providerID)
|
||||
? getAnthropicActualLimit(modelCacheState)
|
||||
- : null);
|
||||
+ : modelSpecificLimit ?? null;
|
||||
|
||||
if (!actualLimit) return null;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user