From d78669126024c925c582f08b014b1db37326c587 Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Tue, 17 Feb 2026 10:45:48 +0900 Subject: [PATCH] fix: read anthropic 1m flag from live model cache state --- src/hooks/context-window-monitor.test.ts | 8 ++++++-- src/hooks/context-window-monitor.ts | 12 +++++++---- src/hooks/directory-agents-injector/hook.ts | 4 ++-- src/hooks/directory-readme-injector/hook.ts | 4 ++-- src/hooks/preemptive-compaction.test.ts | 8 ++++++-- src/hooks/preemptive-compaction.ts | 12 +++++++---- src/hooks/rules-injector/hook.ts | 4 ++-- src/hooks/tool-output-truncator.ts | 4 ++-- src/plugin/hooks/create-session-hooks.ts | 4 ++-- src/plugin/hooks/create-tool-guard-hooks.ts | 8 ++++---- src/shared/dynamic-truncator.test.ts | 12 ++++++++--- src/shared/dynamic-truncator.ts | 22 ++++++++++++--------- 12 files changed, 64 insertions(+), 38 deletions(-) diff --git a/src/hooks/context-window-monitor.test.ts b/src/hooks/context-window-monitor.test.ts index d0f8de3fe..515e94f2c 100644 --- a/src/hooks/context-window-monitor.test.ts +++ b/src/hooks/context-window-monitor.test.ts @@ -249,7 +249,9 @@ describe("context-window-monitor", () => { it("should use 1M limit when model cache flag is enabled", async () => { //#given - const hook = createContextWindowMonitorHook(ctx as never, true) + const hook = createContextWindowMonitorHook(ctx as never, { + anthropicContext1MEnabled: true, + }) const sessionID = "ses_1m_flag" await hook.event({ @@ -286,7 +288,9 @@ describe("context-window-monitor", () => { it("should keep env var fallback when model cache flag is disabled", async () => { //#given process.env[ANTHROPIC_CONTEXT_ENV_KEY] = "true" - const hook = createContextWindowMonitorHook(ctx as never, false) + const hook = createContextWindowMonitorHook(ctx as never, { + anthropicContext1MEnabled: false, + }) const sessionID = "ses_env_fallback" await hook.event({ diff --git a/src/hooks/context-window-monitor.ts b/src/hooks/context-window-monitor.ts index 91d99a76d..399c0810c 100644 --- a/src/hooks/context-window-monitor.ts +++ b/src/hooks/context-window-monitor.ts @@ -5,8 +5,12 @@ const ANTHROPIC_DISPLAY_LIMIT = 1_000_000 const DEFAULT_ANTHROPIC_ACTUAL_LIMIT = 200_000 const CONTEXT_WARNING_THRESHOLD = 0.70 -function getAnthropicActualLimit(anthropicContext1MEnabled: boolean): number { - return anthropicContext1MEnabled || +type ModelCacheStateLike = { + anthropicContext1MEnabled: boolean +} + +function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number { + return (modelCacheState?.anthropicContext1MEnabled ?? false) || process.env.ANTHROPIC_1M_CONTEXT === "true" || process.env.VERTEX_ANTHROPIC_1M_CONTEXT === "true" ? 1_000_000 @@ -37,7 +41,7 @@ function isAnthropicProvider(providerID: string): boolean { export function createContextWindowMonitorHook( _ctx: PluginInput, - anthropicContext1MEnabled = false, + modelCacheState?: ModelCacheStateLike, ) { const remindedSessions = new Set() const tokenCache = new Map() @@ -59,7 +63,7 @@ export function createContextWindowMonitorHook( const totalInputTokens = (lastTokens?.input ?? 0) + (lastTokens?.cache?.read ?? 0) const actualUsagePercentage = - totalInputTokens / getAnthropicActualLimit(anthropicContext1MEnabled) + totalInputTokens / getAnthropicActualLimit(modelCacheState) if (actualUsagePercentage < CONTEXT_WARNING_THRESHOLD) return diff --git a/src/hooks/directory-agents-injector/hook.ts b/src/hooks/directory-agents-injector/hook.ts index 0dd431a73..fba64cc7b 100644 --- a/src/hooks/directory-agents-injector/hook.ts +++ b/src/hooks/directory-agents-injector/hook.ts @@ -29,10 +29,10 @@ interface EventInput { export function createDirectoryAgentsInjectorHook( ctx: PluginInput, - anthropicContext1MEnabled?: boolean, + modelCacheState?: { anthropicContext1MEnabled: boolean }, ) { const sessionCaches = new Map>(); - const truncator = createDynamicTruncator(ctx, anthropicContext1MEnabled); + const truncator = createDynamicTruncator(ctx, modelCacheState); const toolExecuteAfter = async (input: ToolExecuteInput, output: ToolExecuteOutput) => { const toolName = input.tool.toLowerCase(); diff --git a/src/hooks/directory-readme-injector/hook.ts b/src/hooks/directory-readme-injector/hook.ts index 608f81236..d621c7f27 100644 --- a/src/hooks/directory-readme-injector/hook.ts +++ b/src/hooks/directory-readme-injector/hook.ts @@ -29,10 +29,10 @@ interface EventInput { export function createDirectoryReadmeInjectorHook( ctx: PluginInput, - anthropicContext1MEnabled?: boolean, + modelCacheState?: { anthropicContext1MEnabled: boolean }, ) { const sessionCaches = new Map>(); - const truncator = createDynamicTruncator(ctx, anthropicContext1MEnabled); + const truncator = createDynamicTruncator(ctx, modelCacheState); const toolExecuteAfter = async (input: ToolExecuteInput, output: ToolExecuteOutput) => { const toolName = input.tool.toLowerCase(); diff --git a/src/hooks/preemptive-compaction.test.ts b/src/hooks/preemptive-compaction.test.ts index 41e704e9a..1550a5d53 100644 --- a/src/hooks/preemptive-compaction.test.ts +++ b/src/hooks/preemptive-compaction.test.ts @@ -269,7 +269,9 @@ describe("preemptive-compaction", () => { it("should use 1M limit when model cache flag is enabled", async () => { //#given - const hook = createPreemptiveCompactionHook(ctx as never, true) + const hook = createPreemptiveCompactionHook(ctx as never, { + anthropicContext1MEnabled: true, + }) const sessionID = "ses_1m_flag" await hook.event({ @@ -306,7 +308,9 @@ describe("preemptive-compaction", () => { it("should keep env var fallback when model cache flag is disabled", async () => { //#given process.env[ANTHROPIC_CONTEXT_ENV_KEY] = "true" - const hook = createPreemptiveCompactionHook(ctx as never, false) + const hook = createPreemptiveCompactionHook(ctx as never, { + anthropicContext1MEnabled: false, + }) const sessionID = "ses_env_fallback" await hook.event({ diff --git a/src/hooks/preemptive-compaction.ts b/src/hooks/preemptive-compaction.ts index 5157ab7aa..3e60e2a8d 100644 --- a/src/hooks/preemptive-compaction.ts +++ b/src/hooks/preemptive-compaction.ts @@ -2,8 +2,12 @@ import { log } from "../shared/logger" const DEFAULT_ACTUAL_LIMIT = 200_000 -function getAnthropicActualLimit(anthropicContext1MEnabled: boolean): number { - return anthropicContext1MEnabled || +type ModelCacheStateLike = { + anthropicContext1MEnabled: boolean +} + +function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number { + return (modelCacheState?.anthropicContext1MEnabled ?? false) || process.env.ANTHROPIC_1M_CONTEXT === "true" || process.env.VERTEX_ANTHROPIC_1M_CONTEXT === "true" ? 1_000_000 @@ -47,7 +51,7 @@ type PluginInput = { export function createPreemptiveCompactionHook( ctx: PluginInput, - anthropicContext1MEnabled = false, + modelCacheState?: ModelCacheStateLike, ) { const compactionInProgress = new Set() const compactedSessions = new Set() @@ -65,7 +69,7 @@ export function createPreemptiveCompactionHook( const actualLimit = isAnthropicProvider(cached.providerID) - ? getAnthropicActualLimit(anthropicContext1MEnabled) + ? getAnthropicActualLimit(modelCacheState) : DEFAULT_ACTUAL_LIMIT const lastTokens = cached.tokens diff --git a/src/hooks/rules-injector/hook.ts b/src/hooks/rules-injector/hook.ts index 4300fba50..fec4ffd2a 100644 --- a/src/hooks/rules-injector/hook.ts +++ b/src/hooks/rules-injector/hook.ts @@ -31,9 +31,9 @@ const TRACKED_TOOLS = ["read", "write", "edit", "multiedit"]; export function createRulesInjectorHook( ctx: PluginInput, - anthropicContext1MEnabled?: boolean, + modelCacheState?: { anthropicContext1MEnabled: boolean }, ) { - const truncator = createDynamicTruncator(ctx, anthropicContext1MEnabled); + const truncator = createDynamicTruncator(ctx, modelCacheState); const { getSessionCache, clearSessionCache } = createSessionCacheStore(); const { processFilePathForInjection } = createRuleInjectionProcessor({ workspaceDirectory: ctx.directory, diff --git a/src/hooks/tool-output-truncator.ts b/src/hooks/tool-output-truncator.ts index f3d880132..f47bf199b 100644 --- a/src/hooks/tool-output-truncator.ts +++ b/src/hooks/tool-output-truncator.ts @@ -27,12 +27,12 @@ const TOOL_SPECIFIC_MAX_TOKENS: Record = { } interface ToolOutputTruncatorOptions { - anthropicContext1MEnabled?: boolean + modelCacheState?: { anthropicContext1MEnabled: boolean } experimental?: ExperimentalConfig } export function createToolOutputTruncatorHook(ctx: PluginInput, options?: ToolOutputTruncatorOptions) { - const truncator = createDynamicTruncator(ctx, options?.anthropicContext1MEnabled) + const truncator = createDynamicTruncator(ctx, options?.modelCacheState) const truncateAll = options?.experimental?.truncate_all_tool_outputs ?? false const toolExecuteAfter = async ( diff --git a/src/plugin/hooks/create-session-hooks.ts b/src/plugin/hooks/create-session-hooks.ts index 457adda25..3d6d1c89b 100644 --- a/src/plugin/hooks/create-session-hooks.ts +++ b/src/plugin/hooks/create-session-hooks.ts @@ -66,14 +66,14 @@ export function createSessionHooks(args: { const contextWindowMonitor = isHookEnabled("context-window-monitor") ? safeHook("context-window-monitor", () => - createContextWindowMonitorHook(ctx, modelCacheState.anthropicContext1MEnabled)) + createContextWindowMonitorHook(ctx, modelCacheState)) : null const preemptiveCompaction = isHookEnabled("preemptive-compaction") && pluginConfig.experimental?.preemptive_compaction ? safeHook("preemptive-compaction", () => - createPreemptiveCompactionHook(ctx, modelCacheState.anthropicContext1MEnabled)) + createPreemptiveCompactionHook(ctx, modelCacheState)) : null const sessionRecovery = isHookEnabled("session-recovery") diff --git a/src/plugin/hooks/create-tool-guard-hooks.ts b/src/plugin/hooks/create-tool-guard-hooks.ts index b49b60b47..b762dfdea 100644 --- a/src/plugin/hooks/create-tool-guard-hooks.ts +++ b/src/plugin/hooks/create-tool-guard-hooks.ts @@ -51,7 +51,7 @@ export function createToolGuardHooks(args: { const toolOutputTruncator = isHookEnabled("tool-output-truncator") ? safeHook("tool-output-truncator", () => createToolOutputTruncatorHook(ctx, { - anthropicContext1MEnabled: modelCacheState.anthropicContext1MEnabled, + modelCacheState, experimental: pluginConfig.experimental, })) : null @@ -68,13 +68,13 @@ export function createToolGuardHooks(args: { }) } else { directoryAgentsInjector = safeHook("directory-agents-injector", () => - createDirectoryAgentsInjectorHook(ctx, modelCacheState.anthropicContext1MEnabled)) + createDirectoryAgentsInjectorHook(ctx, modelCacheState)) } } const directoryReadmeInjector = isHookEnabled("directory-readme-injector") ? safeHook("directory-readme-injector", () => - createDirectoryReadmeInjectorHook(ctx, modelCacheState.anthropicContext1MEnabled)) + createDirectoryReadmeInjectorHook(ctx, modelCacheState)) : null const emptyTaskResponseDetector = isHookEnabled("empty-task-response-detector") @@ -83,7 +83,7 @@ export function createToolGuardHooks(args: { const rulesInjector = isHookEnabled("rules-injector") ? safeHook("rules-injector", () => - createRulesInjectorHook(ctx, modelCacheState.anthropicContext1MEnabled)) + createRulesInjectorHook(ctx, modelCacheState)) : null const tasksTodowriteDisabler = isHookEnabled("tasks-todowrite-disabler") diff --git a/src/shared/dynamic-truncator.test.ts b/src/shared/dynamic-truncator.test.ts index 91105bc77..0a91d7096 100644 --- a/src/shared/dynamic-truncator.test.ts +++ b/src/shared/dynamic-truncator.test.ts @@ -60,7 +60,9 @@ describe("getContextWindowUsage", () => { const ctx = createContextUsageMockContext(300000) //#when - const usage = await getContextWindowUsage(ctx as never, "ses_1m_flag", true) + const usage = await getContextWindowUsage(ctx as never, "ses_1m_flag", { + anthropicContext1MEnabled: true, + }) //#then expect(usage?.usagePercentage).toBe(0.3) @@ -74,7 +76,9 @@ describe("getContextWindowUsage", () => { const ctx = createContextUsageMockContext(150000) //#when - const usage = await getContextWindowUsage(ctx as never, "ses_default", false) + const usage = await getContextWindowUsage(ctx as never, "ses_default", { + anthropicContext1MEnabled: false, + }) //#then expect(usage?.usagePercentage).toBe(0.75) @@ -87,7 +91,9 @@ describe("getContextWindowUsage", () => { const ctx = createContextUsageMockContext(300000) //#when - const usage = await getContextWindowUsage(ctx as never, "ses_env_fallback", false) + const usage = await getContextWindowUsage(ctx as never, "ses_env_fallback", { + anthropicContext1MEnabled: false, + }) //#then expect(usage?.usagePercentage).toBe(0.3) diff --git a/src/shared/dynamic-truncator.ts b/src/shared/dynamic-truncator.ts index 15432ce6e..5236f3e76 100644 --- a/src/shared/dynamic-truncator.ts +++ b/src/shared/dynamic-truncator.ts @@ -5,8 +5,12 @@ const DEFAULT_ANTHROPIC_ACTUAL_LIMIT = 200_000; const CHARS_PER_TOKEN_ESTIMATE = 4; const DEFAULT_TARGET_MAX_TOKENS = 50_000; -function getAnthropicActualLimit(anthropicContext1MEnabled = false): number { - return anthropicContext1MEnabled || +type ModelCacheStateLike = { + anthropicContext1MEnabled: boolean; +} + +function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number { + return (modelCacheState?.anthropicContext1MEnabled ?? false) || process.env.ANTHROPIC_1M_CONTEXT === "true" || process.env.VERTEX_ANTHROPIC_1M_CONTEXT === "true" ? 1_000_000 @@ -114,7 +118,7 @@ export function truncateToTokenLimit( export async function getContextWindowUsage( ctx: PluginInput, sessionID: string, - anthropicContext1MEnabled = false, + modelCacheState?: ModelCacheStateLike, ): Promise<{ usedTokens: number; remainingTokens: number; @@ -139,7 +143,7 @@ export async function getContextWindowUsage( (lastTokens?.input ?? 0) + (lastTokens?.cache?.read ?? 0) + (lastTokens?.output ?? 0); - const anthropicActualLimit = getAnthropicActualLimit(anthropicContext1MEnabled); + const anthropicActualLimit = getAnthropicActualLimit(modelCacheState); const remainingTokens = anthropicActualLimit - usedTokens; return { @@ -157,7 +161,7 @@ export async function dynamicTruncate( sessionID: string, output: string, options: TruncationOptions = {}, - anthropicContext1MEnabled = false, + modelCacheState?: ModelCacheStateLike, ): Promise { if (typeof output !== 'string') { return { result: String(output ?? ''), truncated: false }; @@ -168,7 +172,7 @@ export async function dynamicTruncate( preserveHeaderLines = 3, } = options; - const usage = await getContextWindowUsage(ctx, sessionID, anthropicContext1MEnabled); + const usage = await getContextWindowUsage(ctx, sessionID, modelCacheState); if (!usage) { // Fallback: apply conservative truncation when context usage unavailable @@ -192,17 +196,17 @@ export async function dynamicTruncate( export function createDynamicTruncator( ctx: PluginInput, - anthropicContext1MEnabled?: boolean, + modelCacheState?: ModelCacheStateLike, ) { return { truncate: ( sessionID: string, output: string, options?: TruncationOptions, - ) => dynamicTruncate(ctx, sessionID, output, options, anthropicContext1MEnabled), + ) => dynamicTruncate(ctx, sessionID, output, options, modelCacheState), getUsage: (sessionID: string) => - getContextWindowUsage(ctx, sessionID, anthropicContext1MEnabled), + getContextWindowUsage(ctx, sessionID, modelCacheState), truncateSync: ( output: string,