fix: read anthropic 1m flag from live model cache state

2026-02-17 10:45:48 +09:00
parent 363016681b
commit d786691260
12 changed files with 64 additions and 38 deletions
--- a/src/hooks/context-window-monitor.test.ts
+++ b/src/hooks/context-window-monitor.test.ts
@@ -249,7 +249,9 @@ describe("context-window-monitor", () => {

  it("should use 1M limit when model cache flag is enabled", async () => {
    //#given
-    const hook = createContextWindowMonitorHook(ctx as never, true)
+    const hook = createContextWindowMonitorHook(ctx as never, {
+      anthropicContext1MEnabled: true,
+    })
    const sessionID = "ses_1m_flag"

    await hook.event({
@@ -286,7 +288,9 @@ describe("context-window-monitor", () => {
  it("should keep env var fallback when model cache flag is disabled", async () => {
    //#given
    process.env[ANTHROPIC_CONTEXT_ENV_KEY] = "true"
-    const hook = createContextWindowMonitorHook(ctx as never, false)
+    const hook = createContextWindowMonitorHook(ctx as never, {
+      anthropicContext1MEnabled: false,
+    })
    const sessionID = "ses_env_fallback"

    await hook.event({
--- a/src/hooks/context-window-monitor.ts
+++ b/src/hooks/context-window-monitor.ts
@@ -5,8 +5,12 @@ const ANTHROPIC_DISPLAY_LIMIT = 1_000_000
 const DEFAULT_ANTHROPIC_ACTUAL_LIMIT = 200_000
 const CONTEXT_WARNING_THRESHOLD = 0.70

-function getAnthropicActualLimit(anthropicContext1MEnabled: boolean): number {
-  return anthropicContext1MEnabled ||
+type ModelCacheStateLike = {
+  anthropicContext1MEnabled: boolean
+}
+
+function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number {
+  return (modelCacheState?.anthropicContext1MEnabled ?? false) ||
    process.env.ANTHROPIC_1M_CONTEXT === "true" ||
    process.env.VERTEX_ANTHROPIC_1M_CONTEXT === "true"
    ? 1_000_000
@@ -37,7 +41,7 @@ function isAnthropicProvider(providerID: string): boolean {

 export function createContextWindowMonitorHook(
  _ctx: PluginInput,
-  anthropicContext1MEnabled = false,
+  modelCacheState?: ModelCacheStateLike,
 ) {
  const remindedSessions = new Set<string>()
  const tokenCache = new Map<string, CachedTokenState>()
@@ -59,7 +63,7 @@ export function createContextWindowMonitorHook(
    const totalInputTokens = (lastTokens?.input ?? 0) + (lastTokens?.cache?.read ?? 0)

    const actualUsagePercentage =
-      totalInputTokens / getAnthropicActualLimit(anthropicContext1MEnabled)
+      totalInputTokens / getAnthropicActualLimit(modelCacheState)

    if (actualUsagePercentage < CONTEXT_WARNING_THRESHOLD) return

--- a/src/hooks/directory-agents-injector/hook.ts
+++ b/src/hooks/directory-agents-injector/hook.ts
@@ -29,10 +29,10 @@ interface EventInput {

 export function createDirectoryAgentsInjectorHook(
  ctx: PluginInput,
-  anthropicContext1MEnabled?: boolean,
+  modelCacheState?: { anthropicContext1MEnabled: boolean },
 ) {
  const sessionCaches = new Map<string, Set<string>>();
-  const truncator = createDynamicTruncator(ctx, anthropicContext1MEnabled);
+  const truncator = createDynamicTruncator(ctx, modelCacheState);

  const toolExecuteAfter = async (input: ToolExecuteInput, output: ToolExecuteOutput) => {
    const toolName = input.tool.toLowerCase();
--- a/src/hooks/directory-readme-injector/hook.ts
+++ b/src/hooks/directory-readme-injector/hook.ts
@@ -29,10 +29,10 @@ interface EventInput {

 export function createDirectoryReadmeInjectorHook(
  ctx: PluginInput,
-  anthropicContext1MEnabled?: boolean,
+  modelCacheState?: { anthropicContext1MEnabled: boolean },
 ) {
  const sessionCaches = new Map<string, Set<string>>();
-  const truncator = createDynamicTruncator(ctx, anthropicContext1MEnabled);
+  const truncator = createDynamicTruncator(ctx, modelCacheState);

  const toolExecuteAfter = async (input: ToolExecuteInput, output: ToolExecuteOutput) => {
    const toolName = input.tool.toLowerCase();
--- a/src/hooks/preemptive-compaction.test.ts
+++ b/src/hooks/preemptive-compaction.test.ts
@@ -269,7 +269,9 @@ describe("preemptive-compaction", () => {

  it("should use 1M limit when model cache flag is enabled", async () => {
    //#given
-    const hook = createPreemptiveCompactionHook(ctx as never, true)
+    const hook = createPreemptiveCompactionHook(ctx as never, {
+      anthropicContext1MEnabled: true,
+    })
    const sessionID = "ses_1m_flag"

    await hook.event({
@@ -306,7 +308,9 @@ describe("preemptive-compaction", () => {
  it("should keep env var fallback when model cache flag is disabled", async () => {
    //#given
    process.env[ANTHROPIC_CONTEXT_ENV_KEY] = "true"
-    const hook = createPreemptiveCompactionHook(ctx as never, false)
+    const hook = createPreemptiveCompactionHook(ctx as never, {
+      anthropicContext1MEnabled: false,
+    })
    const sessionID = "ses_env_fallback"

    await hook.event({
--- a/src/hooks/preemptive-compaction.ts
+++ b/src/hooks/preemptive-compaction.ts
@@ -2,8 +2,12 @@ import { log } from "../shared/logger"

 const DEFAULT_ACTUAL_LIMIT = 200_000

-function getAnthropicActualLimit(anthropicContext1MEnabled: boolean): number {
-  return anthropicContext1MEnabled ||
+type ModelCacheStateLike = {
+  anthropicContext1MEnabled: boolean
+}
+
+function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number {
+  return (modelCacheState?.anthropicContext1MEnabled ?? false) ||
    process.env.ANTHROPIC_1M_CONTEXT === "true" ||
    process.env.VERTEX_ANTHROPIC_1M_CONTEXT === "true"
    ? 1_000_000
@@ -47,7 +51,7 @@ type PluginInput = {

 export function createPreemptiveCompactionHook(
  ctx: PluginInput,
-  anthropicContext1MEnabled = false,
+  modelCacheState?: ModelCacheStateLike,
 ) {
  const compactionInProgress = new Set<string>()
  const compactedSessions = new Set<string>()
@@ -65,7 +69,7 @@ export function createPreemptiveCompactionHook(

    const actualLimit =
      isAnthropicProvider(cached.providerID)
-        ? getAnthropicActualLimit(anthropicContext1MEnabled)
+        ? getAnthropicActualLimit(modelCacheState)
        : DEFAULT_ACTUAL_LIMIT

    const lastTokens = cached.tokens
--- a/src/hooks/rules-injector/hook.ts
+++ b/src/hooks/rules-injector/hook.ts
@@ -31,9 +31,9 @@ const TRACKED_TOOLS = ["read", "write", "edit", "multiedit"];

 export function createRulesInjectorHook(
  ctx: PluginInput,
-  anthropicContext1MEnabled?: boolean,
+  modelCacheState?: { anthropicContext1MEnabled: boolean },
 ) {
-  const truncator = createDynamicTruncator(ctx, anthropicContext1MEnabled);
+  const truncator = createDynamicTruncator(ctx, modelCacheState);
  const { getSessionCache, clearSessionCache } = createSessionCacheStore();
  const { processFilePathForInjection } = createRuleInjectionProcessor({
    workspaceDirectory: ctx.directory,
--- a/src/hooks/tool-output-truncator.ts
+++ b/src/hooks/tool-output-truncator.ts
@@ -27,12 +27,12 @@ const TOOL_SPECIFIC_MAX_TOKENS: Record<string, number> = {
 }

 interface ToolOutputTruncatorOptions {
-  anthropicContext1MEnabled?: boolean
+  modelCacheState?: { anthropicContext1MEnabled: boolean }
  experimental?: ExperimentalConfig
 }

 export function createToolOutputTruncatorHook(ctx: PluginInput, options?: ToolOutputTruncatorOptions) {
-  const truncator = createDynamicTruncator(ctx, options?.anthropicContext1MEnabled)
+  const truncator = createDynamicTruncator(ctx, options?.modelCacheState)
  const truncateAll = options?.experimental?.truncate_all_tool_outputs ?? false

  const toolExecuteAfter = async (
--- a/src/plugin/hooks/create-session-hooks.ts
+++ b/src/plugin/hooks/create-session-hooks.ts
@@ -66,14 +66,14 @@ export function createSessionHooks(args: {

  const contextWindowMonitor = isHookEnabled("context-window-monitor")
    ? safeHook("context-window-monitor", () =>
-        createContextWindowMonitorHook(ctx, modelCacheState.anthropicContext1MEnabled))
+        createContextWindowMonitorHook(ctx, modelCacheState))
    : null

  const preemptiveCompaction =
    isHookEnabled("preemptive-compaction") &&
    pluginConfig.experimental?.preemptive_compaction
      ? safeHook("preemptive-compaction", () =>
-          createPreemptiveCompactionHook(ctx, modelCacheState.anthropicContext1MEnabled))
+          createPreemptiveCompactionHook(ctx, modelCacheState))
      : null

  const sessionRecovery = isHookEnabled("session-recovery")
--- a/src/plugin/hooks/create-tool-guard-hooks.ts
+++ b/src/plugin/hooks/create-tool-guard-hooks.ts
@@ -51,7 +51,7 @@ export function createToolGuardHooks(args: {
  const toolOutputTruncator = isHookEnabled("tool-output-truncator")
    ? safeHook("tool-output-truncator", () =>
        createToolOutputTruncatorHook(ctx, {
-          anthropicContext1MEnabled: modelCacheState.anthropicContext1MEnabled,
+          modelCacheState,
          experimental: pluginConfig.experimental,
        }))
    : null
@@ -68,13 +68,13 @@ export function createToolGuardHooks(args: {
      })
    } else {
      directoryAgentsInjector = safeHook("directory-agents-injector", () =>
-        createDirectoryAgentsInjectorHook(ctx, modelCacheState.anthropicContext1MEnabled))
+        createDirectoryAgentsInjectorHook(ctx, modelCacheState))
    }
  }

  const directoryReadmeInjector = isHookEnabled("directory-readme-injector")
    ? safeHook("directory-readme-injector", () =>
-        createDirectoryReadmeInjectorHook(ctx, modelCacheState.anthropicContext1MEnabled))
+        createDirectoryReadmeInjectorHook(ctx, modelCacheState))
    : null

  const emptyTaskResponseDetector = isHookEnabled("empty-task-response-detector")
@@ -83,7 +83,7 @@ export function createToolGuardHooks(args: {

  const rulesInjector = isHookEnabled("rules-injector")
    ? safeHook("rules-injector", () =>
-        createRulesInjectorHook(ctx, modelCacheState.anthropicContext1MEnabled))
+        createRulesInjectorHook(ctx, modelCacheState))
    : null

  const tasksTodowriteDisabler = isHookEnabled("tasks-todowrite-disabler")
--- a/src/shared/dynamic-truncator.test.ts
+++ b/src/shared/dynamic-truncator.test.ts
@@ -60,7 +60,9 @@ describe("getContextWindowUsage", () => {
    const ctx = createContextUsageMockContext(300000)

    //#when
-    const usage = await getContextWindowUsage(ctx as never, "ses_1m_flag", true)
+    const usage = await getContextWindowUsage(ctx as never, "ses_1m_flag", {
+      anthropicContext1MEnabled: true,
+    })

    //#then
    expect(usage?.usagePercentage).toBe(0.3)
@@ -74,7 +76,9 @@ describe("getContextWindowUsage", () => {
    const ctx = createContextUsageMockContext(150000)

    //#when
-    const usage = await getContextWindowUsage(ctx as never, "ses_default", false)
+    const usage = await getContextWindowUsage(ctx as never, "ses_default", {
+      anthropicContext1MEnabled: false,
+    })

    //#then
    expect(usage?.usagePercentage).toBe(0.75)
@@ -87,7 +91,9 @@ describe("getContextWindowUsage", () => {
    const ctx = createContextUsageMockContext(300000)

    //#when
-    const usage = await getContextWindowUsage(ctx as never, "ses_env_fallback", false)
+    const usage = await getContextWindowUsage(ctx as never, "ses_env_fallback", {
+      anthropicContext1MEnabled: false,
+    })

    //#then
    expect(usage?.usagePercentage).toBe(0.3)
--- a/src/shared/dynamic-truncator.ts
+++ b/src/shared/dynamic-truncator.ts
@@ -5,8 +5,12 @@ const DEFAULT_ANTHROPIC_ACTUAL_LIMIT = 200_000;
 const CHARS_PER_TOKEN_ESTIMATE = 4;
 const DEFAULT_TARGET_MAX_TOKENS = 50_000;

-function getAnthropicActualLimit(anthropicContext1MEnabled = false): number {
-	return anthropicContext1MEnabled ||
+type ModelCacheStateLike = {
+	anthropicContext1MEnabled: boolean;
+}
+
+function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number {
+	return (modelCacheState?.anthropicContext1MEnabled ?? false) ||
 		process.env.ANTHROPIC_1M_CONTEXT === "true" ||
 		process.env.VERTEX_ANTHROPIC_1M_CONTEXT === "true"
 		? 1_000_000
@@ -114,7 +118,7 @@ export function truncateToTokenLimit(
 export async function getContextWindowUsage(
 	ctx: PluginInput,
 	sessionID: string,
-	anthropicContext1MEnabled = false,
+	modelCacheState?: ModelCacheStateLike,
 ): Promise<{
 	usedTokens: number;
 	remainingTokens: number;
@@ -139,7 +143,7 @@ export async function getContextWindowUsage(
 			(lastTokens?.input ?? 0) +
 			(lastTokens?.cache?.read ?? 0) +
 			(lastTokens?.output ?? 0);
-		const anthropicActualLimit = getAnthropicActualLimit(anthropicContext1MEnabled);
+		const anthropicActualLimit = getAnthropicActualLimit(modelCacheState);
 		const remainingTokens = anthropicActualLimit - usedTokens;

 		return {
@@ -157,7 +161,7 @@ export async function dynamicTruncate(
 	sessionID: string,
 	output: string,
 	options: TruncationOptions = {},
-	anthropicContext1MEnabled = false,
+	modelCacheState?: ModelCacheStateLike,
 ): Promise<TruncationResult> {
 	if (typeof output !== 'string') {
 		return { result: String(output ?? ''), truncated: false };
@@ -168,7 +172,7 @@ export async function dynamicTruncate(
 		preserveHeaderLines = 3,
 	} = options;

-	const usage = await getContextWindowUsage(ctx, sessionID, anthropicContext1MEnabled);
+	const usage = await getContextWindowUsage(ctx, sessionID, modelCacheState);

 	if (!usage) {
 		// Fallback: apply conservative truncation when context usage unavailable
@@ -192,17 +196,17 @@ export async function dynamicTruncate(

 export function createDynamicTruncator(
 	ctx: PluginInput,
-	anthropicContext1MEnabled?: boolean,
+	modelCacheState?: ModelCacheStateLike,
 ) {
 	return {
 		truncate: (
 			sessionID: string,
 			output: string,
 			options?: TruncationOptions,
-		) => dynamicTruncate(ctx, sessionID, output, options, anthropicContext1MEnabled),
+		) => dynamicTruncate(ctx, sessionID, output, options, modelCacheState),

 		getUsage: (sessionID: string) =>
-			getContextWindowUsage(ctx, sessionID, anthropicContext1MEnabled),
+			getContextWindowUsage(ctx, sessionID, modelCacheState),

 		truncateSync: (
 			output: string,