refactor(preemptive-compaction): use shared context-limit resolver to eliminate duplicated logic
This commit is contained in:
64
src/hooks/preemptive-compaction.aws-bedrock.test.ts
Normal file
64
src/hooks/preemptive-compaction.aws-bedrock.test.ts
Normal file
@@ -0,0 +1,64 @@
|
||||
/// <reference types="bun-types" />
|
||||
|
||||
import { describe, expect, it, mock } from "bun:test"
|
||||
|
||||
import { OhMyOpenCodeConfigSchema } from "../config"
|
||||
|
||||
const { createPreemptiveCompactionHook } = await import("./preemptive-compaction")
|
||||
|
||||
type HookContext = Parameters<typeof createPreemptiveCompactionHook>[0]
|
||||
|
||||
/**
 * Builds a stand-in for the context object handed to the hook factory.
 *
 * Every client method is a bun `mock`, so a test can assert on how often it
 * was invoked (for example `client.session.summarize`).
 */
function createMockContext(): HookContext {
  // Session stubs: `messages` resolves with an empty `data` array,
  // `summarize` resolves with an empty object.
  const session = {
    messages: mock(() => Promise.resolve({ data: [] })),
    summarize: mock(() => Promise.resolve({})),
  }

  // Toast stub resolves with no value.
  const tui = {
    showToast: mock(() => Promise.resolve()),
  }

  return {
    client: { session, tui },
    directory: "/tmp/test",
  }
}
|
||||
|
||||
// Exercises the hook with providerID "aws-bedrock-anthropic" and no model cache
// state passed to the factory.
describe("preemptive-compaction aws-bedrock-anthropic", () => {
  it("triggers compaction for aws-bedrock-anthropic provider when usage exceeds threshold", async () => {
    // given
    const sessionID = "ses_aws_bedrock_anthropic_high"
    const mockContext = createMockContext()
    const compactionHook = createPreemptiveCompactionHook(
      mockContext,
      OhMyOpenCodeConfigSchema.parse({}),
    )

    // Token usage carried by the finished assistant message.
    const tokens = {
      input: 170000,
      output: 1000,
      reasoning: 0,
      cache: { read: 10000, write: 0 },
    }

    await compactionHook.event({
      event: {
        type: "message.updated",
        properties: {
          info: {
            role: "assistant",
            sessionID,
            providerID: "aws-bedrock-anthropic",
            modelID: "claude-sonnet-4-6",
            finish: true,
            tokens,
          },
        },
      },
    })

    // when
    await compactionHook["tool.execute.after"](
      { tool: "bash", sessionID, callID: "call_aws_bedrock_1" },
      { title: "", output: "test", metadata: null },
    )

    // then
    expect(mockContext.client.session.summarize).toHaveBeenCalledTimes(1)
  })
})
|
||||
@@ -1,23 +1,13 @@
|
||||
import { log } from "../shared/logger"
|
||||
import type { OhMyOpenCodeConfig } from "../config"
|
||||
import {
|
||||
resolveActualContextLimit,
|
||||
type ContextLimitModelCacheState,
|
||||
} from "../shared/context-limit-resolver"
|
||||
|
||||
import { resolveCompactionModel } from "./shared/compaction-model-resolver"
|
||||
const DEFAULT_ACTUAL_LIMIT = 200_000
|
||||
const PREEMPTIVE_COMPACTION_TIMEOUT_MS = 120_000
|
||||
|
||||
/** Shape of the optional model cache state accepted by the preemptive compaction hook. */
type ModelCacheStateLike = {
|
||||
// When true, getAnthropicActualLimit returns 1_000_000 instead of DEFAULT_ACTUAL_LIMIT.
anthropicContext1MEnabled: boolean
|
||||
// Optional context limits, looked up with `${providerID}/${modelID}` keys.
modelContextLimitsCache?: Map<string, number>
|
||||
}
|
||||
|
||||
/**
 * Picks the context limit used for providers that pass the Anthropic check.
 *
 * Returns 1_000_000 when the 1M context is enabled, either through the
 * `anthropicContext1MEnabled` flag on the cache state or because
 * `ANTHROPIC_1M_CONTEXT` / `VERTEX_ANTHROPIC_1M_CONTEXT` is set to the exact
 * string "true". Otherwise returns `DEFAULT_ACTUAL_LIMIT`.
 */
function getAnthropicActualLimit(modelCacheState?: ModelCacheStateLike): number {
  // A missing cache state counts as "flag off".
  const enabledViaState = modelCacheState?.anthropicContext1MEnabled ?? false
  if (enabledViaState) {
    return 1_000_000
  }

  const envOptIns = [process.env.ANTHROPIC_1M_CONTEXT, process.env.VERTEX_ANTHROPIC_1M_CONTEXT]
  if (envOptIns.includes("true")) {
    return 1_000_000
  }

  return DEFAULT_ACTUAL_LIMIT
}
|
||||
|
||||
const PREEMPTIVE_COMPACTION_THRESHOLD = 0.78
|
||||
|
||||
interface TokenInfo {
|
||||
@@ -33,7 +23,7 @@ interface CachedCompactionState {
|
||||
tokens: TokenInfo
|
||||
}
|
||||
|
||||
function withTimeout<TValue>(
|
||||
async function withTimeout<TValue>(
|
||||
promise: Promise<TValue>,
|
||||
timeoutMs: number,
|
||||
errorMessage: string,
|
||||
@@ -46,17 +36,13 @@ function withTimeout<TValue>(
|
||||
}, timeoutMs)
|
||||
})
|
||||
|
||||
return Promise.race([promise, timeoutPromise]).finally(() => {
|
||||
return await Promise.race([promise, timeoutPromise]).finally(() => {
|
||||
if (timeoutID !== undefined) {
|
||||
clearTimeout(timeoutID)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/**
 * Reports whether a provider ID should be treated as Anthropic when choosing a
 * context limit.
 *
 * Recognised IDs are `anthropic`, `google-vertex-anthropic` and
 * `aws-bedrock-anthropic`. Without the last one, such a session would take the
 * per-model cache lookup path and be skipped whenever no cached limit exists
 * for its model.
 *
 * @param providerID - Provider identifier to classify.
 * @returns `true` when the ID is one of the recognised Anthropic provider IDs.
 */
function isAnthropicProvider(providerID: string): boolean {
  return (
    providerID === "anthropic" ||
    providerID === "google-vertex-anthropic" ||
    providerID === "aws-bedrock-anthropic"
  )
}
|
||||
|
||||
type PluginInput = {
|
||||
client: {
|
||||
session: {
|
||||
@@ -76,7 +62,7 @@ type PluginInput = {
|
||||
export function createPreemptiveCompactionHook(
|
||||
ctx: PluginInput,
|
||||
pluginConfig: OhMyOpenCodeConfig,
|
||||
modelCacheState?: ModelCacheStateLike,
|
||||
modelCacheState?: ContextLimitModelCacheState,
|
||||
) {
|
||||
const compactionInProgress = new Set<string>()
|
||||
const compactedSessions = new Set<string>()
|
||||
@@ -92,24 +78,18 @@ export function createPreemptiveCompactionHook(
|
||||
const cached = tokenCache.get(sessionID)
|
||||
if (!cached) return
|
||||
|
||||
const isAnthropic = isAnthropicProvider(cached.providerID)
|
||||
const modelSpecificLimit = !isAnthropic
|
||||
? modelCacheState?.modelContextLimitsCache?.get(`${cached.providerID}/${cached.modelID}`)
|
||||
: undefined
|
||||
const actualLimit = resolveActualContextLimit(
|
||||
cached.providerID,
|
||||
cached.modelID,
|
||||
modelCacheState,
|
||||
)
|
||||
|
||||
let actualLimit: number
|
||||
if (isAnthropic) {
|
||||
actualLimit = getAnthropicActualLimit(modelCacheState)
|
||||
} else {
|
||||
if (modelSpecificLimit === undefined) {
|
||||
log("[preemptive-compaction] Skipping preemptive compaction: unknown context limit for model", {
|
||||
providerID: cached.providerID,
|
||||
modelID: cached.modelID,
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
actualLimit = modelSpecificLimit
|
||||
if (actualLimit === null) {
|
||||
log("[preemptive-compaction] Skipping preemptive compaction: unknown context limit for model", {
|
||||
providerID: cached.providerID,
|
||||
modelID: cached.modelID,
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
const lastTokens = cached.tokens
|
||||
|
||||
Reference in New Issue
Block a user