Files
oh-my-openagent/src/hooks/tool-output-truncator.test.ts
Sisyphus 48dc8298dd fix(webfetch): apply aggressive truncation for webfetch outputs (#434)
Root cause: DEFAULT_TARGET_MAX_TOKENS (50k tokens, ~200k chars) was too high
for webfetch outputs. Web pages can be large, but most content doesn't exceed
this limit, so truncation was rarely triggered.

Changes:
- Add WEBFETCH_MAX_TOKENS = 10k tokens (~40k chars) for web content
- Introduce TOOL_SPECIFIC_MAX_TOKENS map for per-tool limits
- webfetch/WebFetch now use aggressive 10k token limit
- Other tools continue using default 50k token limit
- Add comprehensive tests for truncation behavior

Fixes #195

Co-authored-by: sisyphus-dev-ai <sisyphus-dev-ai@users.noreply.github.com>
2026-01-03 12:09:34 +09:00

169 lines
5.8 KiB
TypeScript

import { describe, it, expect, beforeEach, afterEach, mock, spyOn } from "bun:test"
import { createToolOutputTruncatorHook } from "./tool-output-truncator"
import * as dynamicTruncator from "../shared/dynamic-truncator"
/**
 * Suite for the hook returned by `createToolOutputTruncatorHook`.
 *
 * `createDynamicTruncator` is spied on and stubbed for every test, so each
 * assertion can inspect exactly which arguments the hook forwards to
 * `truncate` without running the real truncation logic.
 */
describe("createToolOutputTruncatorHook", () => {
  type TruncateOptions = { targetMaxTokens?: number }
  type HookConfig = Parameters<typeof createToolOutputTruncatorHook>[1]

  let truncateSpy: ReturnType<typeof spyOn>

  /**
   * Wraps a `truncate` test double in the truncator shape stubbed throughout
   * this suite (`truncate`, `getUsage`, `truncateSync`).
   *
   * @param truncate - The double to expose as the truncator's `truncate` method.
   * @returns An object suitable for `truncateSpy.mockReturnValue(...)`.
   */
  const createTruncatorStub = <T>(truncate: T) => ({
    truncate,
    getUsage: mock(async () => null),
    truncateSync: mock(() => ({ result: "", truncated: false })),
  })

  /**
   * Installs a fresh `truncate` mock behind the spy and builds a hook on top of it.
   *
   * The hook is created *after* the stub is installed so that it is built
   * against the stubbed truncator, which is what lets the test observe the calls.
   *
   * @param config - Optional second argument forwarded to `createToolOutputTruncatorHook`.
   * @returns The hook under test and the `truncate` mock it delegates to.
   */
  const createHookWithTruncateMock = (config?: HookConfig) => {
    const truncateMock = mock(async (_sessionID: string, _output: string, _options?: TruncateOptions) => ({
      result: "truncated",
      truncated: true,
    }))
    truncateSpy.mockReturnValue(createTruncatorStub(truncateMock))
    const hook = createToolOutputTruncatorHook({} as never, config)
    return { hook, truncateMock }
  }

  beforeEach(() => {
    // Default stub: echoes the output back untruncated. It guarantees that a hook
    // created without going through createHookWithTruncateMock still never
    // reaches the real truncator.
    truncateSpy = spyOn(dynamicTruncator, "createDynamicTruncator").mockReturnValue(
      createTruncatorStub(
        mock(async (_sessionID: string, output: string, options?: TruncateOptions) => ({
          result: output,
          truncated: false,
          targetMaxTokens: options?.targetMaxTokens,
        }))
      )
    )
  })

  afterEach(() => {
    // Put the real createDynamicTruncator back so the stub cannot leak into
    // other suites that import the same module.
    truncateSpy.mockRestore()
  })

  describe("tool.execute.after", () => {
    /** Builds the hook's input argument for the given tool name. */
    const createInput = (tool: string) => ({
      tool,
      sessionID: "test-session",
      callID: "test-call-id",
    })

    /** Builds the hook's output argument carrying the given text. */
    const createOutput = (outputText: string) => ({
      title: "Result",
      output: outputText,
      metadata: {},
    })

    describe("#given webfetch tool", () => {
      describe("#when output is processed", () => {
        it("#then should use aggressive truncation limit (10k tokens)", async () => {
          const { hook, truncateMock } = createHookWithTruncateMock()

          await hook["tool.execute.after"](createInput("webfetch"), createOutput("large content"))

          expect(truncateMock).toHaveBeenCalledWith(
            "test-session",
            "large content",
            { targetMaxTokens: 10_000 }
          )
        })
      })

      describe("#when using WebFetch variant", () => {
        it("#then should also use aggressive truncation limit", async () => {
          const { hook, truncateMock } = createHookWithTruncateMock()

          // Same tool, different casing: the limit must not depend on it.
          await hook["tool.execute.after"](createInput("WebFetch"), createOutput("large content"))

          expect(truncateMock).toHaveBeenCalledWith(
            "test-session",
            "large content",
            { targetMaxTokens: 10_000 }
          )
        })
      })
    })

    describe("#given grep tool", () => {
      describe("#when output is processed", () => {
        it("#then should use default truncation limit (50k tokens)", async () => {
          const { hook, truncateMock } = createHookWithTruncateMock()

          await hook["tool.execute.after"](createInput("grep"), createOutput("grep output"))

          expect(truncateMock).toHaveBeenCalledWith(
            "test-session",
            "grep output",
            { targetMaxTokens: 50_000 }
          )
        })
      })
    })

    describe("#given non-truncatable tool", () => {
      describe("#when tool is not in TRUNCATABLE_TOOLS list", () => {
        it("#then should not call truncator", async () => {
          const { hook, truncateMock } = createHookWithTruncateMock()

          await hook["tool.execute.after"](createInput("Read"), createOutput("file content"))

          expect(truncateMock).not.toHaveBeenCalled()
        })
      })
    })

    describe("#given truncate_all_tool_outputs enabled", () => {
      describe("#when any tool output is processed", () => {
        it("#then should truncate non-listed tools too", async () => {
          // "Read" is skipped by default (see the previous case); the flag opts it in.
          const { hook, truncateMock } = createHookWithTruncateMock({
            experimental: { truncate_all_tool_outputs: true },
          })

          await hook["tool.execute.after"](createInput("Read"), createOutput("file content"))

          expect(truncateMock).toHaveBeenCalled()
        })
      })
    })
  })
})