Merge pull request #2210 from code-yeongyu/fix/c1-synthetic-part-id

fix(context-injector): use deterministic synthetic part ID for cache stability
This commit is contained in:
YeonGyu-Kim
2026-02-28 13:40:16 +09:00
committed by GitHub
18 changed files with 1424 additions and 8 deletions

View File

@@ -4,6 +4,8 @@ import { describe, it, expect } from "bun:test"
import {
buildCategorySkillsDelegationGuide,
buildUltraworkSection,
buildDeepParallelSection,
buildNonClaudePlannerSection,
type AvailableSkill,
type AvailableCategory,
type AvailableAgent,
@@ -172,4 +174,86 @@ describe("buildUltraworkSection", () => {
})
})
// Covers buildDeepParallelSection: a non-empty section is expected only when the
// model string is non-Claude AND a category named "deep" is among the categories.
describe("buildDeepParallelSection", () => {
// Fixtures: one category named "deep" and one unrelated category.
const deepCategory: AvailableCategory = { name: "deep", description: "Autonomous problem-solving" }
const otherCategory: AvailableCategory = { name: "quick", description: "Trivial tasks" }
it("#given non-Claude model with deep category #when building #then returns parallel delegation section", () => {
//#given
const model = "google/gemini-3-pro"
const categories = [deepCategory, otherCategory]
//#when
const result = buildDeepParallelSection(model, categories)
//#then
// Pins the heading plus the key phrases of the delegation instructions.
expect(result).toContain("Deep Parallel Delegation")
expect(result).toContain("EVERY independent unit")
expect(result).toContain("run_in_background=true")
expect(result).toContain("4 independent units")
})
it("#given Claude model #when building #then returns empty", () => {
//#given
// A "deep" category is present, so only the Claude model can cause the empty result.
const model = "anthropic/claude-opus-4-6"
const categories = [deepCategory]
//#when
const result = buildDeepParallelSection(model, categories)
//#then
expect(result).toBe("")
})
it("#given non-Claude model without deep category #when building #then returns empty", () => {
//#given
// Non-Claude model, but no category named "deep" is offered.
const model = "openai/gpt-5.2"
const categories = [otherCategory]
//#when
const result = buildDeepParallelSection(model, categories)
//#then
expect(result).toBe("")
})
})
// Covers buildNonClaudePlannerSection: model strings containing "claude" yield "",
// every other model string yields the Plan Agent section.
describe("buildNonClaudePlannerSection", () => {
it("#given non-Claude model #when building #then returns plan agent section", () => {
//#given
const model = "google/gemini-3-pro"
//#when
const result = buildNonClaudePlannerSection(model)
//#then
// Pins the heading and two phrases from the body of the section.
expect(result).toContain("Plan Agent")
expect(result).toContain("session_id")
expect(result).toContain("Multi-step")
})
it("#given Claude model #when building #then returns empty", () => {
//#given
const model = "anthropic/claude-sonnet-4-6"
//#when
const result = buildNonClaudePlannerSection(model)
//#then
expect(result).toBe("")
})
it("#given GPT model #when building #then returns plan agent section", () => {
//#given
// A second non-Claude provider, to show the check is not Gemini-specific.
const model = "openai/gpt-5.2"
//#when
const result = buildNonClaudePlannerSection(model)
//#then
expect(result).toContain("Plan Agent")
expect(result).not.toBe("")
})
})

View File

@@ -316,6 +316,22 @@ export function buildAntiPatternsSection(): string {
${patterns.join("\n")}`
}
/**
 * Builds the "Plan Agent Dependency" prompt section for non-Claude models.
 *
 * @param model - Model identifier; matched case-insensitively against "claude".
 * @returns The markdown section, or an empty string when the identifier contains "claude".
 */
export function buildNonClaudePlannerSection(model: string): string {
  const targetsClaude = model.toLowerCase().includes('claude')
  if (targetsClaude) {
    return ""
  }

  const plannerSection = `### Plan Agent Dependency (Non-Claude)
Multi-step task? **ALWAYS consult Plan Agent first.** Do NOT start implementation without a plan.
- Single-file fix or trivial change → proceed directly
- Anything else (2+ steps, unclear scope, architecture) → \`task(subagent_type="plan", ...)\` FIRST
- Use \`session_id\` to resume the same Plan Agent — ask follow-up questions aggressively
- If ANY part of the task is ambiguous, ask Plan Agent before guessing
Plan Agent returns a structured work breakdown with parallel execution opportunities. Follow it.`
  return plannerSection
}
export function buildDeepParallelSection(model: string, categories: AvailableCategory[]): string {
const isNonClaude = !model.toLowerCase().includes('claude')
const hasDeepCategory = categories.some(c => c.name === 'deep')
@@ -324,12 +340,13 @@ export function buildDeepParallelSection(model: string, categories: AvailableCat
return `### Deep Parallel Delegation
For implementation tasks, actively decompose and delegate to \`deep\` category agents in parallel.
Delegate EVERY independent unit to a \`deep\` agent in parallel (\`run_in_background=true\`).
If a task decomposes into 4 independent units, spawn 4 agents simultaneously — not 1 at a time.
1. Break the implementation into independent work units
2. Maximize parallel deep agents — spawn one per independent unit (\`run_in_background=true\`)
3. Give each agent a GOAL, not step-by-step instructions — deep agents explore and solve autonomously
4. Collect results, integrate, verify coherence`
1. Decompose the implementation into independent work units
2. Assign one \`deep\` agent per unit — all via \`run_in_background=true\`
3. Give each agent a clear GOAL with success criteria, not step-by-step instructions
4. Collect all results, integrate, verify coherence across units`
}
export function buildUltraworkSection(

View File

@@ -39,6 +39,136 @@ Then ACTUALLY CALL those tools using the JSON tool schema. Produce the tool_use
</TOOL_CALL_MANDATE>`;
}
/**
 * Builds the Gemini-specific tool usage guide that is injected into the prompt.
 *
 * The guide is static text: per-tool tables (when to call, whether the call may
 * run in parallel), the mandatory call sequences, and the parallelism rules.
 *
 * @returns The guide wrapped in `<GEMINI_TOOL_GUIDE>` tags.
 */
export function buildGeminiToolGuide(): string {
  // The `Read` row previously carried a mis-encoded character instead of the
  // check mark used by every other parallelizable row; it is restored here so
  // the table reads consistently.
  return `<GEMINI_TOOL_GUIDE>
## Tool Usage Guide — WHEN and HOW to Call Each Tool
You have access to tools via function calling. This guide defines WHEN to call each one.
**Violating these patterns = failed response.**
### Reading & Search (ALWAYS parallelizable — call multiple simultaneously)
| Tool | When to Call | Parallel? |
|---|---|---|
| \`Read\` | Before making ANY claim about file contents. Before editing any file. | ✅ Yes — read multiple files at once |
| \`Grep\` | Finding patterns, imports, usages across codebase. BEFORE claiming "X is used in Y". | ✅ Yes — run multiple greps at once |
| \`Glob\` | Finding files by name/extension pattern. BEFORE claiming "file X exists". | ✅ Yes — run multiple globs at once |
| \`AstGrepSearch\` | Finding code patterns with AST awareness (structural matches). | ✅ Yes |
### Code Intelligence (parallelizable on different files)
| Tool | When to Call | Parallel? |
|---|---|---|
| \`LspDiagnostics\` | **AFTER EVERY edit.** BEFORE claiming task is done. MANDATORY. | ✅ Yes — different files |
| \`LspGotoDefinition\` | Finding where a symbol is defined. | ✅ Yes |
| \`LspFindReferences\` | Finding all usages of a symbol across workspace. | ✅ Yes |
| \`LspSymbols\` | Getting file outline or searching workspace symbols. | ✅ Yes |
### Editing (SEQUENTIAL — must Read first)
| Tool | When to Call | Parallel? |
|---|---|---|
| \`Edit\` | Modifying existing files. MUST Read file first to get LINE#ID anchors. | ❌ After Read |
| \`Write\` | Creating NEW files only. Or full file overwrite. | ❌ Sequential |
### Execution & Delegation
| Tool | When to Call | Parallel? |
|---|---|---|
| \`Bash\` | Running tests, builds, git commands. | ❌ Usually sequential |
| \`Task\` | ANY non-trivial implementation. Research via explore/librarian. | ✅ Fire multiple in background |
### Correct Sequences (MANDATORY — follow these exactly):
1. **Answer about code**: Read → (analyze) → Answer
2. **Edit code**: Read → Edit → LspDiagnostics → Report
3. **Find something**: Grep/Glob (parallel) → Read results → Report
4. **Implement feature**: Task(delegate) → Verify results → Report
5. **Debug**: Read error → Read file → Grep related → Fix → LspDiagnostics
### PARALLEL RULES:
- **Independent reads/searches**: ALWAYS call simultaneously in ONE response
- **Dependent operations**: Call sequentially (Edit AFTER Read, LspDiagnostics AFTER Edit)
- **Background agents**: ALWAYS \`run_in_background=true\`, continue working
</GEMINI_TOOL_GUIDE>`;
}
/**
 * Builds the worked tool-calling examples that are injected into the Gemini prompt.
 *
 * The text is static: five CORRECT/WRONG scenario pairs (answering about code,
 * editing, searching, delegating an implementation, investigating).
 *
 * @returns The examples wrapped in `<GEMINI_TOOL_CALL_EXAMPLES>` tags.
 */
export function buildGeminiToolCallExamples(): string {
  const toolCallExamples = `<GEMINI_TOOL_CALL_EXAMPLES>
## Correct Tool Calling Patterns — Follow These Examples
### Example 1: User asks about code → Read FIRST, then answer
**User**: "How does the auth middleware work?"
**CORRECT**:
\`\`\`
→ Call Read(filePath="/src/middleware/auth.ts")
→ Call Read(filePath="/src/config/auth.ts") // parallel with above
→ (After reading) Answer based on ACTUAL file contents
\`\`\`
**WRONG**:
\`\`\`
→ "The auth middleware likely validates JWT tokens by..." ← HALLUCINATION. You didn't read the file.
\`\`\`
### Example 2: User asks to edit code → Read, Edit, Verify
**User**: "Fix the type error in user.ts"
**CORRECT**:
\`\`\`
→ Call Read(filePath="/src/models/user.ts")
→ Call LspDiagnostics(filePath="/src/models/user.ts") // parallel with Read
→ (After reading) Call Edit with LINE#ID anchors
→ Call LspDiagnostics(filePath="/src/models/user.ts") // verify fix
→ Report: "Fixed. Diagnostics clean."
\`\`\`
**WRONG**:
\`\`\`
→ Call Edit without reading first ← No LINE#ID anchors = WILL FAIL
→ Skip LspDiagnostics after edit ← UNVERIFIED
\`\`\`
### Example 3: User asks to find something → Search in parallel
**User**: "Where is the database connection configured?"
**CORRECT**:
\`\`\`
→ Call Grep(pattern="database|connection|pool", path="/src") // fires simultaneously
→ Call Glob(pattern="**/*database*") // fires simultaneously
→ Call Glob(pattern="**/*db*") // fires simultaneously
→ (After results) Read the most relevant files
→ Report findings with file paths
\`\`\`
### Example 4: User asks to implement a feature → DELEGATE
**User**: "Add a new /health endpoint to the API"
**CORRECT**:
\`\`\`
→ Call Task(category="quick", load_skills=["typescript-programmer"], prompt="...")
→ (After agent completes) Read changed files to verify
→ Call LspDiagnostics on changed files
→ Report
\`\`\`
**WRONG**:
\`\`\`
→ Write the code yourself ← YOU ARE AN ORCHESTRATOR, NOT AN IMPLEMENTER
\`\`\`
### Example 5: Investigation ≠ Implementation
**User**: "Look into why the tests are failing"
**CORRECT**:
\`\`\`
→ Call Bash(command="npm test") // see actual failures
→ Call Read on failing test files
→ Call Read on source files under test
→ Report: "Tests fail because X. Root cause: Y. Proposed fix: Z."
→ STOP — wait for user to say "fix it"
\`\`\`
**WRONG**:
\`\`\`
→ Start editing source files immediately ← "look into" ≠ "fix"
\`\`\`
</GEMINI_TOOL_CALL_EXAMPLES>`;
  return toolCallExamples;
}
export function buildGeminiDelegationOverride(): string {
return `<GEMINI_DELEGATION_OVERRIDE>
## DELEGATION IS MANDATORY — YOU ARE NOT AN IMPLEMENTER

View File

@@ -6,6 +6,8 @@ import {
buildGeminiDelegationOverride,
buildGeminiVerificationOverride,
buildGeminiIntentGateEnforcement,
buildGeminiToolGuide,
buildGeminiToolCallExamples,
} from "./sisyphus-gemini-overlays";
const MODE: AgentMode = "all";
@@ -32,6 +34,7 @@ import {
buildHardBlocksSection,
buildAntiPatternsSection,
buildDeepParallelSection,
buildNonClaudePlannerSection,
categorizeTools,
} from "./dynamic-agent-prompt-builder";
@@ -170,6 +173,7 @@ function buildDynamicSisyphusPrompt(
const hardBlocks = buildHardBlocksSection();
const antiPatterns = buildAntiPatternsSection();
const deepParallelSection = buildDeepParallelSection(model, availableCategories);
const nonClaudePlannerSection = buildNonClaudePlannerSection(model);
const taskManagementSection = buildTaskManagementSection(useTaskSystem);
const todoHookNote = useTaskSystem
? "YOUR TASK CREATION WOULD BE TRACKED BY HOOK([SYSTEM REMINDER - TASK CONTINUATION])"
@@ -364,6 +368,8 @@ STOP searching when:
${categorySkillsGuide}
${nonClaudePlannerSection}
${deepParallelSection}
${delegationTable}
@@ -564,12 +570,25 @@ export function createSisyphusAgent(
: buildDynamicSisyphusPrompt(model, [], tools, skills, categories, useTaskSystem);
if (isGeminiModel(model)) {
// 1. Intent gate + tool mandate — early in prompt (after intent verbalization)
prompt = prompt.replace(
"</intent_verbalization>",
`</intent_verbalization>\n\n${buildGeminiIntentGateEnforcement()}\n\n${buildGeminiToolMandate()}`
);
prompt += "\n" + buildGeminiDelegationOverride();
prompt += "\n" + buildGeminiVerificationOverride();
// 2. Tool guide + examples — after tool_usage_rules (where tools are discussed)
prompt = prompt.replace(
"</tool_usage_rules>",
`</tool_usage_rules>\n\n${buildGeminiToolGuide()}\n\n${buildGeminiToolCallExamples()}`
);
// 3. Delegation + verification overrides — before Constraints (NOT at prompt end)
// Gemini suffers from lost-in-the-middle: content at prompt end gets weaker attention.
// Placing these before <Constraints> ensures they're in a high-attention zone.
prompt = prompt.replace(
"<Constraints>",
`${buildGeminiDelegationOverride()}\n\n${buildGeminiVerificationOverride()}\n\n<Constraints>`
);
}
const permission = {

View File

@@ -49,6 +49,7 @@ export const HookNameSchema = z.enum([
"write-existing-file-guard",
"anthropic-effort",
"hashline-read-enhancer",
"read-image-resizer",
])
export type HookName = z.infer<typeof HookNameSchema>

View File

@@ -64,6 +64,51 @@ describe("createContextInjectorMessagesTransformHook", () => {
expect(output.messages[2].parts[1].text).toBe("Second message")
})
// Regression test for the synthetic part ID: two separate transforms for the same
// session must produce a synthetic part with the same id.
it("uses deterministic synthetic part ID across repeated transforms", async () => {
// given
const hook = createContextInjectorMessagesTransformHook(collector)
const sessionID = "ses_transform_deterministic"
const baseMessage = createMockMessage("user", "Stable message", sessionID)
collector.register(sessionID, {
id: "ctx-1",
source: "keyword-detector",
content: "Injected context",
})
// structuredClone gives each transform its own copy of the base message.
const firstOutput = {
messages: [structuredClone(baseMessage)],
}
// when
await hook["experimental.chat.messages.transform"]!({}, firstOutput)
// then
const firstSyntheticPart = firstOutput.messages[0].parts[0]
expect(
"synthetic" in firstSyntheticPart && firstSyntheticPart.synthetic === true
).toBe(true)
// given
// A second context entry (different entry id, same session) is registered
// before the second transform.
collector.register(sessionID, {
id: "ctx-2",
source: "keyword-detector",
content: "Injected context",
})
const secondOutput = {
messages: [structuredClone(baseMessage)],
}
// when
await hook["experimental.chat.messages.transform"]!({}, secondOutput)
// then
const secondSyntheticPart = secondOutput.messages[0].parts[0]
expect(
"synthetic" in secondSyntheticPart && secondSyntheticPart.synthetic === true
).toBe(true)
// The core assertion: the id is identical across both transforms.
expect(secondSyntheticPart.id).toBe(firstSyntheticPart.id)
})
it("does nothing when no pending context", async () => {
// given
const hook = createContextInjectorMessagesTransformHook(collector)

View File

@@ -148,7 +148,7 @@ export function createContextInjectorMessagesTransformHook(
// synthetic part pattern (minimal fields)
const syntheticPart = {
id: `synthetic_hook_${Date.now()}`,
id: `synthetic_hook_${sessionID}`,
messageID: lastUserMessage.info.id,
sessionID: (lastUserMessage.info as { sessionID?: string }).sessionID ?? "",
type: "text" as const,

View File

@@ -50,3 +50,4 @@ export { createRuntimeFallbackHook, type RuntimeFallbackHook, type RuntimeFallba
export { createWriteExistingFileGuardHook } from "./write-existing-file-guard";
export { createHashlineReadEnhancerHook } from "./hashline-read-enhancer";
export { createJsonErrorRecoveryHook, JSON_ERROR_TOOL_EXCLUDE_LIST, JSON_ERROR_PATTERNS, JSON_ERROR_REMINDER } from "./json-error-recovery";
export { createReadImageResizerHook } from "./read-image-resizer"

View File

@@ -0,0 +1,286 @@
/// <reference types="bun-types" />
import { beforeEach, describe, expect, it, mock } from "bun:test"
import type { PluginInput } from "@opencode-ai/plugin"
import type { ImageDimensions, ResizeResult } from "./types"
// Stubs for the hook's collaborators. The image helpers default to returning null;
// individual tests configure them with mockReturnValue / mockResolvedValue.
const mockParseImageDimensions = mock((): ImageDimensions | null => null)
const mockCalculateTargetDimensions = mock((): ImageDimensions | null => null)
const mockResizeImage = mock(async (): Promise<ResizeResult | null> => null)
// Defaults to an anthropic session model; the cast widens the return type so tests
// can also make it return undefined.
const mockGetSessionModel = mock((_sessionID: string) => ({
providerID: "anthropic",
modelID: "claude-sonnet-4-6",
} as { providerID: string; modelID: string } | undefined))
// The module mocks are registered before "./hook" is imported further down, so the
// hook under test is wired to the stubs above.
mock.module("./image-dimensions", () => ({
parseImageDimensions: mockParseImageDimensions,
}))
mock.module("./image-resizer", () => ({
calculateTargetDimensions: mockCalculateTargetDimensions,
resizeImage: mockResizeImage,
}))
mock.module("../../shared/session-model-state", () => ({
getSessionModel: mockGetSessionModel,
}))
import { createReadImageResizerHook } from "./hook"
// Shape of the tool output objects the tests hand to the hook: the three base
// fields plus the optional attachments array the hook inspects.
type ToolOutput = {
title: string
output: string
metadata: unknown
attachments?: Array<{ mime: string; url: string; filename?: string }>
}
/** Builds a minimal PluginInput stub: an empty client and a fixed "/test" directory. */
function createMockContext(): PluginInput {
  const client = {} as PluginInput["client"]
  const context = { client, directory: "/test" }
  return context as PluginInput
}
/** Builds a hook input for the given tool name with fixed session and call IDs. */
function createInput(tool: string): { tool: string; sessionID: string; callID: string } {
  const sessionID = "session-1"
  const callID = "call-1"
  return { tool, sessionID, callID }
}
// Behavioural tests for the "tool.execute.after" handler returned by
// createReadImageResizerHook. The image helpers and session-model lookup are the
// module-level mocks defined above; each test configures the ones it needs.
describe("createReadImageResizerHook", () => {
beforeEach(() => {
// Clear call history and configured return values, then restore the default
// anthropic session model so tests only override it when they need to.
mockParseImageDimensions.mockReset()
mockCalculateTargetDimensions.mockReset()
mockResizeImage.mockReset()
mockGetSessionModel.mockReset()
mockGetSessionModel.mockReturnValue({ providerID: "anthropic", modelID: "claude-sonnet-4-6" })
})
it("skips non-Read tools", async () => {
//#given
const hook = createReadImageResizerHook(createMockContext())
const output: ToolOutput = {
title: "Read",
output: "original output",
metadata: {},
attachments: [{ mime: "image/png", url: "data:image/png;base64,old", filename: "image.png" }],
}
//#when
await hook["tool.execute.after"](createInput("Bash"), output)
//#then
expect(output.output).toBe("original output")
expect(mockParseImageDimensions).not.toHaveBeenCalled()
})
it("skips when provider is not anthropic", async () => {
//#given
mockGetSessionModel.mockReturnValue({ providerID: "openai", modelID: "gpt-5.3-codex" })
mockParseImageDimensions.mockReturnValue({ width: 3000, height: 2000 })
mockCalculateTargetDimensions.mockReturnValue({ width: 1568, height: 1045 })
const hook = createReadImageResizerHook(createMockContext())
const output: ToolOutput = {
title: "Read",
output: "original output",
metadata: {},
attachments: [{ mime: "image/png", url: "data:image/png;base64,old", filename: "image.png" }],
}
//#when
await hook["tool.execute.after"](createInput("Read"), output)
//#then
expect(output.output).toBe("original output")
expect(mockParseImageDimensions).not.toHaveBeenCalled()
})
it("skips when session model is unknown", async () => {
//#given
mockGetSessionModel.mockReturnValue(undefined)
mockParseImageDimensions.mockReturnValue({ width: 3000, height: 2000 })
const hook = createReadImageResizerHook(createMockContext())
const output: ToolOutput = {
title: "Read",
output: "original output",
metadata: {},
attachments: [{ mime: "image/png", url: "data:image/png;base64,old", filename: "image.png" }],
}
//#when
await hook["tool.execute.after"](createInput("Read"), output)
//#then
expect(output.output).toBe("original output")
expect(mockParseImageDimensions).not.toHaveBeenCalled()
})
it("skips Read output with no attachments", async () => {
//#given
const hook = createReadImageResizerHook(createMockContext())
const output: ToolOutput = {
title: "Read",
output: "original output",
metadata: {},
}
//#when
await hook["tool.execute.after"](createInput("Read"), output)
//#then
expect(output.output).toBe("original output")
expect(mockParseImageDimensions).not.toHaveBeenCalled()
})
it("skips non-image attachments", async () => {
//#given
const hook = createReadImageResizerHook(createMockContext())
const output: ToolOutput = {
title: "Read",
output: "original output",
metadata: {},
attachments: [{ mime: "application/pdf", url: "data:application/pdf;base64,AAAA", filename: "file.pdf" }],
}
//#when
await hook["tool.execute.after"](createInput("Read"), output)
//#then
expect(output.output).toBe("original output")
expect(mockParseImageDimensions).not.toHaveBeenCalled()
})
it("skips unsupported image mime types", async () => {
//#given
const hook = createReadImageResizerHook(createMockContext())
const output: ToolOutput = {
title: "Read",
output: "original output",
metadata: {},
attachments: [{ mime: "image/heic", url: "data:image/heic;base64,AAAA", filename: "photo.heic" }],
}
//#when
await hook["tool.execute.after"](createInput("Read"), output)
//#then
expect(output.output).toBe("original output")
expect(mockParseImageDimensions).not.toHaveBeenCalled()
})
it("appends within-limits metadata when image is already valid", async () => {
//#given
// A null target from calculateTargetDimensions is the "no resize needed" signal.
mockParseImageDimensions.mockReturnValue({ width: 800, height: 600 })
mockCalculateTargetDimensions.mockReturnValue(null)
const hook = createReadImageResizerHook(createMockContext())
const output: ToolOutput = {
title: "Read",
output: "original output",
metadata: {},
attachments: [{ mime: "image/png", url: "data:image/png;base64,old", filename: "image.png" }],
}
//#when
await hook["tool.execute.after"](createInput("Read"), output)
//#then
expect(output.output).toContain("[Image Info]")
expect(output.output).toContain("within limits")
expect(output.attachments?.[0]?.url).toBe("data:image/png;base64,old")
expect(mockResizeImage).not.toHaveBeenCalled()
})
it("replaces attachment URL and appends resize metadata for oversized image", async () => {
//#given
mockParseImageDimensions.mockReturnValue({ width: 3000, height: 2000 })
mockCalculateTargetDimensions.mockReturnValue({ width: 1568, height: 1045 })
mockResizeImage.mockResolvedValue({
resizedDataUrl: "data:image/png;base64,resized",
original: { width: 3000, height: 2000 },
resized: { width: 1568, height: 1045 },
})
const hook = createReadImageResizerHook(createMockContext())
const output: ToolOutput = {
title: "Read",
output: "original output",
metadata: {},
attachments: [{ mime: "image/png", url: "data:image/png;base64,old", filename: "big.png" }],
}
//#when
await hook["tool.execute.after"](createInput("Read"), output)
//#then
expect(output.attachments?.[0]?.url).toBe("data:image/png;base64,resized")
expect(output.output).toContain("[Image Resize Info]")
expect(output.output).toContain("resized")
})
it("keeps original attachment URL and marks resize skipped when resize fails", async () => {
//#given
// resizeImage resolving to null models a failed resize.
mockParseImageDimensions.mockReturnValue({ width: 3000, height: 2000 })
mockCalculateTargetDimensions.mockReturnValue({ width: 1568, height: 1045 })
mockResizeImage.mockResolvedValue(null)
const hook = createReadImageResizerHook(createMockContext())
const output: ToolOutput = {
title: "Read",
output: "original output",
metadata: {},
attachments: [{ mime: "image/png", url: "data:image/png;base64,old", filename: "fail.png" }],
}
//#when
await hook["tool.execute.after"](createInput("Read"), output)
//#then
expect(output.attachments?.[0]?.url).toBe("data:image/png;base64,old")
expect(output.output).toContain("resize skipped")
})
it("appends unknown-dimensions metadata when parsing fails", async () => {
//#given
mockParseImageDimensions.mockReturnValue(null)
const hook = createReadImageResizerHook(createMockContext())
const output: ToolOutput = {
title: "Read",
output: "original output",
metadata: {},
attachments: [{ mime: "image/png", url: "data:image/png;base64,old", filename: "corrupt.png" }],
}
//#when
await hook["tool.execute.after"](createInput("Read"), output)
//#then
expect(output.output).toContain("dimensions could not be parsed")
expect(mockCalculateTargetDimensions).not.toHaveBeenCalled()
})
it("fires for lowercase read tool name", async () => {
//#given
mockParseImageDimensions.mockReturnValue({ width: 800, height: 600 })
mockCalculateTargetDimensions.mockReturnValue(null)
const hook = createReadImageResizerHook(createMockContext())
const output: ToolOutput = {
title: "Read",
output: "original output",
metadata: {},
attachments: [{ mime: "image/png", url: "data:image/png;base64,old", filename: "image.png" }],
}
//#when
// The tool name is passed in lowercase to exercise case-insensitive matching.
await hook["tool.execute.after"](createInput("read"), output)
//#then
expect(mockParseImageDimensions).toHaveBeenCalledTimes(1)
expect(output.output).toContain("within limits")
})
})

View File

@@ -0,0 +1,197 @@
import type { PluginInput } from "@opencode-ai/plugin"
import type { ImageAttachment, ImageDimensions } from "./types"
import { parseImageDimensions } from "./image-dimensions"
import { calculateTargetDimensions, resizeImage } from "./image-resizer"
import { log } from "../../shared"
import { getSessionModel } from "../../shared/session-model-state"
// Lower-case MIME types the hook processes; extractImageAttachments drops any
// attachment whose lower-cased mime is not in this set.
const SUPPORTED_IMAGE_MIMES = new Set(["image/png", "image/jpeg", "image/gif", "image/webp"])
// Divisor applied to width * height by calculateTokens to estimate an image's token cost.
// NOTE(review): presumably mirrors the provider's pixels-per-token guidance — confirm the source of 750.
const TOKEN_DIVISOR = 750
/** Per-attachment outcome collected by the hook and rendered by formatResizeAppendix. */
interface ResizeEntry {
// Display name for the attachment (see resolveFilename).
filename: string
// Parsed dimensions of the incoming image; null when they could not be determined.
originalDims: ImageDimensions | null
// Dimensions after resizing; null unless a resize actually happened.
resizedDims: ImageDimensions | null
// "resized": URL was replaced; "within-limits": no target dimensions were returned;
// "resize-skipped": the resizer returned nothing; "unknown-dims": parsing failed or threw.
status: "resized" | "within-limits" | "resize-skipped" | "unknown-dims"
}
/** Returns true when the tool name equals "read", ignoring letter case. */
function isReadTool(toolName: string): boolean {
  const normalizedName = toolName.toLowerCase()
  return normalizedName === "read"
}
/**
 * Narrows an unknown value to a string-keyed record.
 *
 * @returns The same reference when `value` is a non-null, non-array object; otherwise null.
 */
function asRecord(value: unknown): Record<string, unknown> | null {
  const isObjectLike = typeof value === "object" && value !== null
  if (isObjectLike && !Array.isArray(value)) {
    return value as Record<string, unknown>
  }
  return null
}
/**
 * Type guard: the record has string `mime` and `url` fields and a `filename`
 * that is either absent/undefined or a string.
 */
function isImageAttachmentRecord(
  value: Record<string, unknown>,
): value is Record<string, unknown> & ImageAttachment {
  const filename = value.filename
  if (typeof value.mime !== "string") {
    return false
  }
  if (typeof value.url !== "string") {
    return false
  }
  return filename === undefined || typeof filename === "string"
}
/**
 * Collects the supported image attachments from a tool output record.
 *
 * Entries that are not objects, lack string `mime`/`url` fields, or whose
 * lower-cased mime is not in SUPPORTED_IMAGE_MIMES are ignored. Accepted entries
 * are returned by reference, with their `mime` rewritten to lower case in place.
 *
 * @param output - Tool output viewed as a plain record; `attachments` is read from it.
 * @returns The accepted attachment objects, in their original order.
 */
function extractImageAttachments(output: Record<string, unknown>): ImageAttachment[] {
  const rawAttachments = output.attachments
  if (!Array.isArray(rawAttachments)) {
    return []
  }

  const accepted: ImageAttachment[] = []
  for (const candidate of rawAttachments) {
    const record = asRecord(candidate)
    if (record === null) {
      continue
    }

    const { mime, url } = record
    if (typeof mime !== "string" || typeof url !== "string") {
      continue
    }

    const lowerCasedMime = mime.toLowerCase()
    if (!SUPPORTED_IMAGE_MIMES.has(lowerCasedMime)) {
      continue
    }

    // Write the normalized values back onto the caller's object before the final guard.
    record.mime = lowerCasedMime
    record.url = url
    if (isImageAttachmentRecord(record)) {
      accepted.push(record)
    }
  }
  return accepted
}
/** Estimates the token cost of an image: pixel count divided by TOKEN_DIVISOR, rounded up. */
function calculateTokens(width: number, height: number): number {
  const pixelCount = width * height
  return Math.ceil(pixelCount / TOKEN_DIVISOR)
}
/**
 * Renders the text block appended to the tool output.
 *
 * The block starts with two newlines and a header — "[Image Resize Info]" when at
 * least one entry was resized, "[Image Info]" otherwise — followed by one bullet
 * line per entry describing its dimensions, outcome and estimated tokens.
 *
 * @param entries - Per-attachment outcomes, rendered in the given order.
 * @returns The appendix text, lines joined with "\n".
 */
function formatResizeAppendix(entries: ResizeEntry[]): string {
  const anyResized = entries.some(({ status }) => status === "resized")
  const heading = anyResized ? "[Image Resize Info]" : "[Image Info]"

  const describeEntry = ({ filename, originalDims, resizedDims, status }: ResizeEntry): string => {
    if (status === "unknown-dims" || !originalDims) {
      return `- ${filename}: dimensions could not be parsed`
    }

    const originalText = `${originalDims.width}x${originalDims.height}`
    const originalTokens = calculateTokens(originalDims.width, originalDims.height)

    if (status === "within-limits") {
      return `- ${filename}: ${originalText} (within limits, tokens: ${originalTokens})`
    }

    // A "resized" entry without resized dimensions is reported like a skipped resize.
    if (status === "resize-skipped" || !resizedDims) {
      return `- ${filename}: ${originalText} (resize skipped, tokens: ${originalTokens})`
    }

    const resizedText = `${resizedDims.width}x${resizedDims.height}`
    const resizedTokens = calculateTokens(resizedDims.width, resizedDims.height)
    return `- ${filename}: ${originalText} -> ${resizedText} (resized, tokens: ${originalTokens} -> ${resizedTokens})`
  }

  return [`\n\n${heading}`, ...entries.map(describeEntry)].join("\n")
}
/**
 * Picks a display name for an attachment.
 *
 * @returns The attachment's own filename when it is non-empty after trimming,
 *          otherwise the 1-based fallback `image-<index + 1>`.
 */
function resolveFilename(attachment: ImageAttachment, index: number): string {
  const provided = attachment.filename
  if (!provided || provided.trim().length === 0) {
    return `image-${index + 1}`
  }
  return provided
}
/**
 * Creates the hook that annotates (and, when needed, shrinks) images returned by
 * the Read tool.
 *
 * The "tool.execute.after" handler only acts when the tool name is "read"
 * (case-insensitive), the session model's providerID is "anthropic", the output
 * text is a string, and the output carries at least one supported image
 * attachment. For each such attachment it parses the dimensions, asks for target
 * dimensions, resizes if a target is returned, and replaces the attachment URL on
 * success. A summary produced by formatResizeAppendix is appended to `output.output`.
 *
 * @param _ctx - Plugin context; not used by this hook.
 * @returns An object exposing the "tool.execute.after" handler.
 */
export function createReadImageResizerHook(_ctx: PluginInput) {
  // Outcome used whenever dimensions are unavailable (parse failure or thrown error).
  const unknownDimsEntry = (filename: string): ResizeEntry => ({
    filename,
    originalDims: null,
    resizedDims: null,
    status: "unknown-dims",
  })

  // Runs the parse -> target -> resize pipeline for one attachment and reports the outcome.
  const processAttachment = async (attachment: ImageAttachment, filename: string): Promise<ResizeEntry> => {
    const originalDims = parseImageDimensions(attachment.url, attachment.mime)
    if (!originalDims) {
      return unknownDimsEntry(filename)
    }

    const targetDims = calculateTargetDimensions(originalDims.width, originalDims.height)
    if (!targetDims) {
      return { filename, originalDims, resizedDims: null, status: "within-limits" }
    }

    const resizedResult = await resizeImage(attachment.url, attachment.mime, targetDims)
    if (!resizedResult) {
      return { filename, originalDims, resizedDims: null, status: "resize-skipped" }
    }

    // Swap the payload in place so the caller's attachment carries the resized image.
    attachment.url = resizedResult.resizedDataUrl
    return {
      filename,
      originalDims: resizedResult.original,
      resizedDims: resizedResult.resized,
      status: "resized",
    }
  }

  return {
    "tool.execute.after": async (
      input: { tool: string; sessionID: string; callID: string },
      output: { title: string; output: string; metadata: unknown },
    ) => {
      if (!isReadTool(input.tool)) {
        return
      }

      const sessionModel = getSessionModel(input.sessionID)
      if (sessionModel?.providerID !== "anthropic") {
        return
      }

      if (typeof output.output !== "string") {
        return
      }

      // Attachments are not part of the declared output type, so read them via a record view.
      const attachments = extractImageAttachments(output as Record<string, unknown>)
      if (attachments.length === 0) {
        return
      }

      const entries: ResizeEntry[] = []
      // Attachments are handled one at a time, in order.
      for (const [index, attachment] of attachments.entries()) {
        const filename = resolveFilename(attachment, index)
        try {
          entries.push(await processAttachment(attachment, filename))
        } catch (error) {
          // A failure on one attachment is logged and reported; the rest still run.
          log("[read-image-resizer] attachment processing failed", {
            error: error instanceof Error ? error.message : String(error),
            filename,
          })
          entries.push(unknownDimsEntry(filename))
        }
      }

      if (entries.length === 0) {
        return
      }

      output.output += formatResizeAppendix(entries)
    },
  }
}

View File

@@ -0,0 +1,108 @@
/// <reference types="bun-types" />
import { describe, expect, it } from "bun:test"
import { parseImageDimensions } from "./image-dimensions"
// Fixture data URLs; the tests below expect each of them to parse as width 1, height 1.
const PNG_1X1_DATA_URL =
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="
const GIF_1X1_DATA_URL =
"data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7"
/**
 * Builds a data URL holding a 33-byte PNG prefix: the 8-byte signature, an IHDR
 * chunk length of 13, the "IHDR" tag, and the given width/height as big-endian
 * 32-bit integers. The remaining bytes are left zeroed.
 */
function createPngDataUrl(width: number, height: number): string {
  const header = Buffer.alloc(33)
  Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]).copy(header, 0)
  header.writeUInt32BE(13, 8)
  header.write("IHDR", 12, "ascii")
  header.writeUInt32BE(width, 16)
  header.writeUInt32BE(height, 20)
  return "data:image/png;base64," + header.toString("base64")
}
/**
 * Builds a data URL holding a 10-byte GIF prefix: the "GIF89a" signature followed
 * by the given width and height as little-endian 16-bit integers.
 */
function createGifDataUrl(width: number, height: number): string {
  const header = Buffer.alloc(10)
  header.write("GIF89a", 0, "ascii")
  header.writeUInt16LE(width, 6)
  header.writeUInt16LE(height, 8)
  return "data:image/gif;base64," + header.toString("base64")
}
// Covers parseImageDimensions for PNG and GIF data URLs, plus the inputs for which
// it is expected to return null (empty input, truncated PNG, unsupported mime).
describe("parseImageDimensions", () => {
it("parses PNG 1x1 dimensions", () => {
//#given
const dataUrl = PNG_1X1_DATA_URL
//#when
const result = parseImageDimensions(dataUrl, "image/png")
//#then
expect(result).toEqual({ width: 1, height: 1 })
})
it("parses PNG dimensions from IHDR", () => {
//#given
// Synthetic header-only PNG built by createPngDataUrl.
const dataUrl = createPngDataUrl(3000, 2000)
//#when
const result = parseImageDimensions(dataUrl, "image/png")
//#then
expect(result).toEqual({ width: 3000, height: 2000 })
})
it("parses GIF 1x1 dimensions", () => {
//#given
const dataUrl = GIF_1X1_DATA_URL
//#when
const result = parseImageDimensions(dataUrl, "image/gif")
//#then
expect(result).toEqual({ width: 1, height: 1 })
})
it("parses GIF dimensions from logical screen descriptor", () => {
//#given
// Synthetic header-only GIF built by createGifDataUrl.
const dataUrl = createGifDataUrl(320, 240)
//#when
const result = parseImageDimensions(dataUrl, "image/gif")
//#then
expect(result).toEqual({ width: 320, height: 240 })
})
it("returns null for empty input", () => {
//#given
const dataUrl = ""
//#when
const result = parseImageDimensions(dataUrl, "image/png")
//#then
expect(result).toBeNull()
})
it("returns null for too-short PNG buffer", () => {
//#given
// "AAAA" decodes to only three bytes.
const dataUrl = "data:image/png;base64,AAAA"
//#when
const result = parseImageDimensions(dataUrl, "image/png")
//#then
expect(result).toBeNull()
})
it("returns null for unsupported mime type", () => {
//#given
// Valid PNG bytes, but declared with a mime type that is expected to be rejected.
const dataUrl = PNG_1X1_DATA_URL
//#when
const result = parseImageDimensions(dataUrl, "image/heic")
//#then
expect(result).toBeNull()
})
})

View File

@@ -0,0 +1,187 @@
import type { ImageDimensions } from "./types"
import { extractBase64Data } from "../../tools/look-at/mime-type-inference"
/**
 * Wraps a width/height pair as ImageDimensions.
 *
 * @returns `{ width, height }` when both values are finite and strictly positive; otherwise null.
 */
function toImageDimensions(width: number, height: number): ImageDimensions | null {
  const bothFinite = Number.isFinite(width) && Number.isFinite(height)
  const bothPositive = width > 0 && height > 0
  return bothFinite && bothPositive ? { width, height } : null
}
/**
 * Reads width and height from the start of a PNG byte stream.
 *
 * Requires at least 24 bytes, the 8-byte PNG signature, and the ASCII tag "IHDR"
 * at bytes 12-15; width and height are the big-endian 32-bit integers at offsets
 * 16 and 20.
 *
 * @returns The dimensions, or null when the buffer is too short, the header does
 *          not match, or the values are rejected by toImageDimensions.
 */
function parsePngDimensions(buffer: Buffer): ImageDimensions | null {
  const pngSignature = [0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]

  if (buffer.length < 24) {
    return null
  }
  if (!pngSignature.every((byte, index) => buffer[index] === byte)) {
    return null
  }
  if (buffer.toString("ascii", 12, 16) !== "IHDR") {
    return null
  }

  return toImageDimensions(buffer.readUInt32BE(16), buffer.readUInt32BE(20))
}
/**
 * Reads the logical screen size from a GIF header.
 *
 * @param buffer raw file bytes
 * @returns the dimensions, or `null` when the buffer is shorter than 10 bytes or
 *   does not start with "GIF8".
 */
function parseGifDimensions(buffer: Buffer): ImageDimensions | null {
  const looksLikeGif = buffer.length >= 10 && buffer.toString("ascii", 0, 4) === "GIF8"
  if (!looksLikeGif) {
    return null
  }

  // Little-endian 16-bit width at byte 6, height at byte 8.
  return toImageDimensions(buffer.readUInt16LE(6), buffer.readUInt16LE(8))
}
/**
 * Scans JPEG marker segments for a start-of-frame (SOF) header and returns the
 * frame size stored in it.
 *
 * Every SOF variant is accepted (0xC0-0xCF except DHT 0xC4, JPG 0xC8 and
 * DAC 0xCC), not only baseline/progressive. Standalone markers that carry no
 * length field (TEM, RSTn, SOI) are stepped over instead of being parsed as
 * segments.
 *
 * @param buffer raw file bytes
 * @returns the dimensions, or `null` when the buffer is not a JPEG, is truncated,
 *   or reaches SOS/EOI without a frame header.
 */
function parseJpegDimensions(buffer: Buffer): ImageDimensions | null {
  if (buffer.length < 4 || buffer[0] !== 0xff || buffer[1] !== 0xd8) {
    return null
  }

  let offset = 2
  while (offset < buffer.length) {
    // Resynchronise on the next 0xFF if we are not sitting on a marker prefix.
    if (buffer[offset] !== 0xff) {
      offset += 1
      continue
    }

    // A marker may be preceded by any number of 0xFF fill bytes.
    while (offset < buffer.length && buffer[offset] === 0xff) {
      offset += 1
    }
    if (offset >= buffer.length) {
      return null
    }

    const marker = buffer.readUInt8(offset)
    offset += 1

    // EOI or SOS: no frame header can follow.
    if (marker === 0xd9 || marker === 0xda) {
      break
    }

    // TEM, RST0-RST7 and SOI are standalone markers without a length field.
    const isStandalone = marker === 0x01 || (marker >= 0xd0 && marker <= 0xd8)
    if (isStandalone) {
      continue
    }

    if (offset + 1 >= buffer.length) {
      return null
    }
    const segmentLength = buffer.readUInt16BE(offset)
    if (segmentLength < 2) {
      return null
    }

    const isStartOfFrame =
      marker >= 0xc0 && marker <= 0xcf && marker !== 0xc4 && marker !== 0xc8 && marker !== 0xcc
    if (isStartOfFrame) {
      // Layout after the marker: length(2) precision(1) height(2) width(2).
      if (offset + 7 > buffer.length) {
        return null
      }
      const height = buffer.readUInt16BE(offset + 3)
      const width = buffer.readUInt16BE(offset + 5)
      return toImageDimensions(width, height)
    }

    offset += segmentLength
  }

  return null
}
/**
 * Reads an unsigned 24-bit little-endian integer.
 *
 * The caller must guarantee that `offset + 2` is inside the buffer; out-of-range
 * bytes would silently contribute 0 to the result.
 */
function readUInt24LE(buffer: Buffer, offset: number): number {
  return buffer[offset] | (buffer[offset + 1] << 8) | (buffer[offset + 2] << 16)
}

/**
 * Extracts the image size from a WebP file (RIFF container) for the three chunk
 * layouts that can appear first: lossy "VP8 ", lossless "VP8L" and extended "VP8X".
 *
 * @param buffer raw file bytes
 * @returns the dimensions, or `null` when the container header is missing, the
 *   first chunk is of another type, its signature bytes are wrong, or the buffer
 *   is too short to hold the size fields.
 */
function parseWebpDimensions(buffer: Buffer): ImageDimensions | null {
  if (buffer.length < 16) {
    return null
  }
  if (buffer.toString("ascii", 0, 4) !== "RIFF" || buffer.toString("ascii", 8, 12) !== "WEBP") {
    return null
  }

  const chunkType = buffer.toString("ascii", 12, 16)

  if (chunkType === "VP8 ") {
    // Start code at 23..25, then 16-bit width at 26 and height at 28.
    if (buffer.length < 30) {
      return null
    }
    if (buffer[23] !== 0x9d || buffer[24] !== 0x01 || buffer[25] !== 0x2a) {
      return null
    }
    // The upper two bits of each field are not part of the size.
    const width = buffer.readUInt16LE(26) & 0x3fff
    const height = buffer.readUInt16LE(28) & 0x3fff
    return toImageDimensions(width, height)
  }

  if (chunkType === "VP8L") {
    if (buffer.length < 25 || buffer[20] !== 0x2f) {
      return null
    }
    // 14 bits of (width - 1) followed by 14 bits of (height - 1).
    const bits = buffer.readUInt32LE(21)
    const width = (bits & 0x3fff) + 1
    const height = ((bits >>> 14) & 0x3fff) + 1
    return toImageDimensions(width, height)
  }

  if (chunkType === "VP8X") {
    // 24-bit (width - 1) at 24..26 and (height - 1) at 27..29. Without this
    // guard a truncated header would decode as a bogus 1x1 image.
    if (buffer.length < 30) {
      return null
    }
    const width = readUInt24LE(buffer, 24) + 1
    const height = readUInt24LE(buffer, 27) + 1
    return toImageDimensions(width, height)
  }

  return null
}
/**
 * Determines the width and height of a base64-encoded image by inspecting
 * its file header, without decoding the image itself.
 *
 * @param base64DataUrl image payload, passed through `extractBase64Data` before decoding
 * @param mimeType MIME type used (case-insensitively) to select the header parser;
 *   PNG, GIF, JPEG ("image/jpeg" or "image/jpg") and WebP are handled
 * @returns the dimensions, or `null` for empty input, an unsupported MIME type,
 *   an unparsable header, or any error thrown while parsing.
 */
export function parseImageDimensions(base64DataUrl: string, mimeType: string): ImageDimensions | null {
  if (!base64DataUrl || !mimeType) {
    return null
  }

  try {
    const encodedPayload = extractBase64Data(base64DataUrl)
    if (!encodedPayload) {
      return null
    }

    const imageBytes = Buffer.from(encodedPayload, "base64")
    if (imageBytes.length === 0) {
      return null
    }

    switch (mimeType.toLowerCase()) {
      case "image/png":
        return parsePngDimensions(imageBytes)
      case "image/gif":
        return parseGifDimensions(imageBytes)
      case "image/jpeg":
      case "image/jpg":
        return parseJpegDimensions(imageBytes)
      case "image/webp":
        return parseWebpDimensions(imageBytes)
      default:
        return null
    }
  } catch {
    // Header parsing is best-effort: any failure is reported as "unknown size".
    return null
  }
}

View File

@@ -0,0 +1,132 @@
/// <reference types="bun-types" />
import { afterEach, describe, expect, it, mock } from "bun:test"
// Base64 data URL of a PNG whose IHDR declares a 1x1 image; used as resizeImage input below.
const PNG_1X1_DATA_URL =
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="
// Static type of the module under test, applied to the dynamically imported copies.
type ImageResizerModule = typeof import("./image-resizer")
/**
 * Imports `./image-resizer` under a unique query string so every call uses a
 * specifier that has not been requested before.
 */
async function importFreshImageResizerModule(): Promise<ImageResizerModule> {
  const uniqueSuffix = `test-${Date.now()}-${Math.random()}`
  return import(`./image-resizer?${uniqueSuffix}`)
}
// Exercises the long-edge scaling rule, including the default 1568 limit and a custom one.
describe("calculateTargetDimensions", () => {
  it("returns null when dimensions are already within limits", async () => {
    //#given
    const resizer = await importFreshImageResizerModule()

    //#when
    const target = resizer.calculateTargetDimensions(800, 600)

    //#then
    expect(target).toBeNull()
  })

  it("returns null at exact long-edge boundary", async () => {
    //#given
    const resizer = await importFreshImageResizerModule()

    //#when
    const target = resizer.calculateTargetDimensions(1568, 1000)

    //#then
    expect(target).toBeNull()
  })

  it("scales landscape dimensions by max long edge", async () => {
    //#given
    const resizer = await importFreshImageResizerModule()
    const expectedHeight = Math.floor(2000 * (1568 / 3000))

    //#when
    const target = resizer.calculateTargetDimensions(3000, 2000)

    //#then
    expect(target).toEqual({ width: 1568, height: expectedHeight })
  })

  it("scales portrait dimensions by max long edge", async () => {
    //#given
    const resizer = await importFreshImageResizerModule()
    const expectedWidth = Math.floor(2000 * (1568 / 3000))

    //#when
    const target = resizer.calculateTargetDimensions(2000, 3000)

    //#then
    expect(target).toEqual({ width: expectedWidth, height: 1568 })
  })

  it("scales square dimensions to exact target", async () => {
    //#given
    const resizer = await importFreshImageResizerModule()

    //#when
    const target = resizer.calculateTargetDimensions(4000, 4000)

    //#then
    expect(target).toEqual({ width: 1568, height: 1568 })
  })

  it("uses custom maxLongEdge when provided", async () => {
    //#given
    const resizer = await importFreshImageResizerModule()
    const customMaxLongEdge = 1000

    //#when
    const target = resizer.calculateTargetDimensions(2000, 1000, customMaxLongEdge)

    //#then
    expect(target).toEqual({ width: 1000, height: 500 })
  })
})
// Failure-path tests for resizeImage: both cases expect a null result rather than a thrown error.
describe("resizeImage", () => {
// Undo the "sharp" module mocks registered by each test.
afterEach(() => {
mock.restore()
})
it("returns null when sharp import fails", async () => {
//#given
// The module factory throws, so importing "sharp" fails.
mock.module("sharp", () => {
throw new Error("sharp unavailable")
})
// Imported only after the mock is registered.
const { resizeImage } = await importFreshImageResizerModule()
//#when
const result = await resizeImage(PNG_1X1_DATA_URL, "image/png", {
width: 1,
height: 1,
})
//#then
expect(result).toBeNull()
})
it("returns null when sharp throws during resize", async () => {
//#given
// Fake sharp factory whose instance only offers resize(), and resize() throws.
const mockSharpFactory = mock(() => ({
resize: () => {
throw new Error("resize failed")
},
}))
// Exposed as the default export of the mocked "sharp" module.
mock.module("sharp", () => ({
default: mockSharpFactory,
}))
const { resizeImage } = await importFreshImageResizerModule()
//#when
const result = await resizeImage(PNG_1X1_DATA_URL, "image/png", {
width: 1,
height: 1,
})
//#then
expect(result).toBeNull()
})
})

View File

@@ -0,0 +1,184 @@
import type { ImageDimensions, ResizeResult } from "./types"
import { extractBase64Data } from "../../tools/look-at/mime-type-inference"
import { log } from "../../shared"
// Default limit for the longer image side; used as the default `maxLongEdge` of calculateTargetDimensions.
const ANTHROPIC_MAX_LONG_EDGE = 1568
// Encoded-size threshold (5 MiB in bytes) above which resizeImage retries with lower quality.
const ANTHROPIC_MAX_FILE_SIZE = 5 * 1024 * 1024
// Output formats this module asks sharp to encode.
type SharpFormat = "jpeg" | "png" | "gif" | "webp"
// The subset of sharp's metadata result that this module reads.
interface SharpMetadata {
width?: number
height?: number
}
// Minimal structural view of a sharp pipeline: only the methods this module calls.
interface SharpInstance {
resize(width: number, height: number, options: { fit: "inside" }): SharpInstance
toFormat(format: SharpFormat, options?: { quality?: number }): SharpInstance
toBuffer(): Promise<Buffer>
metadata(): Promise<SharpMetadata>
}
// Callable shape of the sharp entry point as used here: wraps an input buffer in a pipeline.
type SharpFactory = (input: Buffer) => SharpInstance
/**
 * Finds the callable sharp entry point in a dynamically imported module value.
 *
 * @param sharpModule whatever the dynamic import produced
 * @returns the value itself when it is a function, its `default` property when
 *   that is a function, otherwise `null`.
 */
function resolveSharpFactory(sharpModule: unknown): SharpFactory | null {
  switch (typeof sharpModule) {
    case "function":
      return sharpModule as SharpFactory
    case "object": {
      if (sharpModule === null) {
        return null
      }
      const candidate: unknown = Reflect.get(sharpModule, "default")
      return typeof candidate === "function" ? (candidate as SharpFactory) : null
    }
    default:
      return null
  }
}
/**
 * Maps a MIME type (case-insensitive) to the sharp output format.
 *
 * PNG, GIF and WebP keep their own format; every other value, including JPEG
 * and unrecognised types, maps to "jpeg".
 */
function resolveSharpFormat(mimeType: string): SharpFormat {
  switch (mimeType.toLowerCase()) {
    case "image/png":
      return "png"
    case "image/gif":
      return "gif"
    case "image/webp":
      return "webp"
    default:
      return "jpeg"
  }
}
/** Tells whether the quality-reduction retries apply to this format (JPEG and WebP only). */
function canAdjustQuality(format: SharpFormat): boolean {
  const qualityAdjustableFormats: readonly SharpFormat[] = ["jpeg", "webp"]
  return qualityAdjustableFormats.includes(format)
}
/**
 * Converts sharp metadata into `ImageDimensions`.
 *
 * @returns `null` when width or height is missing or falsy (e.g. 0).
 */
function toDimensions(metadata: SharpMetadata): ImageDimensions | null {
  const reportedWidth = metadata.width
  const reportedHeight = metadata.height
  return reportedWidth && reportedHeight ? { width: reportedWidth, height: reportedHeight } : null
}
/**
 * Runs one sharp pipeline: resize to fit inside `target`, encode as `format`,
 * and resolve with the encoded bytes.
 *
 * Encoder options are passed only when `quality` is truthy; otherwise
 * `undefined` is passed explicitly as the options argument.
 */
async function renderResizedBuffer(args: {
  sharpFactory: SharpFactory
  inputBuffer: Buffer
  target: ImageDimensions
  format: SharpFormat
  quality?: number
}): Promise<Buffer> {
  const { sharpFactory: createPipeline, inputBuffer: sourceBytes, target: bounds, format: outputFormat, quality } = args
  const encoderOptions = quality ? { quality } : undefined

  const pipeline = createPipeline(sourceBytes)
  const scaled = pipeline.resize(bounds.width, bounds.height, { fit: "inside" })
  const encoded = scaled.toFormat(outputFormat, encoderOptions)
  return encoded.toBuffer()
}
/** Produces a loggable message: `Error#message` for Error instances, `String(value)` otherwise. */
function getErrorMessage(error: unknown): string {
  if (error instanceof Error) {
    return error.message
  }
  return String(error)
}
/**
 * Computes the size an image should be scaled down to so that its longer side
 * equals `maxLongEdge`, preserving the aspect ratio.
 *
 * @param width original width
 * @param height original height
 * @param maxLongEdge largest allowed value for the longer side
 * @returns the target size, or `null` when no resize is needed (the longer side
 *   already fits) or when any argument is not a finite positive number.
 */
export function calculateTargetDimensions(
  width: number,
  height: number,
  maxLongEdge = ANTHROPIC_MAX_LONG_EDGE,
): ImageDimensions | null {
  // NaN and Infinity pass a plain `<= 0` test and would yield NaN or nonsensical
  // targets, so reject them explicitly.
  const isPositiveFinite = (value: number): boolean => Number.isFinite(value) && value > 0
  if (!isPositiveFinite(width) || !isPositiveFinite(height) || !isPositiveFinite(maxLongEdge)) {
    return null
  }

  const longEdge = Math.max(width, height)
  if (longEdge <= maxLongEdge) {
    return null
  }

  // The short side is floored but never allowed to collapse to 0.
  if (width >= height) {
    return {
      width: maxLongEdge,
      height: Math.max(1, Math.floor((height * maxLongEdge) / width)),
    }
  }

  return {
    width: Math.max(1, Math.floor((width * maxLongEdge) / height)),
    height: maxLongEdge,
  }
}
/**
 * Re-encodes a base64 image so it fits inside `target`, using the optional
 * "sharp" module.
 *
 * The function never throws: a missing or unusable sharp module, an empty
 * payload, missing metadata, or any error raised along the way results in
 * `null` (failures are reported through `log`).
 *
 * @param base64DataUrl image payload, passed through `extractBase64Data` before decoding
 * @param mimeType MIME type of the input; selects the output format and labels the returned data URL
 * @param target bounding box handed to sharp with `fit: "inside"`
 * @returns the resized data URL with the original and resized dimensions, or `null`
 */
export async function resizeImage(
base64DataUrl: string,
mimeType: string,
target: ImageDimensions,
): Promise<ResizeResult | null> {
try {
// sharp is loaded at call time through a non-literal specifier; an import failure becomes null.
// NOTE(review): presumably this keeps sharp an optional dependency — confirm.
const sharpModuleName = "sharp"
const sharpModule = await import(sharpModuleName).catch(() => null)
if (!sharpModule) {
log("[read-image-resizer] sharp unavailable, skipping resize")
return null
}
const sharpFactory = resolveSharpFactory(sharpModule)
if (!sharpFactory) {
log("[read-image-resizer] sharp import has unexpected shape")
return null
}
const rawBase64 = extractBase64Data(base64DataUrl)
if (!rawBase64) {
return null
}
const inputBuffer = Buffer.from(rawBase64, "base64")
if (inputBuffer.length === 0) {
return null
}
// Dimensions of the input as reported by sharp.
const original = toDimensions(await sharpFactory(inputBuffer).metadata())
if (!original) {
return null
}
const format = resolveSharpFormat(mimeType)
// First attempt: no explicit quality option.
let resizedBuffer = await renderResizedBuffer({
sharpFactory,
inputBuffer,
target,
format,
})
// Still above the size limit: for JPEG/WebP, retry at decreasing quality until it fits.
// If even the last attempt is too large, that last buffer is used as-is.
if (resizedBuffer.length > ANTHROPIC_MAX_FILE_SIZE && canAdjustQuality(format)) {
for (const quality of [80, 60, 40]) {
resizedBuffer = await renderResizedBuffer({
sharpFactory,
inputBuffer,
target,
format,
quality,
})
if (resizedBuffer.length <= ANTHROPIC_MAX_FILE_SIZE) {
break
}
}
}
// Re-read the output so the reported size reflects what sharp actually produced.
const resized = toDimensions(await sharpFactory(resizedBuffer).metadata())
if (!resized) {
return null
}
// NOTE(review): the URL is labelled with the caller's mimeType, while resolveSharpFormat
// encodes unrecognised MIME types as JPEG — confirm callers only pass PNG/GIF/WebP/JPEG.
return {
resizedDataUrl: `data:${mimeType};base64,${resizedBuffer.toString("base64")}`,
original,
resized,
}
} catch (error) {
// Best-effort: log the failure with context and signal "no resize" to the caller.
log("[read-image-resizer] resize failed", {
error: getErrorMessage(error),
mimeType,
target,
})
return null
}
}

View File

@@ -0,0 +1 @@
export { createReadImageResizerHook } from "./hook"

View File

@@ -0,0 +1,16 @@
/** Width and height of an image, as read from a file header or from sharp metadata. */
export interface ImageDimensions {
width: number
height: number
}
/**
 * An image attached to a tool result.
 * NOTE(review): not used in the code visible here; presumably `mime` is the MIME
 * type and `url` a base64 data URL — confirm against the hook implementation.
 */
export interface ImageAttachment {
mime: string
url: string
filename?: string
}
/** Outcome of a successful resize. */
export interface ResizeResult {
// Re-encoded image as a `data:<mime>;base64,...` URL.
resizedDataUrl: string
// Dimensions of the input image.
original: ImageDimensions
// Dimensions of the re-encoded image.
resized: ImageDimensions
}

View File

@@ -12,6 +12,7 @@ import {
createTasksTodowriteDisablerHook,
createWriteExistingFileGuardHook,
createHashlineReadEnhancerHook,
createReadImageResizerHook,
createJsonErrorRecoveryHook,
} from "../../hooks"
import {
@@ -33,6 +34,7 @@ export type ToolGuardHooks = {
writeExistingFileGuard: ReturnType<typeof createWriteExistingFileGuardHook> | null
hashlineReadEnhancer: ReturnType<typeof createHashlineReadEnhancerHook> | null
jsonErrorRecovery: ReturnType<typeof createJsonErrorRecoveryHook> | null
readImageResizer: ReturnType<typeof createReadImageResizerHook> | null
}
export function createToolGuardHooks(args: {
@@ -105,6 +107,10 @@ export function createToolGuardHooks(args: {
? safeHook("json-error-recovery", () => createJsonErrorRecoveryHook(ctx))
: null
const readImageResizer = isHookEnabled("read-image-resizer")
? safeHook("read-image-resizer", () => createReadImageResizerHook(ctx))
: null
return {
commentChecker,
toolOutputTruncator,
@@ -116,5 +122,6 @@ export function createToolGuardHooks(args: {
writeExistingFileGuard,
hashlineReadEnhancer,
jsonErrorRecovery,
readImageResizer,
}
}

View File

@@ -43,6 +43,7 @@ export function createToolExecuteAfterHandler(args: {
await hooks.delegateTaskRetry?.["tool.execute.after"]?.(input, output)
await hooks.atlasHook?.["tool.execute.after"]?.(input, output)
await hooks.taskResumeInfo?.["tool.execute.after"]?.(input, output)
await hooks.readImageResizer?.["tool.execute.after"]?.(input, output)
await hooks.hashlineReadEnhancer?.["tool.execute.after"]?.(input, output)
await hooks.jsonErrorRecovery?.["tool.execute.after"]?.(input, output)
}