























@@ -231,7 +231,11 @@ describe("handleToolExecutionEnd media emission", () => {
   });
 
   it("still queues structured media when verbose is full", async () => {
-    const ctx = createMockContext({ shouldEmitToolOutput: true, onToolResult: vi.fn() });
+    const ctx = createMockContext({
+      shouldEmitToolOutput: true,
+      onToolResult: vi.fn(),
+      toolResultFormat: "plain",
+    });
 
     await handleToolExecutionEnd(ctx, {
       type: "tool_execution_end",
@@ -254,6 +258,34 @@ describe("handleToolExecutionEnd media emission", () => {
     expect(ctx.state.pendingToolAudioAsVoice).toBe(true);
   });
 
+  it("does not queue a duplicate voice copy when emitted tool output already sent the same audio", async () => {
+    const ctx = createMockContext({
+      shouldEmitToolOutput: true,
+      onToolResult: vi.fn(),
+      toolResultFormat: "plain",
+    });
+
+    await handleToolExecutionEnd(ctx, {
+      type: "tool_execution_end",
+      toolName: "tts",
+      toolCallId: "tc-1",
+      isError: false,
+      result: {
+        content: [{ type: "text", text: "Generated audio reply.\nMEDIA:/tmp/reply.opus" }],
+        details: {
+          media: {
+            mediaUrl: "/tmp/reply.opus",
+            audioAsVoice: true,
+          },
+        },
+      },
+    });
+
+    expect(ctx.emitToolOutput).toHaveBeenCalled();
+    expect(ctx.state.pendingToolMediaUrls).toEqual([]);
+    expect(ctx.state.pendingToolAudioAsVoice).toBe(false);
+  });
+
   async function handleVerboseGeneratedImage(toolResultFormat: "plain" | "markdown") {
     const ctx = createMockContext({
       shouldEmitToolOutput: true,
此内容由惯性聚合(RSS阅读器)自动聚合整理,仅供阅读参考。 原文来自 — 版权归原作者所有。