fix(tts): preserve legacy tool voice hints · openclaw/openclaw@60f9358

Recent Commits to openclaw:main

steipete · 2026-04-26 · via Recent Commits to openclaw:main

Original file line number	Diff line number	Diff line change
`@@ -25,6 +25,9 @@ Docs: https://docs.openclaw.ai`
`25`	`25`
`26`	`26`	`### Fixes`
`27`	`27`
	`28`	``+- Agents/TTS: preserve legacy `[[audio_as_voice]]` hints on trusted tool-result``
	`29`	``+ `MEDIA:` payloads so generated audio still delivers as a voice note. (#46535)``
	`30`	`+ Thanks @azade-c.`
`28`	`31`	`- Telegram/STT: frame inbound voice-note transcripts as machine-generated,`
`29`	`32`	`untrusted text in agent context while preserving raw transcript mention`
`30`	`33`	`detection. Closes #33360. Thanks @smartchainark.`

Original file line number	Diff line number	Diff line change
`@@ -14,6 +14,7 @@ Assistant output can carry a small set of delivery/render directives:`
`14`	`14`	``- `[embed ...]` for Control UI rich rendering``
`15`	`15`
`16`	`16`	``These directives are separate. `MEDIA:` and reply/voice tags remain delivery metadata; `[embed ...]` is the web-only rich render path.``
	`17`	``+Trusted tool-result media uses the same `MEDIA:` / `[[audio_as_voice]]` parser before delivery, so legacy tool outputs can still mark an audio attachment as a voice note.``
`17`	`18`
`18`	`19`	``When block streaming is enabled, `MEDIA:` remains single-delivery metadata for a``
`19`	`20`	`turn. If the same media URL is sent in a streamed block and repeated in the final`

Original file line number	Diff line number	Diff line change
`@@ -165,6 +165,34 @@ describe("handleToolExecutionEnd media emission", () => {`
`165`	`165`	`expect(ctx.state.pendingToolMediaUrls).toEqual(["/tmp/screenshot.png"]);`
`166`	`166`	`});`
`167`	`167`
	`168`	`+it("preserves legacy audio_as_voice when queuing trusted MEDIA tool output", async () => {`
	`169`	`+const onToolResult = vi.fn();`
	`170`	`+const ctx = createMockContext({`
	`171`	`+shouldEmitToolOutput: false,`
	`172`	`+ onToolResult,`
	`173`	`+builtinToolNames: new Set(["tts"]),`
	`174`	`+});`
	`175`	`+`
	`176`	`+await handleToolExecutionEnd(ctx, {`
	`177`	`+type: "tool_execution_end",`
	`178`	`+toolName: "tts",`
	`179`	`+toolCallId: "tc-1",`
	`180`	`+isError: false,`
	`181`	`+result: {`
	`182`	`+content: [`
	`183`	`+{`
	`184`	`+type: "text",`
	`185`	`+text: "Generated audio reply.\n[[audio_as_voice]]\nMEDIA:/tmp/reply.opus",`
	`186`	`+},`
	`187`	`+],`
	`188`	`+},`
	`189`	`+});`
	`190`	`+`
	`191`	`+expect(onToolResult).not.toHaveBeenCalled();`
	`192`	`+expect(ctx.state.pendingToolMediaUrls).toEqual(["/tmp/reply.opus"]);`
	`193`	`+expect(ctx.state.pendingToolAudioAsVoice).toBe(true);`
	`194`	`+});`
	`195`	`+`
`168`	`196`	`it("does NOT emit local media for untrusted tools", async () => {`
`169`	`197`	`const onToolResult = vi.fn();`
`170`	`198`	`const ctx = createMockContext({ shouldEmitToolOutput: false, onToolResult });`

Original file line number	Diff line number	Diff line change
`@@ -51,6 +51,33 @@ describe("extractToolResultMediaPaths", () => {`
`51`	`51`	`});`
`52`	`52`	`});`
`53`	`53`
	`54`	`+it("extracts audioAsVoice from legacy MEDIA text", () => {`
	`55`	`+expect(`
	`56`	`+extractToolResultMediaArtifact({`
	`57`	`+content: [`
	`58`	`+{ type: "text", text: "Generated audio\n[[audio_as_voice]]\nMEDIA:/tmp/reply.opus" },`
	`59`	`+],`
	`60`	`+}),`
	`61`	`+).toEqual({`
	`62`	`+mediaUrls: ["/tmp/reply.opus"],`
	`63`	`+audioAsVoice: true,`
	`64`	`+});`
	`65`	`+});`
	`66`	`+`
	`67`	`+it("keeps legacy audioAsVoice when the tag and MEDIA path are in separate text blocks", () => {`
	`68`	`+expect(`
	`69`	`+extractToolResultMediaArtifact({`
	`70`	`+content: [`
	`71`	`+{ type: "text", text: "[[audio_as_voice]]" },`
	`72`	`+{ type: "text", text: "MEDIA:/tmp/reply.opus" },`
	`73`	`+],`
	`74`	`+}),`
	`75`	`+).toEqual({`
	`76`	`+mediaUrls: ["/tmp/reply.opus"],`
	`77`	`+audioAsVoice: true,`
	`78`	`+});`
	`79`	`+});`
	`80`	`+`
`54`	`81`	`it("extracts structured media trust markers", () => {`
`55`	`82`	`expect(`
`56`	`83`	`extractToolResultMediaArtifact({`

Original file line number	Diff line number	Diff line change
`@@ -307,6 +307,7 @@ export function extractToolResultMediaArtifact(`
`307`	`307`	`// parser so directive matching and validation stay in sync with outbound`
`308`	`308`	`// reply parsing.`
`309`	`309`	`const paths: string[] = [];`
	`310`	`+let audioAsVoice = false;`
`310`	`311`	`let hasImageContent = false;`
`311`	`312`	`for (const item of content) {`
`312`	`313`	`if (!item || typeof item !== "object") {`
`@@ -319,14 +320,20 @@ export function extractToolResultMediaArtifact(`
`319`	`320`	`}`
`320`	`321`	`if (entry.type === "text" && typeof entry.text === "string") {`
`321`	`322`	`const parsed = splitMediaFromOutput(entry.text);`
	`323`	`+if (parsed.audioAsVoice) {`
	`324`	`+audioAsVoice = true;`
	`325`	`+}`
`322`	`326`	`if (parsed.mediaUrls?.length) {`
`323`	`327`	`paths.push(...parsed.mediaUrls);`
`324`	`328`	`}`
`325`	`329`	`}`
`326`	`330`	`}`
`327`	`331`
`328`	`332`	`if (paths.length > 0) {`
`329`		`-return { mediaUrls: paths };`
	`333`	`+return {`
	`334`	`+mediaUrls: paths,`
	`335`	`+ ...(audioAsVoice ? { audioAsVoice: true } : {}),`
	`336`	`+};`
`330`	`337`	`}`
`331`	`338`
`332`	`339`	`// Fall back to legacy details.path when image content exists but no`

此内容由惯性聚合(RSS阅读器)自动聚合整理，仅供阅读参考。原文来自 — 版权归原作者所有。