
























@@ -18,6 +18,7 @@ describe("buildXiaomiSpeechProvider", () => {
1818expect(provider.aliases).toContain("mimo");
1919expect(provider.models).toContain("mimo-v2.5-tts");
2020expect(provider.models).toContain("mimo-v2-tts");
21+expect(provider.models).toContain("mimo-v2.5-tts-voicedesign");
2122expect(provider.voices).toContain("mimo_default");
2223});
2324});
@@ -81,6 +82,24 @@ describe("buildXiaomiSpeechProvider", () => {
8182});
8283expect(config.voice).toBe("default_zh");
8384});
// Checks that resolveConfig reads the `modelId` and `speakerVoice` keys from
// the `providers.xiaomi` section of the raw config and surfaces them as
// `config.model` and `config.voice` respectively.
85+86+it("accepts generic model and speaker voice aliases", () => {
87+const config = provider.resolveConfig!({
88+rawConfig: {
89+providers: {
90+xiaomi: {
91+modelId: "mimo-v2.5-tts-voicedesign",
92+speakerVoice: "Chloe",
93+},
94+},
95+},
// `cfg` is an empty placeholder cast to `never`; all settings under test
// come from `rawConfig` above.
96+cfg: {} as never,
97+timeoutMs: 30000,
98+});
// The alias keys must come back under the canonical `model` / `voice` names.
99+100+expect(config.model).toBe("mimo-v2.5-tts-voicedesign");
101+expect(config.voice).toBe("Chloe");
102+});
84103});
8510486105describe("parseDirectiveToken", () => {
@@ -179,6 +198,80 @@ describe("buildXiaomiSpeechProvider", () => {
179198expect(transcodeAudioBufferToOpusMock).not.toHaveBeenCalled();
180199});
// Synthesizes with the voice-design model, a configured `speakerVoice`, a
// `wav` format and an explicit `style`, then checks both the returned result
// and the JSON request body handed to the mocked `fetch`.
181200201+it("omits voice and uses configured style for Xiaomi voice design models", async () => {
// The mocked response carries the audio as base64 at
// choices[0].message.audio.data.
202+const audio = Buffer.from("fake-wav-audio").toString("base64");
203+const mockFetch = vi.mocked(globalThis.fetch);
204+mockFetch.mockResolvedValueOnce(
205+new Response(JSON.stringify({ choices: [{ message: { audio: { data: audio } } }] }), {
206+status: 200,
207+headers: { "Content-Type": "application/json" },
208+}),
209+);
210+211+const result = await provider.synthesize({
212+text: "Hello from OpenClaw.",
213+cfg: {} as never,
214+providerConfig: {
215+apiKey: "sk-test",
216+modelId: "mimo-v2.5-tts-voicedesign",
217+speakerVoice: "Chloe",
218+format: "wav",
219+style: "Warm, bright, natural voice.",
220+},
221+target: "audio-file",
222+timeoutMs: 30000,
223+});
// For an "audio-file" target the result is expected to stay in the requested
// wav format, flagged as not voice-compatible, with the decoded payload intact.
224+225+expect(result.outputFormat).toBe("wav");
226+expect(result.fileExtension).toBe(".wav");
227+expect(result.voiceCompatible).toBe(false);
228+expect(result.audioBuffer.toString()).toBe("fake-wav-audio");
// Inspect the single recorded fetch call: the second argument is the request
// init, whose body is parsed here as a JSON string.
229+230+expect(mockFetch).toHaveBeenCalledOnce();
231+const [, init] = mockFetch.mock.calls[0] ?? [];
232+const body = JSON.parse(init!.body as string);
233+expect(body.model).toBe("mimo-v2.5-tts-voicedesign");
// The configured style is expected as the user message and the text to speak
// as the assistant message.
234+expect(body.messages).toEqual([
235+{ role: "user", content: "Warm, bright, natural voice." },
236+{ role: "assistant", content: "Hello from OpenClaw." },
237+]);
// Only `format` is expected in the audio options: the configured
// `speakerVoice` ("Chloe") must not appear in the request.
238+expect(body.audio).toEqual({ format: "wav" });
239+});
// Synthesizes with the voice-design model while configuring neither `style`
// nor `format`, and checks that the request still carries a two-message
// prompt and an audio format.
240+241+it("uses a default style for Xiaomi voice design models", async () => {
242+const audio = Buffer.from("fake-mp3-audio").toString("base64");
243+const mockFetch = vi.mocked(globalThis.fetch);
244+mockFetch.mockResolvedValueOnce(
245+new Response(JSON.stringify({ choices: [{ message: { audio: { data: audio } } }] }), {
246+status: 200,
247+headers: { "Content-Type": "application/json" },
248+}),
249+);
// Only the API key and model are supplied; the return value is not inspected,
// this test looks at the outgoing request only.
250+251+await provider.synthesize({
252+text: "Hello from OpenClaw.",
253+cfg: {} as never,
254+providerConfig: {
255+apiKey: "sk-test",
256+model: "mimo-v2.5-tts-voicedesign",
257+},
258+target: "audio-file",
259+timeoutMs: 30000,
260+});
261+262+expect(mockFetch).toHaveBeenCalledOnce();
263+const [, init] = mockFetch.mock.calls[0] ?? [];
264+const body = JSON.parse(init!.body as string);
// The exact default style text is not pinned; the test only requires a user
// message whose content contains "natural", followed by the text to speak as
// the assistant message.
265+expect(body.messages).toHaveLength(2);
266+expect(body.messages[0]?.role).toBe("user");
267+expect(body.messages[0]?.content).toContain("natural");
268+expect(body.messages[1]).toEqual({
269+role: "assistant",
270+content: "Hello from OpenClaw.",
271+});
// With no `format` configured, the request is expected to ask for mp3.
272+expect(body.audio).toEqual({ format: "mp3" });
273+});
274+182275it("transcodes Xiaomi output to Opus for voice-note targets", async () => {
183276const audio = Buffer.from("fake-mp3-audio").toString("base64");
184277vi.mocked(globalThis.fetch).mockResolvedValueOnce(
@@ -209,6 +302,43 @@ describe("buildXiaomiSpeechProvider", () => {
209302});
210303});
// Synthesizes with the voice-design model and `format: "wav"` for a
// "voice-note" target, and checks that the returned audio is the output of the
// mocked Opus transcoder while the request still asks the API for wav.
211304305+it("transcodes Xiaomi voice design output to Opus for voice-note targets", async () => {
306+const audio = Buffer.from("fake-wav-audio").toString("base64");
307+vi.mocked(globalThis.fetch).mockResolvedValueOnce(
308+new Response(JSON.stringify({ choices: [{ message: { audio: { data: audio } } }] }), {
309+status: 200,
310+headers: { "Content-Type": "application/json" },
311+}),
312+);
// The transcoder is mocked, so no real audio conversion happens; it simply
// resolves to a recognizable buffer.
313+transcodeAudioBufferToOpusMock.mockResolvedValueOnce(Buffer.from("fake-opus-audio"));
314+315+const result = await provider.synthesize({
316+text: "Hello from OpenClaw.",
317+cfg: {} as never,
318+providerConfig: {
319+apiKey: "sk-test",
320+model: "mimo-v2.5-tts-voicedesign",
321+format: "wav",
322+},
323+target: "voice-note",
324+timeoutMs: 30000,
325+});
// The result must describe the transcoded audio, not the wav that was fetched.
326+327+expect(result.outputFormat).toBe("opus");
328+expect(result.fileExtension).toBe(".opus");
329+expect(result.voiceCompatible).toBe(true);
330+expect(result.audioBuffer.toString()).toBe("fake-opus-audio");
// The transcoder must receive the decoded wav bytes, the "wav" input
// extension, the "tts-xiaomi-" temp prefix, and the same timeout that was
// passed to synthesize.
331+expect(transcodeAudioBufferToOpusMock).toHaveBeenCalledWith({
332+audioBuffer: Buffer.from("fake-wav-audio"),
333+inputExtension: "wav",
334+tempPrefix: "tts-xiaomi-",
335+timeoutMs: 30000,
336+});
// The request sent to the API keeps the configured wav format; Opus is
// produced only by the transcoding step.
337+const [, init] = vi.mocked(globalThis.fetch).mock.calls[0] ?? [];
338+const body = JSON.parse(init!.body as string);
339+expect(body.audio).toEqual({ format: "wav" });
340+});
341+212342it("caps oversized TTS request timeouts before scheduling or fetching", async () => {
213343const audio = Buffer.from("fake-mp3-audio").toString("base64");
214344const timeoutSpy = vi
此内容由惯性聚合(RSS阅读器)自动聚合整理,仅供阅读参考。版权归原作者所有。