@@ -430,6 +430,78 @@ describe("WebRtcSdpRealtimeTalkTransport", () => {
|
430 | 430 | transport.stop(); |
431 | 431 | }); |
432 | 432 | |
| 433 | +// Parameterized over the two OpenAI Realtime output modes: the audio-output "done" event carries the final string in `transcript`, while the text-output "done" event carries it in `text`. |
| 434 | +// In either mode, one delta ("hi") followed by one done ("hi there") must produce a non-final and then a final assistant transcript, plus `output.text.delta` / `output.text.done` talk events carrying the same text. |
| 435 | +it.each([ |
| 436 | +{ |
| 437 | +label: "audio output", |
| 438 | +deltaType: "response.output_audio_transcript.delta", |
| 439 | +doneType: "response.output_audio_transcript.done", |
| 440 | +doneField: { transcript: "hi there" }, |
| 441 | +}, |
| 442 | +{ |
| 443 | +label: "text output", |
| 444 | +deltaType: "response.output_text.delta", |
| 445 | +doneType: "response.output_text.done", |
| 446 | +doneField: { text: "hi there" }, |
| 447 | +}, |
| 448 | +])( |
| 449 | +"emits assistant transcripts from OpenAI Realtime $label events", |
| 450 | +async ({ deltaType, doneType, doneField }) => { |
| 451 | +vi.stubGlobal( |
| 452 | +"fetch", |
| 453 | +vi.fn(async () => new Response("answer-sdp")) as unknown as typeof fetch, |
| 454 | +); |
| 455 | +const onTranscript = vi.fn(); |
| 456 | +const onTalkEvent = vi.fn(); |
| 457 | +const transport = new WebRtcSdpRealtimeTalkTransport( |
| 458 | +{ |
| 459 | +provider: "openai", |
| 460 | +transport: "webrtc", |
| 461 | +clientSecret: "client-secret-123", |
| 462 | +}, |
| 463 | +{ |
| 464 | +client: {} as never, |
| 465 | +sessionKey: "main", |
| 466 | +callbacks: { onTranscript, onTalkEvent }, |
| 467 | +}, |
| 468 | +); |
| 469 | + |
| 470 | +await transport.start(); |
| 471 | +const peer = FakePeerConnection.instances[0]; |
| 472 | +peer?.channel.dispatchEvent( |
| 473 | +new MessageEvent("message", { |
| 474 | +data: JSON.stringify({ type: deltaType, item_id: "response-1", delta: "hi" }), |
| 475 | +}), |
| 476 | +); |
| 477 | +peer?.channel.dispatchEvent( |
| 478 | +new MessageEvent("message", { |
| 479 | +data: JSON.stringify({ type: doneType, item_id: "response-1", ...doneField }), |
| 480 | +}), |
| 481 | +); |
| 482 | + |
| 483 | +expect(onTranscript).toHaveBeenCalledWith({ |
| 484 | +role: "assistant", |
| 485 | +text: "hi", |
| 486 | +final: false, |
| 487 | +}); |
| 488 | +expect(onTranscript).toHaveBeenCalledWith({ |
| 489 | +role: "assistant", |
| 490 | +text: "hi there", |
| 491 | +final: true, |
| 492 | +}); |
| 493 | +expect(onTalkEvent.mock.calls.map(([event]) => event.type)).toEqual([ |
| 494 | +"output.text.delta", |
| 495 | +"output.text.done", |
| 496 | +]); |
| 497 | +expect(onTalkEvent.mock.calls.map(([event]) => event.payload)).toEqual([ |
| 498 | +{ text: "hi" }, |
| 499 | +{ text: "hi there" }, |
| 500 | +]); |
| 501 | +transport.stop(); |
| 502 | +}, |
| 503 | +); |
| 504 | + |
433 | 505 | it("aborts an in-flight OpenAI tool consult when the transport stops", async () => { |
434 | 506 | vi.stubGlobal( |
435 | 507 | "fetch", |
|