Getting started › End-to-end examples

Real-time live captioner

Stream audio from your microphone with keyterms prompting for domain-specific accuracy, ideal for live events, accessibility, and broadcast captioning.

Products used: Streaming STT + Universal-3 Pro + keyterms prompting

Model selection: Uses u3-rt-pro for sub-300ms latency with format_turns enabled for clean, readable captions.

1# pip install pyaudio websocket-client
2import pyaudio
3import websocket
4import json
5import threading
6import time
7from urllib.parse import urlencode
8
# ── Config ────────────────────────────────────────────────────
YOUR_API_KEY = "YOUR_API_KEY"

# Domain-specific vocabulary sent with the session to bias recognition
# toward these exact spellings.
KEYTERMS = ["AssemblyAI", "Universal-3 Pro", "LLM Gateway", "speech-to-text"]

CONNECTION_PARAMS = {
    "sample_rate": 16000,
    "speech_model": "u3-rt-pro",
    "format_turns": True,
    "keyterms_prompt": KEYTERMS,
}

# doseq=True expands the keyterms list into repeated query parameters.
API_ENDPOINT = "wss://streaming.assemblyai.com/v3/ws?" + urlencode(
    CONNECTION_PARAMS, doseq=True
)

# Audio settings
FRAMES_PER_BUFFER = 800  # 50 ms of audio per buffer at 16 kHz
SAMPLE_RATE = 16000
stop_event = threading.Event()  # set to tell the capture thread to exit
caption_count = 0  # number of finalized captions printed so far
def on_open(ws):
    """Start streaming microphone audio once the WebSocket session is open.

    Spawns a daemon thread that reads 16-bit mono PCM from the default
    input device and forwards each buffer to the server as a binary
    frame, until stop_event is set or a read/send fails.
    """
    print(f"Live captioning started — keyterms: {', '.join(KEYTERMS)}")
    print("Speak into your microphone. Press Ctrl+C to stop.\n")
    print("-" * 60)

    def pump_microphone():
        pa = pyaudio.PyAudio()
        mic = pa.open(
            format=pyaudio.paInt16,
            channels=1,
            rate=SAMPLE_RATE,
            input=True,
            frames_per_buffer=FRAMES_PER_BUFFER,
        )
        try:
            while not stop_event.is_set():
                chunk = mic.read(FRAMES_PER_BUFFER, exception_on_overflow=False)
                ws.send(chunk, websocket.ABNF.OPCODE_BINARY)
        except Exception:
            # Any capture or socket failure ends the stream; cleanup below.
            pass
        mic.stop_stream()
        mic.close()
        pa.terminate()

    threading.Thread(target=pump_microphone, daemon=True).start()
54
def on_message(ws, message):
    """Render captions from server events.

    Parses the incoming JSON message and prints:
      * a live, in-place partial caption while a turn is in progress,
      * a numbered final caption when the turn ends,
      * a session summary on Termination.

    Args:
        ws: the WebSocketApp connection (unused here).
        message: raw JSON string from the streaming endpoint.
    """
    global caption_count
    data = json.loads(message)
    msg_type = data.get("type")

    if msg_type == "Turn":
        transcript = data.get("transcript", "")
        if data.get("end_of_turn") and transcript:
            caption_count += 1
            # Fix: pad to the partial caption's max width (4 + 70 = 74 chars)
            # so a shorter final line fully overwrites the partial one instead
            # of leaving stale characters after the carriage return.
            print("\r" + f"[{caption_count:03d}] {transcript}".ljust(74))
        elif transcript:
            # Show partial (live) caption, overwriting the current line.
            print(f"\r >> {transcript[-70:]}", end="", flush=True)

    elif msg_type == "Termination":
        duration = data.get("audio_duration_seconds", 0)
        print(f"\n{'=' * 60}")
        print(f"Session ended — {caption_count} captions, {duration}s of audio")
72
def on_error(ws, error):
    """Stop the capture thread and report a WebSocket failure."""
    # Signal the microphone thread first so we stop sending into a dead socket.
    stop_event.set()
    print(f"\nError: {error}")
76
def on_close(ws, code, msg):
    """Make sure the microphone capture loop exits when the socket closes."""
    stop_event.set()
79
# Run the WebSocket client on a background thread so the main thread stays
# free to catch Ctrl+C and drive a graceful shutdown.
ws_app = websocket.WebSocketApp(
    API_ENDPOINT,
    header={"Authorization": YOUR_API_KEY},
    on_open=on_open,
    on_message=on_message,
    on_error=on_error,
    on_close=on_close,
)

ws_thread = threading.Thread(target=ws_app.run_forever, daemon=True)
ws_thread.start()

try:
    # Poll with a short timeout instead of a bare join() so that
    # KeyboardInterrupt is delivered to the main thread promptly.
    while ws_thread.is_alive():
        ws_thread.join(timeout=0.1)
except KeyboardInterrupt:
    print("\n\nStopping...")
    stop_event.set()
    if ws_app.sock and ws_app.sock.connected:
        # Ask the server to finalize the session, then give it a moment
        # to send the Termination summary before closing the socket.
        ws_app.send(json.dumps({"type": "Terminate"}))
        time.sleep(2)
    ws_app.close()
Live captioning started — keyterms: AssemblyAI, Universal-3 Pro, LLM Gateway, speech-to-text
Speak into your microphone. Press Ctrl+C to stop.
------------------------------------------------------------
[001] Welcome everyone to today's demo of AssemblyAI's speech-to-text platform.
[002] We'll be showing you how Universal-3 Pro handles real-time transcription.
[003] The LLM Gateway integration lets you add AI analysis on top of your
transcripts without switching providers.
============================================================
Session ended — 3 captions, 24s of audio

See the End-to-end examples overview for all available pipelines.