Getting started › End-to-end examples

Real-time live captioner

Stream audio from your microphone with keyterms prompting for domain-specific accuracy, ideal for live events, accessibility, and broadcast captioning.

Products used: Streaming STT + Universal-3 Pro + keyterms prompting

Model selection: Uses u3-rt-pro for sub-300ms latency with format_turns enabled for clean, readable captions.

1# pip install pyaudio websocket-client
2import pyaudio
3import websocket
4import json
5import threading
6import time
7from urllib.parse import urlencode
8
# ── Config ────────────────────────────────────────────────────
YOUR_API_KEY = "YOUR_API_KEY"

# Domain-specific vocabulary sent with the session to bias recognition
# toward these exact spellings.
KEYTERMS = ["AssemblyAI", "Universal-3 Pro", "LLM Gateway", "speech-to-text"]

CONNECTION_PARAMS = {
    "sample_rate": 16000,
    "speech_model": "u3-rt-pro",
    "format_turns": True,
    "keyterms_prompt": KEYTERMS,
}

# doseq=True expands the keyterms list into repeated query parameters.
API_ENDPOINT = "wss://streaming.assemblyai.com/v3/ws?" + urlencode(
    CONNECTION_PARAMS, doseq=True
)

# Audio settings
FRAMES_PER_BUFFER = 800  # 50 ms of audio per buffer at 16 kHz
SAMPLE_RATE = 16000
stop_event = threading.Event()  # set to tell the capture thread to exit
caption_count = 0  # number of finalized captions printed so far
def on_open(ws):
    """Start streaming microphone audio once the WebSocket session is open.

    Spawns a daemon thread that reads 16-bit mono PCM from the default
    input device and forwards each buffer to the server as a binary
    frame, until stop_event is set or a read/send fails.
    """
    print(f"Live captioning started — keyterms: {', '.join(KEYTERMS)}")
    print("Speak into your microphone. Press Ctrl+C to stop.\n")
    print("-" * 60)

    def pump_microphone():
        pa = pyaudio.PyAudio()
        mic = pa.open(
            format=pyaudio.paInt16,
            channels=1,
            rate=SAMPLE_RATE,
            input=True,
            frames_per_buffer=FRAMES_PER_BUFFER,
        )
        try:
            while not stop_event.is_set():
                chunk = mic.read(FRAMES_PER_BUFFER, exception_on_overflow=False)
                ws.send(chunk, websocket.ABNF.OPCODE_BINARY)
        except Exception:
            # Any capture or socket failure ends the stream; cleanup below.
            pass
        mic.stop_stream()
        mic.close()
        pa.terminate()

    threading.Thread(target=pump_microphone, daemon=True).start()
54
def on_message(ws, message):
    """Render captions from server events.

    Parses the incoming JSON message and prints:
      * a live, in-place partial caption while a turn is in progress,
      * a numbered final caption when the turn ends,
      * a session summary on Termination.

    Args:
        ws: the WebSocketApp connection (unused here).
        message: raw JSON string from the streaming endpoint.
    """
    global caption_count
    data = json.loads(message)
    msg_type = data.get("type")

    if msg_type == "Turn":
        transcript = data.get("transcript", "")
        if data.get("end_of_turn") and transcript:
            caption_count += 1
            # Fix: pad to the partial caption's max width (4 + 70 = 74 chars)
            # so a shorter final line fully overwrites the partial one instead
            # of leaving stale characters after the carriage return.
            print("\r" + f"[{caption_count:03d}] {transcript}".ljust(74))
        elif transcript:
            # Show partial (live) caption, overwriting the current line.
            print(f"\r >> {transcript[-70:]}", end="", flush=True)

    elif msg_type == "Termination":
        duration = data.get("audio_duration_seconds", 0)
        print(f"\n{'=' * 60}")
        print(f"Session ended — {caption_count} captions, {duration}s of audio")
72
def on_error(ws, error):
    """Stop the capture thread and report a WebSocket failure."""
    # Signal the microphone thread first so we stop sending into a dead socket.
    stop_event.set()
    print(f"\nError: {error}")
76
def on_close(ws, code, msg):
    """Make sure the microphone capture loop exits when the socket closes."""
    stop_event.set()
79
# Run the WebSocket client on a background thread so the main thread stays
# free to catch Ctrl+C and drive a graceful shutdown.
ws_app = websocket.WebSocketApp(
    API_ENDPOINT,
    header={"Authorization": YOUR_API_KEY},
    on_open=on_open,
    on_message=on_message,
    on_error=on_error,
    on_close=on_close,
)

ws_thread = threading.Thread(target=ws_app.run_forever, daemon=True)
ws_thread.start()

try:
    # Poll with a short timeout instead of a bare join() so that
    # KeyboardInterrupt is delivered to the main thread promptly.
    while ws_thread.is_alive():
        ws_thread.join(timeout=0.1)
except KeyboardInterrupt:
    print("\n\nStopping...")
    stop_event.set()
    if ws_app.sock and ws_app.sock.connected:
        # Ask the server to finalize the session, then give it a moment
        # to send the Termination summary before closing the socket.
        ws_app.send(json.dumps({"type": "Terminate"}))
        time.sleep(2)
    ws_app.close()
Live captioning started — keyterms: AssemblyAI, Universal-3 Pro, LLM Gateway, speech-to-text
Speak into your microphone. Press Ctrl+C to stop.
------------------------------------------------------------
[001] Welcome everyone to today's demo of AssemblyAI's speech-to-text platform.
[002] We'll be showing you how Universal-3 Pro handles real-time transcription.
[003] The LLM Gateway integration lets you add AI analysis on top of your
transcripts without switching providers.
============================================================
Session ended — 3 captions, 24s of audio

See the End-to-end examples overview for all available pipelines.