Model selection

The speech_model connection parameter lets you specify which model to use for streaming transcription.

Available models

| Name | Parameter | Description |
| --- | --- | --- |
| Universal-3 Pro Streaming | `"speech_model": "u3-rt-pro"` | Our highest accuracy model with native multilingual code switching, entity accuracy, performance across varying audio, and prompting support. |
| Universal-Streaming English (default) | `"speech_model": "universal-streaming-english"` | Our low-latency streaming model optimized for real-time English transcription. |
| Universal-Streaming Multilingual | `"speech_model": "universal-streaming-multilingual"` | Our low-latency streaming model optimized for real-time multilingual transcription in English, Spanish, French, German, Italian, and Portuguese. |
| Whisper Streaming | `"speech_model": "whisper-rt"` | Transcribe audio streams in 99+ languages using the WhisperLiveKit model with automatic language detection. |

Choosing a model

| Feature | Universal-3 Pro Streaming | Universal-Streaming English | Universal-Streaming Multilingual | Whisper Streaming |
| --- | --- | --- | --- | --- |
| Latency | Fast | Fastest | Fast | Moderate |
| Partial transcripts | Yes | Yes | Yes | Yes |
| Multilingual | Native code switching | No | Per turn | 99+ languages (auto-detected) |
| Entity accuracy | Best | Okay | Okay | Okay |
| Disfluencies & filler words | Yes | No | No | No |
| Language detection | Yes | No | Yes | Yes (with confidence scores) |
| Non-speech tags | No | No | No | Yes ([Silence], [Music], etc.) |
| Customization | Keyterms prompting (known context) + native prompting (unknown context) | Keyterms prompting (known context) | Keyterms prompting (known context) | No |

For detailed setup and configuration of Universal-3 Pro Streaming, see the Universal-3 Pro Streaming page. For prompting guidance, see the Prompting guide.

For detailed setup and configuration of Whisper streaming, see this page.

End-to-end example

You can select a model by setting the speech_model connection parameter when connecting to the streaming API:

import pyaudio
import websocket
import json
import threading
import time
from urllib.parse import urlencode

# AssemblyAI API key, sent in the Authorization header on the WebSocket handshake.
YOUR_API_KEY = "<YOUR_API_KEY>"

# Query-string parameters for the streaming connection.
CONNECTION_PARAMS = {
    "sample_rate": 16000,
    "speech_model": "u3-rt-pro",  # or "universal-streaming-english", "universal-streaming-multilingual", "whisper-rt"
    "min_turn_silence": 100,
    "max_turn_silence": 1200,
    # "format_turns": True,  # Whether to return formatted final transcripts (not applicable to u3-rt-pro)
}
API_ENDPOINT_BASE_URL = "wss://streaming.assemblyai.com/v3/ws"
API_ENDPOINT = f"{API_ENDPOINT_BASE_URL}?{urlencode(CONNECTION_PARAMS)}"

# Microphone capture settings: 800 frames at 16 kHz = 50 ms of audio per chunk.
FRAMES_PER_BUFFER = 800
SAMPLE_RATE = CONNECTION_PARAMS["sample_rate"]
CHANNELS = 1
FORMAT = pyaudio.paInt16

# Shared state used by the WebSocket callbacks below.
audio = None        # pyaudio.PyAudio instance, created in run()
stream = None       # microphone input stream, created in run()
ws_app = None       # websocket.WebSocketApp instance, created in run()
audio_thread = None  # background thread that pushes audio frames to the socket
stop_event = threading.Event()  # signals the audio thread to stop
def on_open(ws):
    """Called once the WebSocket connects: start forwarding microphone audio."""
    print("WebSocket connection opened.")

    def pump_microphone():
        # Read fixed-size chunks from the mic and forward each one as a
        # binary frame until stop_event is set or the stream errors out.
        global stream
        while not stop_event.is_set():
            try:
                chunk = stream.read(FRAMES_PER_BUFFER, exception_on_overflow=False)
                ws.send(chunk, websocket.ABNF.OPCODE_BINARY)
            except Exception as exc:
                print(f"Error streaming audio: {exc}")
                break

    global audio_thread
    # Daemon thread so it never blocks interpreter shutdown.
    audio_thread = threading.Thread(target=pump_microphone, daemon=True)
    audio_thread.start()
def on_message(ws, message):
    """Handle one JSON message from the streaming API and print transcripts.

    Partial turns are redrawn in place with a carriage return; final turns
    overwrite the partial line and end with a newline. Any error (bad JSON,
    unexpected payload) is reported but never raised.
    """
    try:
        payload = json.loads(message)
        kind = payload.get("type")
        if kind == "Begin":
            print(f"Session began: ID={payload.get('id')}")
        elif kind == "Turn":
            text = payload.get("transcript", "")
            if payload.get("end_of_turn", False):
                # Blank out the partial line, then print the final transcript.
                print(f"\r{' ' * 80}\r{text}")
            else:
                print(f"\r{text}", end="")
        elif kind == "Termination":
            print(f"\nSession terminated: {payload.get('audio_duration_seconds', 0)}s of audio")
    except Exception as exc:
        print(f"Error handling message: {exc}")
def on_error(ws, error):
    """Called on any WebSocket error: report it and stop the audio thread."""
    print(f"\nWebSocket Error: {error}")
    stop_event.set()  # tell the audio-streaming thread to exit
def on_close(ws, close_status_code, close_msg):
    """Tear down the microphone stream and PyAudio once the socket closes."""
    global stream, audio
    print(f"\nWebSocket Disconnected: Status={close_status_code}")
    # Stop the audio-streaming thread before releasing the stream it reads.
    stop_event.set()
    mic = stream
    if mic:
        if mic.is_active():
            mic.stop_stream()
        mic.close()
    if audio is not None:
        audio.terminate()
def run():
    """Open the microphone, connect to the streaming API, and run until Ctrl+C."""
    global audio, stream, ws_app

    # 16-bit mono microphone capture at the sample rate sent to the API.
    audio = pyaudio.PyAudio()
    stream = audio.open(
        input=True,
        frames_per_buffer=FRAMES_PER_BUFFER,
        channels=CHANNELS,
        format=FORMAT,
        rate=SAMPLE_RATE,
    )
    print("Speak into your microphone. Press Ctrl+C to stop.")

    # The API key is sent via the Authorization header on the handshake.
    ws_app = websocket.WebSocketApp(
        API_ENDPOINT,
        header={"Authorization": YOUR_API_KEY},
        on_open=on_open,
        on_message=on_message,
        on_error=on_error,
        on_close=on_close,
    )

    # Run the WebSocket client on a daemon thread so the main thread stays
    # free to catch KeyboardInterrupt.
    ws_thread = threading.Thread(target=ws_app.run_forever)
    ws_thread.daemon = True
    ws_thread.start()

    try:
        while ws_thread.is_alive():
            time.sleep(0.1)
    except KeyboardInterrupt:
        print("\nStopping...")
        stop_event.set()
        # Ask the server to finalize the session, then give it a moment to
        # send the Termination message before closing the socket.
        if ws_app and ws_app.sock and ws_app.sock.connected:
            ws_app.send(json.dumps({"type": "Terminate"}))
            time.sleep(2)
        if ws_app:
            ws_app.close()
        ws_thread.join(timeout=2.0)
# Script entry point: only start capturing when executed directly.
if __name__ == "__main__":
    run()