Migration guide: Speechmatics to AssemblyAI

This guide walks through the process of migrating from Speechmatics to AssemblyAI for streaming Speech-to-Text.

Get started

Before we begin, make sure you have an AssemblyAI account and an API key. You can sign up for a free account and get your API key from your dashboard.

Side-by-side code comparison

Below is a side-by-side comparison of basic Python code snippets for transcribing streaming audio with Speechmatics and with AssemblyAI.

import pyaudio
import websocket
import json
import threading
import time

# --- Configuration ---
YOUR_API_KEY = "YOUR-API-KEY"  # Replace with your actual API key

CONNECTION_PARAMS = {
    "language": "en",
    "enable_partials": True,
    "max_delay": 2.0
}
API_ENDPOINT = "wss://eu2.rt.speechmatics.com/v2/en"

# Audio Configuration
FRAMES_PER_BUFFER = 1024  # Chunk size
SAMPLE_RATE = None  # Will be set based on device capabilities
CHANNELS = 1
FORMAT = pyaudio.paFloat32  # Speechmatics uses float32 format

# Global variables for audio stream and websocket
audio = None
stream = None
ws_app = None
audio_thread = None
stop_event = threading.Event()  # To signal the audio thread to stop
audio_seq_no = 0  # Track number of audio chunks sent

# --- WebSocket Event Handlers ---
def on_open(ws):
    """Called when the WebSocket connection is established."""
    print("WebSocket connection opened.")
    print(f"Connected to: {API_ENDPOINT}")

    # Send StartRecognition message
    start_message = {
        "message": "StartRecognition",
        "audio_format": {
            "type": "raw",
            "encoding": "pcm_f32le",
            "sample_rate": SAMPLE_RATE
        },
        "transcription_config": {
            "language": CONNECTION_PARAMS["language"],
            "enable_partials": CONNECTION_PARAMS["enable_partials"],
            "max_delay": CONNECTION_PARAMS["max_delay"]
        }
    }
    ws.send(json.dumps(start_message))

def on_message(ws, message):
    global audio_seq_no

    try:
        data = json.loads(message)
        msg_type = data.get('message')

        if msg_type == "RecognitionStarted":
            session_id = data.get('id')
            print(f"\nSession began: ID={session_id}")

            # Start sending audio data in a separate thread
            def stream_audio():
                global audio_seq_no, stream
                print("Starting audio streaming...")
                while not stop_event.is_set():
                    try:
                        audio_data = stream.read(FRAMES_PER_BUFFER, exception_on_overflow=False)
                        # Send audio data as binary message
                        ws.send(audio_data, websocket.ABNF.OPCODE_BINARY)
                        audio_seq_no += 1
                    except Exception as e:
                        print(f"Error streaming audio: {e}")
                        # If stream read fails, likely means it's closed, stop the loop
                        break
                print("Audio streaming stopped.")

            global audio_thread
            audio_thread = threading.Thread(target=stream_audio)
            audio_thread.daemon = True  # Allow main thread to exit even if this thread is running
            audio_thread.start()

        elif msg_type == "AddPartialTranscript":
            transcript = data.get('metadata', {}).get('transcript', '')
            if transcript:
                print(f"\r{transcript}", end='')

        elif msg_type == "AddTranscript":
            transcript = data.get('metadata', {}).get('transcript', '')
            if transcript:
                # Clear previous line for final messages
                print('\r' + ' ' * 80 + '\r', end='')
                print(transcript)

        elif msg_type == "EndOfTranscript":
            print("\nSession Terminated: Transcription complete")

        elif msg_type == "Error":
            error_type = data.get('type')
            reason = data.get('reason')
            print(f"\nWebSocket Error: {error_type} - {reason}")
            stop_event.set()

    except json.JSONDecodeError as e:
        print(f"Error decoding message: {e}")
    except Exception as e:
        print(f"Error handling message: {e}")

def on_error(ws, error):
    """Called when a WebSocket error occurs."""
    print(f"\nWebSocket Error: {error}")
    # Attempt to signal stop on error
    stop_event.set()

def on_close(ws, close_status_code, close_msg):
    """Called when the WebSocket connection is closed."""
    print(f"\nWebSocket Disconnected: Status={close_status_code}, Msg={close_msg}")
    # Ensure audio resources are released
    global stream, audio
    stop_event.set()  # Signal audio thread just in case it's still running

    if stream:
        if stream.is_active():
            stream.stop_stream()
        stream.close()
        stream = None
    if audio:
        audio.terminate()
        audio = None
    # Try to join the audio thread to ensure clean exit
    if audio_thread and audio_thread.is_alive():
        audio_thread.join(timeout=1.0)

# --- Main Execution ---
def run():
    global audio, stream, ws_app, SAMPLE_RATE

    # Initialize PyAudio
    audio = pyaudio.PyAudio()

    # Get default input device (can alter to specify specific device)
    default_device = audio.get_default_input_device_info()
    device_index = default_device['index']
    SAMPLE_RATE = int(audio.get_device_info_by_index(device_index)['defaultSampleRate'])

    print(f"Using microphone: {default_device['name']}")

    # Open microphone stream
    try:
        stream = audio.open(
            input=True,
            frames_per_buffer=FRAMES_PER_BUFFER,
            channels=CHANNELS,
            format=FORMAT,
            rate=SAMPLE_RATE,
            input_device_index=device_index
        )
        print("Microphone stream opened successfully.")
        print("Speak into your microphone. Press Ctrl+C to stop.")
    except Exception as e:
        print(f"Error opening microphone stream: {e}")
        if audio:
            audio.terminate()
        return  # Exit if microphone cannot be opened

    # Create WebSocketApp
    ws_app = websocket.WebSocketApp(
        API_ENDPOINT,
        header={"Authorization": f"Bearer {YOUR_API_KEY}"},  # Speechmatics uses Bearer token
        on_open=on_open,
        on_message=on_message,
        on_error=on_error,
        on_close=on_close,
    )

    # Run WebSocketApp in a separate thread to allow main thread to catch KeyboardInterrupt
    ws_thread = threading.Thread(target=lambda: ws_app.run_forever(ping_interval=30, ping_timeout=10))
    ws_thread.daemon = True
    ws_thread.start()

    try:
        # Keep main thread alive until interrupted
        while ws_thread.is_alive():
            time.sleep(0.1)
    except KeyboardInterrupt:
        print("\nCtrl+C received. Stopping...")
        stop_event.set()  # Signal audio thread to stop

        # Send EndOfStream message to the server
        if ws_app and ws_app.sock and ws_app.sock.connected:
            try:
                end_message = {
                    "message": "EndOfStream",
                    "last_seq_no": audio_seq_no
                }
                print(f"Sending termination message: {json.dumps(end_message)}")
                ws_app.send(json.dumps(end_message))
                # Give a moment for messages to process before forceful close
                time.sleep(1)
            except Exception as e:
                print(f"Error sending termination message: {e}")

        # Close the WebSocket connection (will trigger on_close)
        if ws_app:
            ws_app.close()

        # Wait for WebSocket thread to finish
        ws_thread.join(timeout=2.0)

    except Exception as e:
        print(f"\nAn unexpected error occurred: {e}")
        stop_event.set()
        if ws_app:
            ws_app.close()
        ws_thread.join(timeout=2.0)

    finally:
        # Final cleanup (already handled in on_close, but good as a fallback)
        if stream and stream.is_active():
            stream.stop_stream()
        if stream:
            stream.close()
        if audio:
            audio.terminate()
        print("Cleanup complete. Exiting.")

if __name__ == "__main__":
    run()
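
For the AssemblyAI side of the comparison, here is a minimal equivalent. It is a sketch built on AssemblyAI's v3 streaming WebSocket endpoint and the Turn/Termination message shapes covered later in this guide; the format_turns parameter and the {"type": "Terminate"} shutdown message are assumptions to verify against the current AssemblyAI docs.

import pyaudio
import websocket
import json
import threading
import time
from urllib.parse import urlencode

# --- Configuration ---
YOUR_API_KEY = "YOUR-API-KEY"  # Replace with your actual API key

CONNECTION_PARAMS = {
    "sample_rate": 16000,
    "format_turns": True,  # Request formatted final transcripts
}
API_ENDPOINT = f"wss://streaming.assemblyai.com/v3/ws?{urlencode(CONNECTION_PARAMS)}"

# Audio Configuration
SAMPLE_RATE = CONNECTION_PARAMS["sample_rate"]
FRAMES_PER_BUFFER = int(SAMPLE_RATE * 0.05)  # 50 ms of PCM16 audio per message
CHANNELS = 1
FORMAT = pyaudio.paInt16  # AssemblyAI expects PCM16 audio

audio = None
stream = None
stop_event = threading.Event()  # To signal the audio thread to stop

# --- WebSocket Event Handlers ---
def on_open(ws):
    """No handshake message is required: start streaming audio immediately."""
    def stream_audio():
        while not stop_event.is_set():
            try:
                audio_data = stream.read(FRAMES_PER_BUFFER, exception_on_overflow=False)
                ws.send(audio_data, websocket.ABNF.OPCODE_BINARY)
            except Exception as e:
                print(f"Error streaming audio: {e}")
                break

    threading.Thread(target=stream_audio, daemon=True).start()

def on_message(ws, message):
    data = json.loads(message)
    msg_type = data.get("type")

    if msg_type == "Begin":
        print(f"\nSession began: ID={data.get('id')}")
    elif msg_type == "Turn":
        transcript = data.get("transcript", "")
        if data.get("turn_is_formatted"):
            print('\r' + ' ' * 80 + '\r', end='')
            print(transcript)  # Final, formatted transcript
        elif transcript:
            print(f"\r{transcript}", end='')  # Interim transcript
    elif msg_type == "Termination":
        print("\nSession Terminated: Transcription complete")

def on_error(ws, error):
    print(f"\nWebSocket Error: {error}")
    stop_event.set()

def on_close(ws, close_status_code, close_msg):
    print(f"\nWebSocket Disconnected: Status={close_status_code}, Msg={close_msg}")
    stop_event.set()

# --- Main Execution ---
def run():
    global audio, stream
    audio = pyaudio.PyAudio()
    stream = audio.open(input=True, frames_per_buffer=FRAMES_PER_BUFFER,
                        channels=CHANNELS, format=FORMAT, rate=SAMPLE_RATE)
    print("Speak into your microphone. Press Ctrl+C to stop.")

    ws_app = websocket.WebSocketApp(
        API_ENDPOINT,
        header={"Authorization": YOUR_API_KEY},  # Raw API key: no "Bearer" prefix
        on_open=on_open,
        on_message=on_message,
        on_error=on_error,
        on_close=on_close,
    )
    ws_thread = threading.Thread(target=ws_app.run_forever, daemon=True)
    ws_thread.start()

    try:
        while ws_thread.is_alive():
            time.sleep(0.1)
    except KeyboardInterrupt:
        stop_event.set()
        if ws_app.sock and ws_app.sock.connected:
            # Assumed client-side terminate message; verify against the current docs
            ws_app.send(json.dumps({"type": "Terminate"}))
            time.sleep(1)
        ws_app.close()
        ws_thread.join(timeout=2.0)
    finally:
        if stream.is_active():
            stream.stop_stream()
        stream.close()
        audio.terminate()

if __name__ == "__main__":
    run()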

Step 1: Install dependencies

Install the required Python packages.

$ pip install websocket-client pyaudio

Step 2: Configure the API key

In this step, you’ll configure your API key to authenticate your requests.

1. Navigate to API Keys in your account settings and copy your API key.

2. Store your API key in a variable, replacing YOUR-API-KEY with your copied key.

import pyaudio
import websocket
import json
import threading
import time

YOUR_API_KEY = "YOUR-API-KEY"

To avoid exposing a long-lived API key in client-side code, Speechmatics also lets you exchange it for a temporary token:

import requests

def generate_temp_token(api_key, ttl=60):
    """Generate a temporary authentication token that expires after the specified time."""
    url = "https://mp.speechmatics.com/v1/api_keys?type=rt"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    payload = {
        "ttl": ttl
    }

    response = requests.post(url, json=payload, headers=headers)
    data = response.json()
    return data.get("key_value")
Token usage

Instead of authorizing your request with YOUR_API_KEY (via request header), use the temporary token generated by this function when establishing the WebSocket connection.

API_ENDPOINT = f"wss://eu2.rt.speechmatics.com/v2?jwt={generate_temp_token(YOUR_API_KEY)}"
ws_app = websocket.WebSocketApp(
    API_ENDPOINT,
    on_open=on_open,
    on_message=on_message,
    on_error=on_error,
    on_close=on_close,
)
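
AssemblyAI offers a similar temporary-token flow for untrusted clients. The sketch below assumes a token endpoint at streaming.assemblyai.com/v3/token with an expires_in_seconds parameter and a token field in the response; treat these names as assumptions and verify them against the current AssemblyAI documentation.

import requests

def generate_aai_temp_token(api_key, expires_in_seconds=60):
    """Exchange a permanent API key for a short-lived streaming token (assumed endpoint)."""
    url = f"https://streaming.assemblyai.com/v3/token?expires_in_seconds={expires_in_seconds}"
    response = requests.get(url, headers={"Authorization": api_key})
    response.raise_for_status()
    return response.json().get("token")

# Pass the token as a query parameter instead of an Authorization header
API_ENDPOINT = f"wss://streaming.assemblyai.com/v3/ws?sample_rate=16000&token={generate_aai_temp_token(YOUR_API_KEY)}"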

Step 3: Set up audio configuration

Configure the audio settings for your microphone stream.

import pyaudio
import threading

# Audio Configuration
FRAMES_PER_BUFFER = 1024  # Chunk size
SAMPLE_RATE = None  # Will be set based on device capabilities
CHANNELS = 1
FORMAT = pyaudio.paFloat32  # Speechmatics uses float32 format

# Global variables for audio stream and websocket
audio = None
stream = None
ws_app = None
audio_thread = None
stop_event = threading.Event()  # To signal the audio thread to stop
audio_seq_no = 0  # Track number of audio chunks sent

def run():
    global audio, stream, ws_app, SAMPLE_RATE

    # Initialize PyAudio
    audio = pyaudio.PyAudio()

    # Get default input device (can alter to specify specific device)
    default_device = audio.get_default_input_device_info()
    device_index = default_device['index']
    SAMPLE_RATE = int(audio.get_device_info_by_index(device_index)['defaultSampleRate'])

    print(f"Using microphone: {default_device['name']}")

    # Open microphone stream
    try:
        stream = audio.open(
            input=True,
            frames_per_buffer=FRAMES_PER_BUFFER,
            channels=CHANNELS,
            format=FORMAT,
            rate=SAMPLE_RATE,
            input_device_index=device_index
        )
        print("Microphone stream opened successfully.")
        print("Speak into your microphone. Press Ctrl+C to stop.")
    except Exception as e:
        print(f"Error opening microphone stream: {e}")
        if audio:
            audio.terminate()
        return  # Exit if microphone cannot be opened
Sample rate

Speechmatics recommends using a 16 kHz sample rate for speech audio. Anything higher will be downsampled server-side.

Audio data format

If you want to stream data from elsewhere, make sure that your audio data is in the following format:

  • Single-channel
  • PCM16 (default) or Mu-law encoding (see Specifying the encoding)
  • A sample rate that matches the value of the sample_rate parameter (16 kHz is recommended)
  • 50 milliseconds of audio per message (larger chunk sizes are workable, but may result in latency fluctuations)
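
Translated into the PyAudio setup used in this guide, the AssemblyAI capture configuration is fixed rather than derived from the device. A minimal sketch:

import pyaudio

# Fixed capture settings matching AssemblyAI's expected input format
SAMPLE_RATE = 16000                          # Must match the sample_rate parameter
FRAMES_PER_BUFFER = int(SAMPLE_RATE * 0.05)  # 50 ms of audio per message (800 frames)
CHANNELS = 1                                 # Single-channel
FORMAT = pyaudio.paInt16                     # PCM16 encoding

audio = pyaudio.PyAudio()
stream = audio.open(
    input=True,
    frames_per_buffer=FRAMES_PER_BUFFER,
    channels=CHANNELS,
    format=FORMAT,
    rate=SAMPLE_RATE,
)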

Step 4: Create event handlers

In this step, you’ll set up callback functions that handle the different events.

1. Create functions to handle the events from the real-time service.

import json

def on_open(ws):
    """Called when the WebSocket connection is established."""
    print("WebSocket connection opened.")
    print(f"Connected to: {API_ENDPOINT}")

    # Send StartRecognition message
    start_message = {
        "message": "StartRecognition",
        "audio_format": {
            "type": "raw",
            "encoding": "pcm_f32le",
            "sample_rate": SAMPLE_RATE
        },
        "transcription_config": {
            "language": CONNECTION_PARAMS["language"],
            "enable_partials": CONNECTION_PARAMS["enable_partials"],
            "max_delay": CONNECTION_PARAMS["max_delay"]
        }
    }
    ws.send(json.dumps(start_message))

def on_error(ws, error):
    """Called when a WebSocket error occurs."""
    print(f"\nWebSocket Error: {error}")
    # Attempt to signal stop on error
    stop_event.set()

def on_close(ws, close_status_code, close_msg):
    """Called when the WebSocket connection is closed."""
    print(f"\nWebSocket Disconnected: Status={close_status_code}, Msg={close_msg}")
    # Ensure audio resources are released
    global stream, audio
    stop_event.set()  # Signal audio thread just in case it's still running

    if stream:
        if stream.is_active():
            stream.stop_stream()
        stream.close()
        stream = None
    if audio:
        audio.terminate()
        audio = None
    # Try to join the audio thread to ensure clean exit
    if audio_thread and audio_thread.is_alive():
        audio_thread.join(timeout=1.0)
Connection configuration

Speechmatics requires a handshake where the connection configuration is specified before audio is streamed. AssemblyAI allows you to configure the connection via query parameters in the URL and start streaming audio immediately.

The Speechmatics handshake begins when on_open sends a StartRecognition message to configure the session. Audio streaming only starts once the RecognitionStarted confirmation is received in the on_message callback.
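
With AssemblyAI, the same configuration moves into the connection URL. A minimal sketch, assuming the v3 streaming endpoint and its sample_rate and format_turns query parameters (verify the exact names in the AssemblyAI docs); stream_audio stands in for an audio-reading helper like the one shown in this guide:

from urllib.parse import urlencode

CONNECTION_PARAMS = {
    "sample_rate": 16000,
    "format_turns": True,  # Request formatted final transcripts
}
API_ENDPOINT = f"wss://streaming.assemblyai.com/v3/ws?{urlencode(CONNECTION_PARAMS)}"

def on_open(ws):
    # No StartRecognition handshake: start streaming audio immediately
    threading.Thread(target=stream_audio, daemon=True).start()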

2. Create another function to handle transcripts.

Speechmatics sends separate partial (AddPartialTranscript) and final (AddTranscript) transcript messages, and signals the end of a session with EndOfTranscript.

AssemblyAI instead sends a Turn object with a turn_is_formatted boolean flag to indicate finality, and signals the end of a session with Termination. For more on the Turn object, see the Core concepts section of the Streaming docs.

def on_message(ws, message):
    global audio_seq_no

    try:
        data = json.loads(message)
        msg_type = data.get('message')

        if msg_type == "RecognitionStarted":
            session_id = data.get('id')
            print(f"\nSession began: ID={session_id}")

            # Start sending audio data in a separate thread
            def stream_audio():
                global audio_seq_no, stream
                print("Starting audio streaming...")
                while not stop_event.is_set():
                    try:
                        audio_data = stream.read(FRAMES_PER_BUFFER, exception_on_overflow=False)
                        # Send audio data as binary message
                        ws.send(audio_data, websocket.ABNF.OPCODE_BINARY)
                        audio_seq_no += 1
                    except Exception as e:
                        print(f"Error streaming audio: {e}")
                        # If stream read fails, likely means it's closed, stop the loop
                        break
                print("Audio streaming stopped.")

            global audio_thread
            audio_thread = threading.Thread(target=stream_audio)
            audio_thread.daemon = True  # Allow main thread to exit even if this thread is running
            audio_thread.start()

        elif msg_type == "AddPartialTranscript":
            transcript = data.get('metadata', {}).get('transcript', '')
            if transcript:
                print(f"\r{transcript}", end='')

        elif msg_type == "AddTranscript":
            transcript = data.get('metadata', {}).get('transcript', '')
            if transcript:
                # Clear previous line for final messages
                print('\r' + ' ' * 80 + '\r', end='')
                print(transcript)

        elif msg_type == "EndOfTranscript":
            print("\nSession Terminated: Transcription complete")

        elif msg_type == "Error":
            error_type = data.get('type')
            reason = data.get('reason')
            print(f"\nWebSocket Error: {error_type} - {reason}")
            stop_event.set()

    except json.JSONDecodeError as e:
        print(f"Error decoding message: {e}")
    except Exception as e:
        print(f"Error handling message: {e}")
Transcript message structure

Please note the difference in transcript message structure below:

# Speechmatics
{
    "message": "AddPartialTranscript",
    "metadata": {
        "transcript": "hello world"
    },
    # Other transcript data...
}

# AssemblyAI
{
    "type": "Turn",
    "transcript": "hello world",
    "turn_is_formatted": false,
    # Other transcript data...
}
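
The AssemblyAI handler is correspondingly simpler. A minimal sketch built on the message shapes above (AssemblyAI sends the transcript at the top level of the Turn payload):

def on_message(ws, message):
    try:
        data = json.loads(message)
        msg_type = data.get("type")

        if msg_type == "Turn":
            transcript = data.get("transcript", "")
            if data.get("turn_is_formatted"):
                # Final, formatted transcript: print it on its own line
                print('\r' + ' ' * 80 + '\r', end='')
                print(transcript)
            elif transcript:
                # Interim transcript: overwrite the current line
                print(f"\r{transcript}", end='')

        elif msg_type == "Termination":
            print("\nSession Terminated: Transcription complete")

    except json.JSONDecodeError as e:
        print(f"Error decoding message: {e}")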

Step 5: Connect and start transcription

To stream audio, create a WebSocket connection to the real-time service.

def run():
    global audio, stream, ws_app, SAMPLE_RATE
    # Skipping audio/microphone setup code...

    # Create WebSocketApp
    ws_app = websocket.WebSocketApp(
        API_ENDPOINT,
        header={"Authorization": f"Bearer {YOUR_API_KEY}"},  # Speechmatics uses Bearer token
        on_open=on_open,
        on_message=on_message,
        on_error=on_error,
        on_close=on_close,
    )

    # Run WebSocketApp in a separate thread to allow main thread to catch KeyboardInterrupt
    ws_thread = threading.Thread(target=lambda: ws_app.run_forever(ping_interval=30, ping_timeout=10))
    ws_thread.daemon = True
    ws_thread.start()
Authorization

Note that while both services use an Authorization header to authenticate the WebSocket connection, Speechmatics uses a Bearer prefix, while AssemblyAI does not.
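
For example, the AssemblyAI version of the connection above changes only the header value:

ws_app = websocket.WebSocketApp(
    API_ENDPOINT,
    header={"Authorization": YOUR_API_KEY},  # Raw API key, no "Bearer" prefix
    on_open=on_open,
    on_message=on_message,
    on_error=on_error,
    on_close=on_close,
)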

Step 6: Close the connection

Keep the main thread alive until interrupted, handle keyboard interrupts and unexpected exceptions, and clean up when the WebSocket connection closes.

def run():
    global audio, stream, ws_app, SAMPLE_RATE
    # Skipping audio/microphone setup and WebSocket connection code...

    try:
        # Keep main thread alive until interrupted
        while ws_thread.is_alive():
            time.sleep(0.1)
    except KeyboardInterrupt:
        print("\nCtrl+C received. Stopping...")
        stop_event.set()  # Signal audio thread to stop

        # Send EndOfStream message to the server
        if ws_app and ws_app.sock and ws_app.sock.connected:
            try:
                end_message = {
                    "message": "EndOfStream",
                    "last_seq_no": audio_seq_no
                }
                print(f"Sending termination message: {json.dumps(end_message)}")
                ws_app.send(json.dumps(end_message))
                # Give a moment for messages to process before forceful close
                time.sleep(1)
            except Exception as e:
                print(f"Error sending termination message: {e}")

        # Close the WebSocket connection (will trigger on_close)
        if ws_app:
            ws_app.close()

        # Wait for WebSocket thread to finish
        ws_thread.join(timeout=2.0)

    except Exception as e:
        print(f"\nAn unexpected error occurred: {e}")
        stop_event.set()
        if ws_app:
            ws_app.close()
        ws_thread.join(timeout=2.0)

    finally:
        # Final cleanup (already handled in on_close, but good as a fallback)
        if stream and stream.is_active():
            stream.stop_stream()
        if stream:
            stream.close()
        if audio:
            audio.terminate()
        print("Cleanup complete. Exiting.")

The connection closes automatically when you press Ctrl+C or when an error occurs; in both cases, the on_close handler cleans up the audio resources.
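
For AssemblyAI, a graceful-shutdown message replaces Speechmatics' EndOfStream. A sketch, assuming the client-side message is {"type": "Terminate"} (check the current AssemblyAI docs):

import json
import time

def terminate_session(ws_app):
    """Gracefully end an AssemblyAI streaming session before closing the socket."""
    if ws_app and ws_app.sock and ws_app.sock.connected:
        # Assumed client-side terminate message; verify against the current docs
        ws_app.send(json.dumps({"type": "Terminate"}))
        time.sleep(1)  # Give final Turn/Termination messages a moment to arrive
    ws_app.close()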

Step 7: Execute the main function

Finally, run the main function to start transcribing.

if __name__ == "__main__":
    run()

Next steps

To learn more about both Streaming APIs, their key differences, and how to best migrate, see the following resources:

  • AssemblyAI Streaming Speech-to-Text documentation
  • Speechmatics real-time transcription documentation

Need some help?

If you get stuck or have any other questions, contact our support team at support@assemblyai.com or create a support ticket.