Migration guide: Speechmatics to AssemblyAI
This guide walks through the process of migrating from Speechmatics to AssemblyAI for Streaming Speech-to-Text.
Get started
Before we begin, make sure you have an AssemblyAI account and an API key. You can sign up for a free account and get your API key from your dashboard.
Side-by-side code comparison
Below is a side-by-side comparison of a basic Python code snippet that transcribes streaming audio with Speechmatics and with AssemblyAI.
Speechmatics
AssemblyAI
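For reference, a condensed version of the AssemblyAI side might look like the following minimal sketch (assuming the v3 streaming WebSocket endpoint and the websocket-client and pyaudio packages); each part is broken down step by step in the sections below.

```python
import json
import threading
import pyaudio
import websocket

YOUR_API_KEY = "<YOUR_API_KEY>"
SAMPLE_RATE = 16000
FRAMES_PER_BUFFER = 800  # 50 ms of audio per chunk at 16 kHz

audio = pyaudio.PyAudio()
stream = audio.open(
    format=pyaudio.paInt16,  # single-channel PCM16
    channels=1,
    rate=SAMPLE_RATE,
    input=True,
    frames_per_buffer=FRAMES_PER_BUFFER,
)

def on_open(ws):
    # Stream microphone audio on a background thread.
    def send_audio():
        while True:
            data = stream.read(FRAMES_PER_BUFFER, exception_on_overflow=False)
            ws.send(data, websocket.ABNF.OPCODE_BINARY)
    threading.Thread(target=send_audio, daemon=True).start()

def on_message(ws, message):
    msg = json.loads(message)
    if msg.get("type") == "Turn":
        print(msg["transcript"])

ws = websocket.WebSocketApp(
    f"wss://streaming.assemblyai.com/v3/ws?sample_rate={SAMPLE_RATE}",
    header={"Authorization": YOUR_API_KEY},
    on_open=on_open,
    on_message=on_message,
)
ws.run_forever()
```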
Step 1: Install dependencies
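Both services can be driven with a plain WebSocket client. Assuming the websocket-client and pyaudio packages used in the sketches throughout this guide (plus requests for the temporary-token step), a typical install is:

```bash
pip install websocket-client pyaudio requests
```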
Step 2: Configure the API key
In this step, you’ll configure your API key to authenticate your requests.
Speechmatics
AssemblyAI
Navigate to API Keys in your account settings and copy your API key.
Speechmatics
AssemblyAI
Store your API key in a variable. Replace <YOUR_API_KEY> with your copied API key.
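For example:

```python
YOUR_API_KEY = "<YOUR_API_KEY>"
```

The same pattern works for both services; only the key itself differs.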
Authenticate with a temporary token
Speechmatics
AssemblyAI
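For AssemblyAI, a temporary token can be fetched from the streaming token endpoint. A minimal sketch, assuming the v3 token endpoint and its expires_in_seconds parameter (check the current API reference for the exact names):

```python
import requests

def generate_temp_token(api_key: str, expires_in_seconds: int = 60) -> str:
    # Exchange the permanent API key for a short-lived streaming token.
    response = requests.get(
        "https://streaming.assemblyai.com/v3/token",
        params={"expires_in_seconds": expires_in_seconds},
        headers={"Authorization": api_key},
    )
    response.raise_for_status()
    return response.json()["token"]
```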
Token usage
Instead of authorizing your request with YOUR_API_KEY (via request header), use the temporary token generated by this function when establishing the WebSocket connection.
Step 3: Set up audio configuration
Configure the audio settings for your microphone stream.
Speechmatics
AssemblyAI
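A minimal PyAudio microphone setup that matches the format notes below (16 kHz, single-channel PCM16, 50 ms chunks) might look like this sketch:

```python
import pyaudio

SAMPLE_RATE = 16000          # 16 kHz, as recommended below
FRAMES_PER_BUFFER = 800      # 50 ms of audio per chunk at 16 kHz

audio = pyaudio.PyAudio()
stream = audio.open(
    format=pyaudio.paInt16,  # single-channel PCM16
    channels=1,
    rate=SAMPLE_RATE,
    input=True,
    frames_per_buffer=FRAMES_PER_BUFFER,
)
```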
Sample rate
Speechmatics recommends using a 16 kHz sample rate for speech audio. Anything higher will be downsampled server-side.
Audio data format
If you want to stream data from elsewhere, make sure that your audio data is in the following format:
- Single-channel
- PCM16 (default) or Mu-law encoding (see Specifying the encoding)
- A sample rate that matches the value of the sample_rate parameter (16 kHz is recommended)
- 50 milliseconds of audio per message (larger chunk sizes are workable, but may result in latency fluctuations)
Step 4: Create event handlers
In this step, you’ll set up callback functions that handle the different events.
Create functions to handle the events from the real-time service.
Speechmatics
AssemblyAI
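A minimal sketch of the AssemblyAI open, error, and close handlers, assuming the websocket-client callback signatures and the stream, audio, and FRAMES_PER_BUFFER objects from Step 3:

```python
import threading
import websocket

def on_open(ws):
    # AssemblyAI needs no handshake message: the session is configured
    # via URL query parameters, so audio can be streamed immediately.
    def send_audio():
        while True:
            data = stream.read(FRAMES_PER_BUFFER, exception_on_overflow=False)
            ws.send(data, websocket.ABNF.OPCODE_BINARY)
    threading.Thread(target=send_audio, daemon=True).start()

def on_error(ws, error):
    print("Error:", error)

def on_close(ws, close_status_code, close_msg):
    # Release the microphone when the session ends.
    stream.stop_stream()
    stream.close()
    audio.terminate()
```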
Connection configuration
Speechmatics requires a handshake where the connection configuration is specified before audio is streamed. AssemblyAI allows you to configure the connection via query parameters in the URL and start streaming audio immediately.
The Speechmatics handshake begins when on_open sends a StartRecognition message to configure the session. Audio streaming only starts after the RecognitionStarted message type is parsed and confirmed in the on_message callback.
Create another function to handle transcripts.
Speechmatics has separate partial (AddPartialTranscript) and final (AddTranscript) transcripts. The terminate session message is EndOfTranscript.
AssemblyAI instead uses a Turn object with a turn_is_formatted boolean flag to indicate finality. The terminate session message is Termination.
For more on the Turn object, see the Core concepts section of the Streaming docs.
Speechmatics
AssemblyAI
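A minimal sketch of the AssemblyAI transcript handler (field names assumed from the v3 Turn message):

```python
import json

def on_message(ws, message):
    msg = json.loads(message)
    msg_type = msg.get("type")

    if msg_type == "Turn":
        # turn_is_formatted flips to True once the turn is final and
        # punctuation and casing have been applied.
        if msg.get("turn_is_formatted"):
            print("Final:", msg["transcript"])
        else:
            print("Partial:", msg["transcript"])
    elif msg_type == "Termination":
        print("Session terminated")
```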
Transcript message structure
Please note the difference in transcript message structure below:
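Abbreviated, illustrative payloads (field lists are trimmed here; exact shapes may differ slightly from the current API references). A Speechmatics final transcript:

```json
{
  "message": "AddTranscript",
  "metadata": { "transcript": "Hello world.", "start_time": 0.0, "end_time": 1.2 },
  "results": [ { "type": "word", "alternatives": [ { "content": "Hello" } ] } ]
}
```

An AssemblyAI turn:

```json
{
  "type": "Turn",
  "turn_order": 0,
  "turn_is_formatted": true,
  "end_of_turn": true,
  "transcript": "Hello world.",
  "words": [ { "text": "Hello", "start": 0, "end": 400 } ]
}
```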
Step 5: Connect and start transcription
To stream audio, establish a connection to the API via WebSockets.
Speechmatics
AssemblyAI
Create a WebSocket connection to the Realtime service.
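A sketch of both connections, assuming the objects defined in the previous steps (the Speechmatics endpoint varies by region; eu2 is shown as a placeholder):

```python
# AssemblyAI: configuration goes in the query string, and the raw API
# key (or a temporary token) authenticates the connection.
ws = websocket.WebSocketApp(
    f"wss://streaming.assemblyai.com/v3/ws?sample_rate={SAMPLE_RATE}",
    header={"Authorization": YOUR_API_KEY},  # no Bearer prefix
    on_open=on_open,
    on_message=on_message,
    on_error=on_error,
    on_close=on_close,
)

# Speechmatics, for comparison: the header uses a Bearer prefix, and
# the session is configured by the StartRecognition message instead.
# ws = websocket.WebSocketApp(
#     "wss://eu2.rt.speechmatics.com/v2",
#     header={"Authorization": f"Bearer {YOUR_API_KEY}"},
#     on_open=on_open, on_message=on_message,
#     on_error=on_error, on_close=on_close,
# )
```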
Authorization
Note that while both services use an Authorization header to authenticate the WebSocket connection, Speechmatics uses a Bearer prefix, while AssemblyAI does not.
Step 6: Close the connection
Keep the main thread alive until interrupted, handle keyboard interrupts and thrown exceptions, and clean up upon closing of the WebSocket connection.
Speechmatics
AssemblyAI
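A minimal sketch for the AssemblyAI side; a Speechmatics session would send an EndOfStream message instead of Terminate:

```python
import json

try:
    # Blocks until the WebSocket connection closes.
    ws.run_forever()
except KeyboardInterrupt:
    # Ask the service to finalize the session, then close.
    ws.send(json.dumps({"type": "Terminate"}))
    ws.close()
```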
The connection will close automatically when you press Ctrl+C. In both cases, the on_close handler will clean up the audio resources.
Step 7: Execute the main function
Finally, run the main function to start transcribing.
Speechmatics
AssemblyAI
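For example, if the connection logic from Steps 5 and 6 lives in a main function:

```python
def main():
    # Steps 5 and 6: open the connection and stream until interrupted.
    try:
        ws.run_forever()
    except KeyboardInterrupt:
        ws.send(json.dumps({"type": "Terminate"}))
        ws.close()

if __name__ == "__main__":
    main()
```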
Next steps
To learn more about both Streaming APIs, their key differences, and how to best migrate, see the following resources:
AssemblyAI
Speechmatics
Need some help?
If you get stuck or have any other questions, contact our support team at support@assemblyai.com or create a support ticket.