Migrating from Streaming v2 to Streaming v3 (JavaScript)

This cookbook guides you through migrating from AssemblyAI’s legacy Streaming STT model (v2) to our latest Universal Streaming STT model (v3), which provides ultra-low latency for faster transcription, intelligent endpointing for more natural speech detection, and improved accuracy across various audio conditions.

Check out this blog post to learn more about this new model!

Overview of changes

The migration involves several key improvements:

  • API Version: Upgrade from v2 (/v2/realtime/ws) to v3 (/v3/ws)
  • Enhanced Error Handling: Robust cleanup and resource management
  • Modern Message Format: Updated message types and structure
  • Configuration Options: More flexible connection parameters
  • Graceful Shutdown: Proper termination handling

You can follow the step-by-step guide below to make changes to your existing code, but here is what your code should look like in the end:

// --- Dependencies ---
const WebSocket = require("ws");
const mic = require("mic");
const querystring = require("querystring");
const fs = require("fs");

// --- Configuration ---
const YOUR_API_KEY = "YOUR-API-KEY"; // Replace with your actual API key
const CONNECTION_PARAMS = {
  sample_rate: 16000,
  format_turns: true, // Request formatted final transcripts
};
const API_ENDPOINT_BASE_URL = "wss://streaming.assemblyai.com/v3/ws";
const API_ENDPOINT = `${API_ENDPOINT_BASE_URL}?${querystring.stringify(CONNECTION_PARAMS)}`;

// Audio Configuration
const SAMPLE_RATE = CONNECTION_PARAMS.sample_rate;
const CHANNELS = 1;

// Global state shared across the streaming session
let micInstance = null; // active `mic` recorder instance
let micInputStream = null; // readable stream of raw PCM audio from the mic
let ws = null; // WebSocket connection to AssemblyAI
let stopRequested = false; // set once shutdown has begun

// WAV recording state
let recordedFrames = []; // raw audio Buffers, concatenated into a WAV file on exit
28// --- Helper functions ---
// --- Helper functions ---

// Blank out the current terminal line (80 columns) and return the cursor
// to the start so the next write begins on a clean line.
function clearLine() {
  const blanks = " ".repeat(80);
  process.stdout.write(`\r${blanks}\r`);
}
32
// Convert a Unix timestamp in seconds to an ISO-8601 UTC string.
function formatTimestamp(timestamp) {
  const millis = timestamp * 1000;
  return new Date(millis).toISOString();
}
36
/**
 * Build the canonical 44-byte RIFF/WAVE header for 16-bit PCM audio.
 *
 * @param {number} sampleRate - samples per second (e.g. 16000)
 * @param {number} channels - channel count (1 = mono)
 * @param {number} dataLength - size of the PCM payload in bytes
 * @returns {Buffer} 44-byte header to prepend to the raw PCM data
 */
function createWavHeader(sampleRate, channels, dataLength) {
  const BYTES_PER_SAMPLE = 2; // 16-bit samples
  const blockAlign = channels * BYTES_PER_SAMPLE;
  const byteRate = sampleRate * blockAlign;

  const header = Buffer.alloc(44);

  // RIFF chunk descriptor
  header.write("RIFF", 0);
  header.writeUInt32LE(36 + dataLength, 4); // total size minus the first 8 bytes
  header.write("WAVE", 8);

  // "fmt " sub-chunk (16-byte PCM format block)
  header.write("fmt ", 12);
  header.writeUInt32LE(16, 16); // fmt chunk size
  header.writeUInt16LE(1, 20); // audio format 1 = uncompressed PCM
  header.writeUInt16LE(channels, 22);
  header.writeUInt32LE(sampleRate, 24);
  header.writeUInt32LE(byteRate, 28);
  header.writeUInt16LE(blockAlign, 32);
  header.writeUInt16LE(16, 34); // bits per sample

  // "data" sub-chunk
  header.write("data", 36);
  header.writeUInt32LE(dataLength, 40);

  return header;
}
61
/**
 * Concatenate all recorded PCM frames and write them to a timestamped
 * WAV file in the current directory. No-op when nothing was recorded.
 * Errors are logged rather than thrown so shutdown can continue.
 */
function saveWavFile() {
  if (recordedFrames.length === 0) {
    console.log("No audio data recorded.");
    return;
  }

  // Generate filename with timestamp, e.g. recorded_audio_2024-01-01T12-00-00.wav
  const timestamp = new Date().toISOString().replace(/[:.]/g, "-").slice(0, 19);
  const filename = `recorded_audio_${timestamp}.wav`;

  try {
    // Combine all recorded frames
    const audioData = Buffer.concat(recordedFrames);
    const dataLength = audioData.length;

    // Create WAV header
    const wavHeader = createWavHeader(SAMPLE_RATE, CHANNELS, dataLength);

    // Write WAV file
    const wavFile = Buffer.concat([wavHeader, audioData]);
    fs.writeFileSync(filename, wavFile);

    // BUG FIX: this log previously printed a literal "$(unknown)" instead of
    // interpolating the generated filename.
    console.log(`Audio saved to: ${filename}`);
    // 2 bytes per sample (16-bit PCM) per channel at SAMPLE_RATE samples/sec.
    console.log(
      `Duration: ${(dataLength / (SAMPLE_RATE * CHANNELS * 2)).toFixed(2)} seconds`
    );
  } catch (error) {
    console.error(`Error saving WAV file: ${error}`);
  }
}
91
// --- Main function ---

/**
 * Open the WebSocket to AssemblyAI, wire up all event handlers, and begin
 * streaming microphone audio once the connection is ready.
 */
async function run() {
  console.log("Starting AssemblyAI streaming transcription...");
  console.log("Audio will be saved to a WAV file when the session ends.");

  // Initialize WebSocket connection
  ws = new WebSocket(API_ENDPOINT, {
    headers: {
      Authorization: YOUR_API_KEY,
    },
  });

  ws.on("open", () => {
    console.log("WebSocket connection opened.");
    console.log(`Connected to: ${API_ENDPOINT}`);
    // Only start capturing audio once the socket can accept it.
    startMicrophone();
  });

  ws.on("message", (message) => {
    try {
      const data = JSON.parse(message);

      switch (data.type) {
        case "Begin": {
          console.log(
            `\nSession began: ID=${data.id}, ExpiresAt=${formatTimestamp(data.expires_at)}`
          );
          break;
        }
        case "Turn": {
          const transcript = data.transcript || "";
          if (data.turn_is_formatted) {
            // Final formatted turn: replace the in-progress line with it.
            clearLine();
            console.log(transcript);
          } else {
            // Partial turn: overwrite the current terminal line in place.
            process.stdout.write(`\r${transcript}`);
          }
          break;
        }
        case "Termination": {
          console.log(
            `\nSession Terminated: Audio Duration=${data.audio_duration_seconds}s, Session Duration=${data.session_duration_seconds}s`
          );
          break;
        }
      }
    } catch (error) {
      console.error(`\nError handling message: ${error}`);
      console.error(`Message data: ${message}`);
    }
  });

  ws.on("error", (error) => {
    console.error(`\nWebSocket Error: ${error}`);
    cleanup();
  });

  ws.on("close", (code, reason) => {
    console.log(`\nWebSocket Disconnected: Status=${code}, Msg=${reason}`);
    cleanup();
  });

  // Handle process termination
  setupTerminationHandlers();
}
159
/**
 * Open the microphone and forward raw PCM chunks to the WebSocket while
 * also buffering a copy of each chunk for the WAV file written at shutdown.
 */
function startMicrophone() {
  try {
    micInstance = mic({
      rate: SAMPLE_RATE.toString(),
      channels: CHANNELS.toString(),
      debug: false,
      exitOnSilence: 6, // This won't actually exit, just a parameter for mic
    });

    micInputStream = micInstance.getAudioStream();

    micInputStream.on("data", (chunk) => {
      const socketReady = ws && ws.readyState === WebSocket.OPEN;
      if (socketReady && !stopRequested) {
        // Keep a copy for the WAV recording
        recordedFrames.push(Buffer.from(chunk));

        // Stream the same audio to AssemblyAI
        ws.send(chunk);
      }
    });

    micInputStream.on("error", (err) => {
      console.error(`Microphone Error: ${err}`);
      cleanup();
    });

    micInstance.start();
    console.log("Microphone stream opened successfully.");
    console.log("Speak into your microphone. Press Ctrl+C to stop.");
  } catch (error) {
    console.error(`Error opening microphone stream: ${error}`);
    cleanup();
  }
}
194
/**
 * Tear down the session: persist the WAV recording, stop audio capture,
 * politely terminate the streaming session, and release the socket.
 * Safe to call from any error path or signal handler.
 */
function cleanup() {
  stopRequested = true;

  // Save recorded audio to WAV file
  saveWavFile();

  // Stop microphone if it's running
  if (micInstance) {
    try {
      micInstance.stop();
    } catch (error) {
      console.error(`Error stopping microphone: ${error}`);
    }
    micInstance = null;
  }

  // Close WebSocket connection if it's open or still connecting
  const closable =
    ws && [WebSocket.OPEN, WebSocket.CONNECTING].includes(ws.readyState);
  if (closable) {
    try {
      // Tell the server we are done so the session (and billing) ends promptly.
      if (ws.readyState === WebSocket.OPEN) {
        const terminateMessage = { type: "Terminate" };
        console.log(
          `Sending termination message: ${JSON.stringify(terminateMessage)}`
        );
        ws.send(JSON.stringify(terminateMessage));
      }
      ws.close();
    } catch (error) {
      console.error(`Error closing WebSocket: ${error}`);
    }
    ws = null;
  }

  console.log("Cleanup complete.");
}
231
/**
 * Install process-level handlers so the session is cleaned up (and the WAV
 * file saved) on Ctrl+C, SIGTERM, or an uncaught exception.
 */
function setupTerminationHandlers() {
  // Shared shutdown path for termination signals.
  const shutdown = (message, exitCode) => {
    console.log(message);
    cleanup();
    // Give time for cleanup before exiting
    setTimeout(() => process.exit(exitCode), 1000);
  };

  process.on("SIGINT", () => shutdown("\nCtrl+C received. Stopping...", 0));
  process.on("SIGTERM", () =>
    shutdown("\nTermination signal received. Stopping...", 0)
  );

  // Handle uncaught exceptions
  process.on("uncaughtException", (error) => {
    console.error(`\nUncaught exception: ${error}`);
    cleanup();
    // Give time for cleanup before exiting
    setTimeout(() => process.exit(1), 1000);
  });
}
256
// Start the application.
// `run()` is async; attach a rejection handler so a startup failure is
// reported instead of becoming an unhandled promise rejection.
run().catch((error) => {
  console.error(`Fatal error: ${error}`);
  process.exit(1);
});

For more information on our Universal Streaming feature, see this section of our official documentation.

Step-by-step migration guide

1. Update API endpoint and configuration

Before (v2):

1const API_KEY = "<YOUR_API_KEY>";
2const SAMPLE_RATE = 16000; // 16kHz sample rate
3
4const ws = new WebSocket(
5 `wss://api.assemblyai.com/v2/realtime/ws?sample_rate=${SAMPLE_RATE}`,
6 {
7 headers: {
8 Authorization: API_KEY,
9 },
10 }
11);

After (v3):

1// --- Configuration ---
2const YOUR_API_KEY = "YOUR-API-KEY"; // Replace with your actual API key
3const CONNECTION_PARAMS = {
4 sample_rate: 16000,
5 format_turns: true, // Request formatted final transcripts
6};
7const API_ENDPOINT_BASE_URL = "wss://streaming.assemblyai.com/v3/ws";
8const API_ENDPOINT = `${API_ENDPOINT_BASE_URL}?${querystring.stringify(CONNECTION_PARAMS)}`;
9
10// Initialize WebSocket connection
11ws = new WebSocket(API_ENDPOINT, {
12 headers: {
13 Authorization: YOUR_API_KEY,
14 },
15});

Key Changes:

  • New base URL: streaming.assemblyai.com instead of api.assemblyai.com
  • Version upgrade: /v3/ws instead of /v2/realtime/ws
  • Configuration via URL parameters using querystring
  • Added format_turns option for better transcript formatting

2. Audio configuration

Before (v2):

1const SAMPLE_RATE = 16000;
2const CHANNELS = 1;

After (v3):

1const SAMPLE_RATE = CONNECTION_PARAMS.sample_rate;
2const CHANNELS = 1;

Key Changes:

  • Sample rate now references the configuration parameter

3. Update message handling schema

Before (v2):

1ws.on("message", (message) => {
2 try {
3 const msg = JSON.parse(message);
4 const msgType = msg.message_type;
5
6 if (msgType === 'SessionBegins') {
7 const sessionId = msg.session_id;
8 console.log("Session ID:", sessionId);
9 return;
10 }
11
12 const text = msg.text || '';
13 if (!text) {
14 return;
15 }
16
17 if (msgType === 'PartialTranscript') {
18 console.log("Partial:", text);
19 } else if (msgType === 'FinalTranscript') {
20 console.log("Final:", text);
21 } else if (msgType === 'error') {
22 console.error("Error:", msg.error);
23 }
24 } catch (error) {
25 console.error("Error handling message:", error);
26 }
27});

After (v3):

1 ws.on("message", (message) => {
2 try {
3 const data = JSON.parse(message);
4 const msgType = data.type;
5 if (msgType === "Begin") {
6 const sessionId = data.id;
7 const expiresAt = data.expires_at;
8 console.log(
9 `\nSession began: ID=${sessionId}, ExpiresAt=${formatTimestamp(expiresAt)}`
10 );
11 } else if (msgType === "Turn") {
12 const transcript = data.transcript || "";
13 const formatted = data.turn_is_formatted;
14 if (formatted) {
15 clearLine();
16 console.log(transcript);
17 } else {
18 process.stdout.write(`\r${transcript}`);
19 }
20 } else if (msgType === "Termination") {
21 const audioDuration = data.audio_duration_seconds;
22 const sessionDuration = data.session_duration_seconds;
23 console.log(
24 `\nSession Terminated: Audio Duration=${audioDuration}s, Session Duration=${sessionDuration}s`
25 );
26 }
27 } catch (error) {
28 console.error(`\nError handling message: ${error}`);
29 console.error(`Message data: ${message}`);
30 }
31 });

Key Changes:

  • Message types renamed: SessionBegins → Begin, PartialTranscript/FinalTranscript → Turn
  • Field names updated: message_type → type, session_id → id, text → transcript
  • Added session expiration timestamp handling (expires_at)
  • New transcript formatting with turn_is_formatted flag
  • Added turn tracking with turn_order and end_of_turn fields
  • New confidence scoring with end_of_turn_confidence
  • Added Termination message with session statistics
  • Error handling moved from message-based to WebSocket events

4. Add graceful shutdown handling and improve error handling and logging

Before (v2):

1ws.on("close", (code, reason) => onClose(ws, code, reason));
2
3function onClose(ws, code, reason) {
4 if (recording) {
5 recording.end();
6 }
7 console.log("Disconnected");
8}
9
10process.on("SIGINT", async function () {
11 console.log();
12 console.log("Stopping recording");
13 if (recording) {
14 recording.end();
15 }
16 console.log("Closing real-time transcript connection");
17 if (ws.readyState === WebSocket.OPEN) {
18 ws.close();
19 }
20 process.exit();
21});

After (v3):

1ws.on("close", (code, reason) => {
2 console.log(`\nWebSocket Disconnected: Status=${code}, Msg=${reason}`);
3 cleanup();
4});
5
6function cleanup() {
7 stopRequested = true;
8 // Save recorded audio to WAV file
9 saveWavFile();
10 // Stop microphone if it's running
11 if (micInstance) {
12 try {
13 micInstance.stop();
14 } catch (error) {
15 console.error(`Error stopping microphone: ${error}`);
16 }
17 micInstance = null;
18 }
19 // Close WebSocket connection if it's open
20 if (ws && [WebSocket.OPEN, WebSocket.CONNECTING].includes(ws.readyState)) {
21 try {
22 // Send termination message if possible
23 if (ws.readyState === WebSocket.OPEN) {
24 const terminateMessage = { type: "Terminate" };
25 console.log(
26 `Sending termination message: ${JSON.stringify(terminateMessage)}`
27 );
28 ws.send(JSON.stringify(terminateMessage));
29 }
30 ws.close();
31 } catch (error) {
32 console.error(`Error closing WebSocket: ${error}`);
33 }
34 ws = null;
35 }
36 console.log("Cleanup complete.");
37}
38
39function setupTerminationHandlers() {
40 // Handle Ctrl+C and other termination signals
41 process.on("SIGINT", () => {
42 console.log("\nCtrl+C received. Stopping...");
43 cleanup();
44 // Give time for cleanup before exiting
45 setTimeout(() => process.exit(0), 1000);
46 });
47 process.on("SIGTERM", () => {
48 console.log("\nTermination signal received. Stopping...");
49 cleanup();
50 // Give time for cleanup before exiting
51 setTimeout(() => process.exit(0), 1000);
52 });
53 // Handle uncaught exceptions
54 process.on("uncaughtException", (error) => {
55 console.error(`\nUncaught exception: ${error}`);
56 cleanup();
57 // Give time for cleanup before exiting
58 setTimeout(() => process.exit(1), 1000);
59 });
60}

Key Changes:

  • Proper KeyboardInterrupt handling
  • Graceful termination message sending
  • Detailed error context and timestamps
  • Proper exception type handling
  • Resource cleanup on all error paths
  • Connection status checking before operations
Note: Pricing is based on session duration, so it is very important to close sessions properly to avoid unexpected usage and cost.

Migration checklist

  • Update API endpoint from v2 to v3
  • Update message type handling (Begin, Turn, Termination)
  • Add proper resource cleanup in all code paths
  • Update field names in message parsing
  • Add graceful shutdown with termination messages
  • Add detailed error logging with context
  • Test KeyboardInterrupt handling
  • Verify audio resource cleanup
  • Test connection failure scenarios

Testing your migration

  1. Basic Functionality: Verify transcription works with simple speech
  2. Error Handling: Test with invalid API keys or network issues
  3. Graceful Shutdown: Test Ctrl+C interruption
  4. Resource Cleanup: Monitor for memory leaks during extended use
  5. Message Formatting: Test with format_turns enabled/disabled

Common migration issues

Issue: “WebSocket connection failed”

Solution: Verify you’re using the new v3 endpoint URL and proper authentication header format.

Issue: “Message type not recognized”

Solution: Update message type handling from old names (SessionBegins, PartialTranscript) to new ones (Begin, Turn).

Benefits of migration

  • Improved Reliability: Better error handling and recovery
  • Lower Latency: Reduced buffer sizes for faster response
  • Enhanced Features: Formatted transcripts and session statistics
  • Better Resource Management: Proper cleanup prevents memory leaks
  • Graceful Shutdown: Clean termination with proper cleanup

Conclusion

This migration provides a more robust, maintainable, and feature-rich streaming transcription implementation. The enhanced error handling, resource management, and modern API features make it suitable for production use cases where reliability and performance are critical.