Migrating from Streaming v2 to Streaming v3 (JavaScript)

This cookbook guides you through migrating from AssemblyAI’s legacy Streaming STT model (v2) to our latest Universal Streaming STT model (v3), which provides ultra-low latency for faster transcription, intelligent endpointing for more natural speech detection, and improved accuracy across various audio conditions.

Check out this blog post to learn more about this new model!

Overview of changes

The migration involves several key improvements:

  • API Version: Upgrade from v2 (/v2/realtime/ws) to v3 (/v3/ws)
  • Enhanced Error Handling: Robust cleanup and resource management
  • Modern Message Format: Updated message types and structure
  • Configuration Options: More flexible connection parameters
  • Graceful Shutdown: Proper termination handling

You can follow the step-by-step guide below to make changes to your existing code, but here is what your code should look like in the end:

// --- Dependencies ---
const WebSocket = require("ws");
const mic = require("mic");
const querystring = require("querystring");
const fs = require("fs");

// --- Configuration ---
const YOUR_API_KEY = "YOUR-API-KEY"; // Replace with your actual API key
const CONNECTION_PARAMS = {
  sample_rate: 16000,
  format_turns: true, // Request formatted final transcripts
};
const API_ENDPOINT_BASE_URL = "wss://streaming.assemblyai.com/v3/ws";
const API_ENDPOINT = `${API_ENDPOINT_BASE_URL}?${querystring.stringify(CONNECTION_PARAMS)}`;

// Audio Configuration
const SAMPLE_RATE = CONNECTION_PARAMS.sample_rate;
const CHANNELS = 1;

// Global state shared across the streaming session
let micInstance = null; // active `mic` recorder instance
let micInputStream = null; // readable stream of raw PCM audio from the mic
let ws = null; // WebSocket connection to AssemblyAI
let stopRequested = false; // set once shutdown has begun

// WAV recording state
let recordedFrames = []; // raw audio Buffers, concatenated into a WAV file on exit
28// --- Helper functions ---
// --- Helper functions ---

// Blank out the current terminal line (80 columns) and return the cursor
// to the start so the next write begins on a clean line.
function clearLine() {
  const blanks = " ".repeat(80);
  process.stdout.write(`\r${blanks}\r`);
}
32
// Convert a Unix timestamp in seconds to an ISO-8601 UTC string.
function formatTimestamp(timestamp) {
  const millis = timestamp * 1000;
  return new Date(millis).toISOString();
}
36
/**
 * Build the canonical 44-byte RIFF/WAVE header for 16-bit PCM audio.
 *
 * @param {number} sampleRate - samples per second (e.g. 16000)
 * @param {number} channels - channel count (1 = mono)
 * @param {number} dataLength - size of the PCM payload in bytes
 * @returns {Buffer} 44-byte header to prepend to the raw PCM data
 */
function createWavHeader(sampleRate, channels, dataLength) {
  const BYTES_PER_SAMPLE = 2; // 16-bit samples
  const blockAlign = channels * BYTES_PER_SAMPLE;
  const byteRate = sampleRate * blockAlign;

  const header = Buffer.alloc(44);

  // RIFF chunk descriptor
  header.write("RIFF", 0);
  header.writeUInt32LE(36 + dataLength, 4); // total size minus the first 8 bytes
  header.write("WAVE", 8);

  // "fmt " sub-chunk (16-byte PCM format block)
  header.write("fmt ", 12);
  header.writeUInt32LE(16, 16); // fmt chunk size
  header.writeUInt16LE(1, 20); // audio format 1 = uncompressed PCM
  header.writeUInt16LE(channels, 22);
  header.writeUInt32LE(sampleRate, 24);
  header.writeUInt32LE(byteRate, 28);
  header.writeUInt16LE(blockAlign, 32);
  header.writeUInt16LE(16, 34); // bits per sample

  // "data" sub-chunk
  header.write("data", 36);
  header.writeUInt32LE(dataLength, 40);

  return header;
}
61
/**
 * Concatenate all recorded PCM frames and write them to a timestamped
 * WAV file in the current directory. No-op when nothing was recorded.
 * Errors are logged rather than thrown so shutdown can continue.
 */
function saveWavFile() {
  if (recordedFrames.length === 0) {
    console.log("No audio data recorded.");
    return;
  }

  // Generate filename with timestamp, e.g. recorded_audio_2024-01-01T12-00-00.wav
  const timestamp = new Date().toISOString().replace(/[:.]/g, "-").slice(0, 19);
  const filename = `recorded_audio_${timestamp}.wav`;

  try {
    // Combine all recorded frames
    const audioData = Buffer.concat(recordedFrames);
    const dataLength = audioData.length;

    // Create WAV header
    const wavHeader = createWavHeader(SAMPLE_RATE, CHANNELS, dataLength);

    // Write WAV file
    const wavFile = Buffer.concat([wavHeader, audioData]);
    fs.writeFileSync(filename, wavFile);

    // BUG FIX: this log previously printed a literal "$(unknown)" instead of
    // interpolating the generated filename.
    console.log(`Audio saved to: ${filename}`);
    // 2 bytes per sample (16-bit PCM) per channel at SAMPLE_RATE samples/sec.
    console.log(
      `Duration: ${(dataLength / (SAMPLE_RATE * CHANNELS * 2)).toFixed(2)} seconds`
    );
  } catch (error) {
    console.error(`Error saving WAV file: ${error}`);
  }
}
91
// --- Main function ---

/**
 * Open the WebSocket to AssemblyAI, wire up all event handlers, and begin
 * streaming microphone audio once the connection is ready.
 */
async function run() {
  console.log("Starting AssemblyAI streaming transcription...");
  console.log("Audio will be saved to a WAV file when the session ends.");

  // Initialize WebSocket connection
  ws = new WebSocket(API_ENDPOINT, {
    headers: {
      Authorization: YOUR_API_KEY,
    },
  });

  ws.on("open", () => {
    console.log("WebSocket connection opened.");
    console.log(`Connected to: ${API_ENDPOINT}`);
    // Only start capturing audio once the socket can accept it.
    startMicrophone();
  });

  ws.on("message", (message) => {
    try {
      const data = JSON.parse(message);

      switch (data.type) {
        case "Begin": {
          console.log(
            `\nSession began: ID=${data.id}, ExpiresAt=${formatTimestamp(data.expires_at)}`
          );
          break;
        }
        case "Turn": {
          const transcript = data.transcript || "";
          if (data.turn_is_formatted) {
            // Final formatted turn: replace the in-progress line with it.
            clearLine();
            console.log(transcript);
          } else {
            // Partial turn: overwrite the current terminal line in place.
            process.stdout.write(`\r${transcript}`);
          }
          break;
        }
        case "Termination": {
          console.log(
            `\nSession Terminated: Audio Duration=${data.audio_duration_seconds}s, Session Duration=${data.session_duration_seconds}s`
          );
          break;
        }
      }
    } catch (error) {
      console.error(`\nError handling message: ${error}`);
      console.error(`Message data: ${message}`);
    }
  });

  ws.on("error", (error) => {
    console.error(`\nWebSocket Error: ${error}`);
    cleanup();
  });

  ws.on("close", (code, reason) => {
    console.log(`\nWebSocket Disconnected: Status=${code}, Msg=${reason}`);
    cleanup();
  });

  // Handle process termination
  setupTerminationHandlers();
}
159
/**
 * Open the microphone and forward raw PCM chunks to the WebSocket while
 * also buffering a copy of each chunk for the WAV file written at shutdown.
 */
function startMicrophone() {
  try {
    micInstance = mic({
      rate: SAMPLE_RATE.toString(),
      channels: CHANNELS.toString(),
      debug: false,
      exitOnSilence: 6, // This won't actually exit, just a parameter for mic
    });

    micInputStream = micInstance.getAudioStream();

    micInputStream.on("data", (chunk) => {
      const socketReady = ws && ws.readyState === WebSocket.OPEN;
      if (socketReady && !stopRequested) {
        // Keep a copy for the WAV recording
        recordedFrames.push(Buffer.from(chunk));

        // Stream the same audio to AssemblyAI
        ws.send(chunk);
      }
    });

    micInputStream.on("error", (err) => {
      console.error(`Microphone Error: ${err}`);
      cleanup();
    });

    micInstance.start();
    console.log("Microphone stream opened successfully.");
    console.log("Speak into your microphone. Press Ctrl+C to stop.");
  } catch (error) {
    console.error(`Error opening microphone stream: ${error}`);
    cleanup();
  }
}
194
/**
 * Tear down the session: persist the WAV recording, stop audio capture,
 * politely terminate the streaming session, and release the socket.
 * Safe to call from any error path or signal handler.
 */
function cleanup() {
  stopRequested = true;

  // Save recorded audio to WAV file
  saveWavFile();

  // Stop microphone if it's running
  if (micInstance) {
    try {
      micInstance.stop();
    } catch (error) {
      console.error(`Error stopping microphone: ${error}`);
    }
    micInstance = null;
  }

  // Close WebSocket connection if it's open or still connecting
  const closable =
    ws && [WebSocket.OPEN, WebSocket.CONNECTING].includes(ws.readyState);
  if (closable) {
    try {
      // Tell the server we are done so the session (and billing) ends promptly.
      if (ws.readyState === WebSocket.OPEN) {
        const terminateMessage = { type: "Terminate" };
        console.log(
          `Sending termination message: ${JSON.stringify(terminateMessage)}`
        );
        ws.send(JSON.stringify(terminateMessage));
      }
      ws.close();
    } catch (error) {
      console.error(`Error closing WebSocket: ${error}`);
    }
    ws = null;
  }

  console.log("Cleanup complete.");
}
231
/**
 * Install process-level handlers so the session is cleaned up (and the WAV
 * file saved) on Ctrl+C, SIGTERM, or an uncaught exception.
 */
function setupTerminationHandlers() {
  // Shared shutdown path for termination signals.
  const shutdown = (message, exitCode) => {
    console.log(message);
    cleanup();
    // Give time for cleanup before exiting
    setTimeout(() => process.exit(exitCode), 1000);
  };

  process.on("SIGINT", () => shutdown("\nCtrl+C received. Stopping...", 0));
  process.on("SIGTERM", () =>
    shutdown("\nTermination signal received. Stopping...", 0)
  );

  // Handle uncaught exceptions
  process.on("uncaughtException", (error) => {
    console.error(`\nUncaught exception: ${error}`);
    cleanup();
    // Give time for cleanup before exiting
    setTimeout(() => process.exit(1), 1000);
  });
}
256
// Start the application.
// `run()` is async; attach a rejection handler so a startup failure is
// reported instead of becoming an unhandled promise rejection.
run().catch((error) => {
  console.error(`Fatal error: ${error}`);
  process.exit(1);
});

For more information on our Universal Streaming feature, see this section of our official documentation.

Step-by-step migration guide

1. Update API endpoint and configuration

Before (v2):

1const API_KEY = "<YOUR_API_KEY>";
2const SAMPLE_RATE = 16000; // 16kHz sample rate
3
4const ws = new WebSocket(
5 `wss://api.assemblyai.com/v2/realtime/ws?sample_rate=${SAMPLE_RATE}`,
6 {
7 headers: {
8 Authorization: API_KEY,
9 },
10 }
11);

After (v3):

1// --- Configuration ---
2const YOUR_API_KEY = "YOUR-API-KEY"; // Replace with your actual API key
3const CONNECTION_PARAMS = {
4 sample_rate: 16000,
5 format_turns: true, // Request formatted final transcripts
6};
7const API_ENDPOINT_BASE_URL = "wss://streaming.assemblyai.com/v3/ws";
8const API_ENDPOINT = `${API_ENDPOINT_BASE_URL}?${querystring.stringify(CONNECTION_PARAMS)}`;
9
10// Initialize WebSocket connection
11ws = new WebSocket(API_ENDPOINT, {
12 headers: {
13 Authorization: YOUR_API_KEY,
14 },
15});

Key Changes:

  • New base URL: streaming.assemblyai.com instead of api.assemblyai.com
  • Version upgrade: /v3/ws instead of /v2/realtime/ws
  • Configuration via URL parameters using querystring
  • Added format_turns option for better transcript formatting

2. Audio configuration

Before (v2):

1const SAMPLE_RATE = 16000;
2const CHANNELS = 1;

After (v3):

1const SAMPLE_RATE = CONNECTION_PARAMS.sample_rate;
2const CHANNELS = 1;

Key Changes:

  • Sample rate now references the configuration parameter

3. Update message handling schema

Before (v2):

1ws.on("message", (message) => {
2 try {
3 const msg = JSON.parse(message);
4 const msgType = msg.message_type;
5
6 if (msgType === 'SessionBegins') {
7 const sessionId = msg.session_id;
8 console.log("Session ID:", sessionId);
9 return;
10 }
11
12 const text = msg.text || '';
13 if (!text) {
14 return;
15 }
16
17 if (msgType === 'PartialTranscript') {
18 console.log("Partial:", text);
19 } else if (msgType === 'FinalTranscript') {
20 console.log("Final:", text);
21 } else if (msgType === 'error') {
22 console.error("Error:", msg.error);
23 }
24 } catch (error) {
25 console.error("Error handling message:", error);
26 }
27});

After (v3):

1 ws.on("message", (message) => {
2 try {
3 const data = JSON.parse(message);
4 const msgType = data.type;
5 if (msgType === "Begin") {
6 const sessionId = data.id;
7 const expiresAt = data.expires_at;
8 console.log(
9 `\nSession began: ID=${sessionId}, ExpiresAt=${formatTimestamp(expiresAt)}`
10 );
11 } else if (msgType === "Turn") {
12 const transcript = data.transcript || "";
13 const formatted = data.turn_is_formatted;
14 if (formatted) {
15 clearLine();
16 console.log(transcript);
17 } else {
18 process.stdout.write(`\r${transcript}`);
19 }
20 } else if (msgType === "Termination") {
21 const audioDuration = data.audio_duration_seconds;
22 const sessionDuration = data.session_duration_seconds;
23 console.log(
24 `\nSession Terminated: Audio Duration=${audioDuration}s, Session Duration=${sessionDuration}s`
25 );
26 }
27 } catch (error) {
28 console.error(`\nError handling message: ${error}`);
29 console.error(`Message data: ${message}`);
30 }
31 });

Key Changes:

  • Message types renamed: SessionBegins → Begin, PartialTranscript/FinalTranscript → Turn
  • Field names updated: message_type → type, session_id → id, text → transcript
  • Added session expiration timestamp handling (expires_at)
  • New transcript formatting with turn_is_formatted flag
  • Added turn tracking with turn_order and end_of_turn fields
  • New confidence scoring with end_of_turn_confidence
  • Added Termination message with session statistics
  • Error handling moved from message-based to WebSocket events

4. Add graceful shutdown handling and improve error handling and logging

Before (v2):

1ws.on("close", (code, reason) => onClose(ws, code, reason));
2
3function onClose(ws, code, reason) {
4 if (recording) {
5 recording.end();
6 }
7 console.log("Disconnected");
8}
9
10process.on("SIGINT", async function () {
11 console.log();
12 console.log("Stopping recording");
13 if (recording) {
14 recording.end();
15 }
16 console.log("Closing real-time transcript connection");
17 if (ws.readyState === WebSocket.OPEN) {
18 ws.close();
19 }
20 process.exit();
21});

After (v3):

1ws.on("close", (code, reason) => {
2 console.log(`\nWebSocket Disconnected: Status=${code}, Msg=${reason}`);
3 cleanup();
4});
5
6function cleanup() {
7 stopRequested = true;
8 // Save recorded audio to WAV file
9 saveWavFile();
10 // Stop microphone if it's running
11 if (micInstance) {
12 try {
13 micInstance.stop();
14 } catch (error) {
15 console.error(`Error stopping microphone: ${error}`);
16 }
17 micInstance = null;
18 }
19 // Close WebSocket connection if it's open
20 if (ws && [WebSocket.OPEN, WebSocket.CONNECTING].includes(ws.readyState)) {
21 try {
22 // Send termination message if possible
23 if (ws.readyState === WebSocket.OPEN) {
24 const terminateMessage = { type: "Terminate" };
25 console.log(
26 `Sending termination message: ${JSON.stringify(terminateMessage)}`
27 );
28 ws.send(JSON.stringify(terminateMessage));
29 }
30 ws.close();
31 } catch (error) {
32 console.error(`Error closing WebSocket: ${error}`);
33 }
34 ws = null;
35 }
36 console.log("Cleanup complete.");
37}
38
39function setupTerminationHandlers() {
40 // Handle Ctrl+C and other termination signals
41 process.on("SIGINT", () => {
42 console.log("\nCtrl+C received. Stopping...");
43 cleanup();
44 // Give time for cleanup before exiting
45 setTimeout(() => process.exit(0), 1000);
46 });
47 process.on("SIGTERM", () => {
48 console.log("\nTermination signal received. Stopping...");
49 cleanup();
50 // Give time for cleanup before exiting
51 setTimeout(() => process.exit(0), 1000);
52 });
53 // Handle uncaught exceptions
54 process.on("uncaughtException", (error) => {
55 console.error(`\nUncaught exception: ${error}`);
56 cleanup();
57 // Give time for cleanup before exiting
58 setTimeout(() => process.exit(1), 1000);
59 });
60}

Key Changes:

  • Proper KeyboardInterrupt handling
  • Graceful termination message sending
  • Detailed error context and timestamps
  • Proper exception type handling
  • Resource cleanup on all error paths
  • Connection status checking before operations
Note: Pricing is based on session duration, so it is very important to close sessions properly to avoid unexpected usage and cost.

Migration checklist

  • Update API endpoint from v2 to v3
  • Update message type handling (Begin, Turn, Termination)
  • Add proper resource cleanup in all code paths
  • Update field names in message parsing
  • Add graceful shutdown with termination messages
  • Add detailed error logging with context
  • Test KeyboardInterrupt handling
  • Verify audio resource cleanup
  • Test connection failure scenarios

Testing your migration

  1. Basic Functionality: Verify transcription works with simple speech
  2. Error Handling: Test with invalid API keys or network issues
  3. Graceful Shutdown: Test Ctrl+C interruption
  4. Resource Cleanup: Monitor for memory leaks during extended use
  5. Message Formatting: Test with format_turns enabled/disabled

Common migration issues

Issue: “WebSocket connection failed”

Solution: Verify you’re using the new v3 endpoint URL and proper authentication header format.

Issue: “Message type not recognized”

Solution: Update message type handling from old names (SessionBegins, PartialTranscript) to new ones (Begin, Turn).

Benefits of migration

  • Improved Reliability: Better error handling and recovery
  • Lower Latency: Reduced buffer sizes for faster response
  • Enhanced Features: Formatted transcripts and session statistics
  • Better Resource Management: Proper cleanup prevents memory leaks
  • Graceful Shutdown: Clean termination with proper cleanup

Conclusion

This migration provides a more robust, maintainable, and feature-rich streaming transcription implementation. The enhanced error handling, resource management, and modern API features make it suitable for production use cases where reliability and performance are critical.