Keyterm boosting for Universal-Streaming

The keyterm boosting feature helps improve recognition accuracy for specific words and phrases that are important to your use case.

Keyterm boosting is currently in beta and available to all users free of charge. Pricing is still being finalized and may apply in the future.

As we continue to develop this feature, functionality may evolve. For the latest updates and code examples, please refer to this page.

Quickstart

First, install the required dependencies.

npm install ws mic
const WebSocket = require("ws");
const mic = require("mic");
const querystring = require("querystring");
const fs = require("fs");

// --- Configuration ---
const YOUR_API_KEY = "YOUR-API-KEY"; // Replace with your actual API key
const CONNECTION_PARAMS = {
  sample_rate: 16000,
  format_turns: true, // Request formatted final transcripts
  keyterms: JSON.stringify(["Keanu Reeves", "AssemblyAI", "Universal-2"]),
};
const API_ENDPOINT_BASE_URL = "wss://streaming.assemblyai.com/v3/ws";
const API_ENDPOINT = `${API_ENDPOINT_BASE_URL}?${querystring.stringify(CONNECTION_PARAMS)}`;

// Audio Configuration
const SAMPLE_RATE = CONNECTION_PARAMS.sample_rate;
const CHANNELS = 1;

// Global variables
let micInstance = null;
let micInputStream = null;
let ws = null;
let stopRequested = false;
let cleanupCompleted = false; // Guard so cleanup runs only once

// WAV recording variables
let recordedFrames = []; // Store audio frames for WAV file

// --- Helper functions ---
function clearLine() {
  process.stdout.write("\r" + " ".repeat(80) + "\r");
}

function formatTimestamp(timestamp) {
  return new Date(timestamp * 1000).toISOString();
}

function createWavHeader(sampleRate, channels, dataLength) {
  const buffer = Buffer.alloc(44);

  // RIFF header
  buffer.write("RIFF", 0);
  buffer.writeUInt32LE(36 + dataLength, 4);
  buffer.write("WAVE", 8);

  // fmt chunk
  buffer.write("fmt ", 12);
  buffer.writeUInt32LE(16, 16); // fmt chunk size
  buffer.writeUInt16LE(1, 20); // PCM format
  buffer.writeUInt16LE(channels, 22);
  buffer.writeUInt32LE(sampleRate, 24);
  buffer.writeUInt32LE(sampleRate * channels * 2, 28); // byte rate
  buffer.writeUInt16LE(channels * 2, 32); // block align
  buffer.writeUInt16LE(16, 34); // bits per sample

  // data chunk
  buffer.write("data", 36);
  buffer.writeUInt32LE(dataLength, 40);

  return buffer;
}

function saveWavFile() {
  if (recordedFrames.length === 0) {
    console.log("No audio data recorded.");
    return;
  }

  // Generate filename with timestamp
  const timestamp = new Date().toISOString().replace(/[:.]/g, "-").slice(0, 19);
  const filename = `recorded_audio_${timestamp}.wav`;

  try {
    // Combine all recorded frames
    const audioData = Buffer.concat(recordedFrames);
    const dataLength = audioData.length;

    // Create WAV header
    const wavHeader = createWavHeader(SAMPLE_RATE, CHANNELS, dataLength);

    // Write WAV file
    const wavFile = Buffer.concat([wavHeader, audioData]);
    fs.writeFileSync(filename, wavFile);

    console.log(`Audio saved to: ${filename}`);
    console.log(
      `Duration: ${(dataLength / (SAMPLE_RATE * CHANNELS * 2)).toFixed(2)} seconds`
    );
  } catch (error) {
    console.error(`Error saving WAV file: ${error}`);
  }
}

// --- Main function ---
async function run() {
  console.log("Starting AssemblyAI real-time transcription...");
  console.log("Audio will be saved to a WAV file when the session ends.");

  // Initialize WebSocket connection
  ws = new WebSocket(API_ENDPOINT, {
    headers: {
      Authorization: YOUR_API_KEY,
    },
  });

  // Setup WebSocket event handlers
  ws.on("open", () => {
    console.log("WebSocket connection opened.");
    console.log(`Connected to: ${API_ENDPOINT}`);
    // Start the microphone
    startMicrophone();
  });

  ws.on("message", (message) => {
    try {
      const data = JSON.parse(message);
      const msgType = data.type;

      if (msgType === "Begin") {
        const sessionId = data.id;
        const expiresAt = data.expires_at;
        console.log(
          `\nSession began: ID=${sessionId}, ExpiresAt=${formatTimestamp(expiresAt)}`
        );
      } else if (msgType === "Turn") {
        const transcript = data.transcript || "";
        const formatted = data.turn_is_formatted;

        if (formatted) {
          clearLine();
          console.log(transcript);
        } else {
          process.stdout.write(`\r${transcript}`);
        }
      } else if (msgType === "Termination") {
        const audioDuration = data.audio_duration_seconds;
        const sessionDuration = data.session_duration_seconds;
        console.log(
          `\nSession Terminated: Audio Duration=${audioDuration}s, Session Duration=${sessionDuration}s`
        );
      }
    } catch (error) {
      console.error(`\nError handling message: ${error}`);
      console.error(`Message data: ${message}`);
    }
  });

  ws.on("error", (error) => {
    console.error(`\nWebSocket Error: ${error}`);
    cleanup();
  });

  ws.on("close", (code, reason) => {
    console.log(`\nWebSocket Disconnected: Status=${code}, Msg=${reason}`);
    cleanup();
  });

  // Handle process termination
  setupTerminationHandlers();
}

function startMicrophone() {
  try {
    micInstance = mic({
      rate: SAMPLE_RATE.toString(),
      channels: CHANNELS.toString(),
      debug: false,
      exitOnSilence: 6, // This won't actually exit, just a parameter for mic
    });

    micInputStream = micInstance.getAudioStream();

    micInputStream.on("data", (data) => {
      if (ws && ws.readyState === WebSocket.OPEN && !stopRequested) {
        // Store audio data for WAV recording
        recordedFrames.push(Buffer.from(data));

        // Send audio data to WebSocket
        ws.send(data);
      }
    });

    micInputStream.on("error", (err) => {
      console.error(`Microphone Error: ${err}`);
      cleanup();
    });

    micInstance.start();
    console.log("Microphone stream opened successfully.");
    console.log("Speak into your microphone. Press Ctrl+C to stop.");
  } catch (error) {
    console.error(`Error opening microphone stream: ${error}`);
    cleanup();
  }
}

function cleanup() {
  // Guard: cleanup can be triggered twice (e.g. SIGINT, then the "close"
  // event fired by ws.close()), which would save the WAV file twice.
  if (cleanupCompleted) return;
  cleanupCompleted = true;

  stopRequested = true;

  // Save recorded audio to WAV file
  saveWavFile();

  // Stop microphone if it's running
  if (micInstance) {
    try {
      micInstance.stop();
    } catch (error) {
      console.error(`Error stopping microphone: ${error}`);
    }
    micInstance = null;
  }

  // Close WebSocket connection if it's open
  if (ws && [WebSocket.OPEN, WebSocket.CONNECTING].includes(ws.readyState)) {
    try {
      // Send termination message if possible
      if (ws.readyState === WebSocket.OPEN) {
        const terminateMessage = { type: "Terminate" };
        console.log(
          `Sending termination message: ${JSON.stringify(terminateMessage)}`
        );
        ws.send(JSON.stringify(terminateMessage));
      }
      ws.close();
    } catch (error) {
      console.error(`Error closing WebSocket: ${error}`);
    }
    ws = null;
  }

  console.log("Cleanup complete.");
}

function setupTerminationHandlers() {
  // Handle Ctrl+C and other termination signals
  process.on("SIGINT", () => {
    console.log("\nCtrl+C received. Stopping...");
    cleanup();
    // Give time for cleanup before exiting
    setTimeout(() => process.exit(0), 1000);
  });

  process.on("SIGTERM", () => {
    console.log("\nTermination signal received. Stopping...");
    cleanup();
    // Give time for cleanup before exiting
    setTimeout(() => process.exit(0), 1000);
  });

  // Handle uncaught exceptions
  process.on("uncaughtException", (error) => {
    console.error(`\nUncaught exception: ${error}`);
    cleanup();
    // Give time for cleanup before exiting
    setTimeout(() => process.exit(1), 1000);
  });
}

// Start the application
run();
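Then save the script and run it with Node.js (the filename transcribe.js below is just an example):

node transcribe.js

Partial transcripts are rewritten on a single updating line; formatted final transcripts, which are the ones that receive keyterm boosting, are printed on their own lines.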

Configuration

To use keyterm boosting, include your desired keyterms as a query parameter in the WebSocket URL, as shown in the snippet after this list.

  • You can include a maximum of 100 keyterms per session.
  • Each individual keyterm string must be between 5 and 50 characters in length.
  • The format_turns parameter must be set to true for keyterm boosting to be applied.
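The snippet below condenses the connection setup from the quickstart above. Note that the keyterms parameter carries a single JSON-encoded array, not one query parameter per term:

const querystring = require("querystring");

// Keyterms are passed as one JSON-encoded array in the query string.
const params = {
  sample_rate: 16000,
  format_turns: true, // Must be true for keyterm boosting to be applied
  keyterms: JSON.stringify(["Keanu Reeves", "AssemblyAI", "Universal-2"]),
};

const url = `wss://streaming.assemblyai.com/v3/ws?${querystring.stringify(params)}`;
// Pass `url` to: new WebSocket(url, { headers: { Authorization: "YOUR-API-KEY" } })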

Important notes

  • Only final formatted transcripts receive keyterm boosting.
  • Keyterm phrases outside the 5-50 character range are ignored.
  • Requests containing more than 100 keyterms will result in an error (see the validation sketch after this list).
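Given these rules, it can be worth validating keyterms client-side before opening a session. Here is a minimal sketch; the sanitizeKeyterms helper is a hypothetical name, not part of any AssemblyAI SDK:

// Hypothetical helper: drop keyterms the service would ignore and fail
// fast on lists the service would reject.
function sanitizeKeyterms(keyterms) {
  const valid = keyterms.filter((term) => term.length >= 5 && term.length <= 50);
  if (valid.length > 100) {
    throw new Error(`Too many keyterms: ${valid.length} (maximum is 100 per session)`);
  }
  return valid;
}

// Usage when building connection params:
//   keyterms: JSON.stringify(sanitizeKeyterms(myTerms))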

Best practices

To maximize the effectiveness of keyterm boosting (an illustrative example follows this list):

  • Specify Unique Terminology: Include proper names, company names, technical terms, or vocabulary specific to your domain that might not be commonly recognized.
  • Exact Spelling and Capitalization: Provide keyterms with the precise spelling and capitalization you expect to see in the output transcript. This helps the system accurately identify the terms.
  • Avoid Common Words: Do not include single, common English words (e.g., “information”) as keyterms. The system is generally proficient with such words, and adding them as keyterms can be redundant.
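As an illustration of these guidelines (the terms below are examples, not recommendations):

// Good keyterm choices: unique, correctly spelled and capitalized
const keyterms = [
  "AssemblyAI", // company name, exact capitalization
  "Universal-2", // product name unlikely to be in general vocabulary
  "Keanu Reeves", // proper name, spelled as it should appear in the transcript
];

// Less effective choices:
// "information" -- a common English word the model already handles well
// "API" -- only 3 characters, below the 5-character minimum, so it is ignored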