Keyterm boosting for Universal-Streaming

The keyterm boosting feature helps improve recognition accuracy for specific words and phrases that are important to your use case.

Keyterm boosting is currently in beta and available to all users free of charge. Pricing is still being finalized and may apply in the future.

As we continue to develop this feature, functionality may evolve. For the latest updates and code examples, please refer to this page.

Quickstart

First, install the required dependencies.

npm install ws mic
const WebSocket = require("ws");
const mic = require("mic");
const querystring = require("querystring");
const fs = require("fs");

// --- Configuration ---
const YOUR_API_KEY = "YOUR-API-KEY"; // Replace with your actual API key
const CONNECTION_PARAMS = {
  sample_rate: 16000,
  format_turns: true, // Request formatted final transcripts
  keyterms: JSON.stringify(["Keanu Reeves", "AssemblyAI", "Universal-2"]),
};
const API_ENDPOINT_BASE_URL = "wss://streaming.assemblyai.com/v3/ws";
const API_ENDPOINT = `${API_ENDPOINT_BASE_URL}?${querystring.stringify(CONNECTION_PARAMS)}`;

// Audio Configuration
const SAMPLE_RATE = CONNECTION_PARAMS.sample_rate;
const CHANNELS = 1;

// Global variables
let micInstance = null;
let micInputStream = null;
let ws = null;
let stopRequested = false;
let cleanupCompleted = false; // Guard so cleanup runs only once

// WAV recording variables
let recordedFrames = []; // Store audio frames for WAV file

// --- Helper functions ---
function clearLine() {
  process.stdout.write("\r" + " ".repeat(80) + "\r");
}

function formatTimestamp(timestamp) {
  return new Date(timestamp * 1000).toISOString();
}

function createWavHeader(sampleRate, channels, dataLength) {
  const buffer = Buffer.alloc(44);

  // RIFF header
  buffer.write("RIFF", 0);
  buffer.writeUInt32LE(36 + dataLength, 4);
  buffer.write("WAVE", 8);

  // fmt chunk
  buffer.write("fmt ", 12);
  buffer.writeUInt32LE(16, 16); // fmt chunk size
  buffer.writeUInt16LE(1, 20); // PCM format
  buffer.writeUInt16LE(channels, 22);
  buffer.writeUInt32LE(sampleRate, 24);
  buffer.writeUInt32LE(sampleRate * channels * 2, 28); // byte rate
  buffer.writeUInt16LE(channels * 2, 32); // block align
  buffer.writeUInt16LE(16, 34); // bits per sample

  // data chunk
  buffer.write("data", 36);
  buffer.writeUInt32LE(dataLength, 40);

  return buffer;
}

function saveWavFile() {
  if (recordedFrames.length === 0) {
    console.log("No audio data recorded.");
    return;
  }

  // Generate filename with timestamp
  const timestamp = new Date().toISOString().replace(/[:.]/g, "-").slice(0, 19);
  const filename = `recorded_audio_${timestamp}.wav`;

  try {
    // Combine all recorded frames
    const audioData = Buffer.concat(recordedFrames);
    const dataLength = audioData.length;

    // Create WAV header
    const wavHeader = createWavHeader(SAMPLE_RATE, CHANNELS, dataLength);

    // Write WAV file
    const wavFile = Buffer.concat([wavHeader, audioData]);
    fs.writeFileSync(filename, wavFile);

    console.log(`Audio saved to: ${filename}`);
    console.log(
      `Duration: ${(dataLength / (SAMPLE_RATE * CHANNELS * 2)).toFixed(2)} seconds`
    );
  } catch (error) {
    console.error(`Error saving WAV file: ${error}`);
  }
}

// --- Main function ---
async function run() {
  console.log("Starting AssemblyAI real-time transcription...");
  console.log("Audio will be saved to a WAV file when the session ends.");

  // Initialize WebSocket connection
  ws = new WebSocket(API_ENDPOINT, {
    headers: {
      Authorization: YOUR_API_KEY,
    },
  });

  // Setup WebSocket event handlers
  ws.on("open", () => {
    console.log("WebSocket connection opened.");
    console.log(`Connected to: ${API_ENDPOINT}`);
    // Start the microphone
    startMicrophone();
  });

  ws.on("message", (message) => {
    try {
      const data = JSON.parse(message);
      const msgType = data.type;

      if (msgType === "Begin") {
        const sessionId = data.id;
        const expiresAt = data.expires_at;
        console.log(
          `\nSession began: ID=${sessionId}, ExpiresAt=${formatTimestamp(expiresAt)}`
        );
      } else if (msgType === "Turn") {
        const transcript = data.transcript || "";
        const formatted = data.turn_is_formatted;

        if (formatted) {
          clearLine();
          console.log(transcript);
        } else {
          process.stdout.write(`\r${transcript}`);
        }
      } else if (msgType === "Termination") {
        const audioDuration = data.audio_duration_seconds;
        const sessionDuration = data.session_duration_seconds;
        console.log(
          `\nSession Terminated: Audio Duration=${audioDuration}s, Session Duration=${sessionDuration}s`
        );
      }
    } catch (error) {
      console.error(`\nError handling message: ${error}`);
      console.error(`Message data: ${message}`);
    }
  });

  ws.on("error", (error) => {
    console.error(`\nWebSocket Error: ${error}`);
    cleanup();
  });

  ws.on("close", (code, reason) => {
    console.log(`\nWebSocket Disconnected: Status=${code}, Msg=${reason}`);
    cleanup();
  });

  // Handle process termination
  setupTerminationHandlers();
}

function startMicrophone() {
  try {
    micInstance = mic({
      rate: SAMPLE_RATE.toString(),
      channels: CHANNELS.toString(),
      debug: false,
      exitOnSilence: 6, // This won't actually exit, just a parameter for mic
    });

    micInputStream = micInstance.getAudioStream();

    micInputStream.on("data", (data) => {
      if (ws && ws.readyState === WebSocket.OPEN && !stopRequested) {
        // Store audio data for WAV recording
        recordedFrames.push(Buffer.from(data));

        // Send audio data to WebSocket
        ws.send(data);
      }
    });

    micInputStream.on("error", (err) => {
      console.error(`Microphone Error: ${err}`);
      cleanup();
    });

    micInstance.start();
    console.log("Microphone stream opened successfully.");
    console.log("Speak into your microphone. Press Ctrl+C to stop.");
  } catch (error) {
    console.error(`Error opening microphone stream: ${error}`);
    cleanup();
  }
}

function cleanup() {
  // Guard: cleanup can be triggered twice (e.g. SIGINT, then the "close"
  // event fired by ws.close()), which would save the WAV file twice.
  if (cleanupCompleted) return;
  cleanupCompleted = true;

  stopRequested = true;

  // Save recorded audio to WAV file
  saveWavFile();

  // Stop microphone if it's running
  if (micInstance) {
    try {
      micInstance.stop();
    } catch (error) {
      console.error(`Error stopping microphone: ${error}`);
    }
    micInstance = null;
  }

  // Close WebSocket connection if it's open
  if (ws && [WebSocket.OPEN, WebSocket.CONNECTING].includes(ws.readyState)) {
    try {
      // Send termination message if possible
      if (ws.readyState === WebSocket.OPEN) {
        const terminateMessage = { type: "Terminate" };
        console.log(
          `Sending termination message: ${JSON.stringify(terminateMessage)}`
        );
        ws.send(JSON.stringify(terminateMessage));
      }
      ws.close();
    } catch (error) {
      console.error(`Error closing WebSocket: ${error}`);
    }
    ws = null;
  }

  console.log("Cleanup complete.");
}

function setupTerminationHandlers() {
  // Handle Ctrl+C and other termination signals
  process.on("SIGINT", () => {
    console.log("\nCtrl+C received. Stopping...");
    cleanup();
    // Give time for cleanup before exiting
    setTimeout(() => process.exit(0), 1000);
  });

  process.on("SIGTERM", () => {
    console.log("\nTermination signal received. Stopping...");
    cleanup();
    // Give time for cleanup before exiting
    setTimeout(() => process.exit(0), 1000);
  });

  // Handle uncaught exceptions
  process.on("uncaughtException", (error) => {
    console.error(`\nUncaught exception: ${error}`);
    cleanup();
    // Give time for cleanup before exiting
    setTimeout(() => process.exit(1), 1000);
  });
}

// Start the application
run();
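Then save the script and run it with Node.js (the filename transcribe.js below is just an example):

node transcribe.js

Partial transcripts are rewritten on a single updating line; formatted final transcripts, which are the ones that receive keyterm boosting, are printed on their own lines.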

Configuration

To use keyterm boosting, include your desired keyterms as a query parameter in the WebSocket URL, as shown in the snippet after this list.

  • You can include a maximum of 100 keyterms per session.
  • Each individual keyterm string must be between 5 and 50 characters in length.
  • The format_turns parameter must be set to true for keyterm boosting to be applied.
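The snippet below condenses the connection setup from the quickstart above. Note that the keyterms parameter carries a single JSON-encoded array, not one query parameter per term:

const querystring = require("querystring");

// Keyterms are passed as one JSON-encoded array in the query string.
const params = {
  sample_rate: 16000,
  format_turns: true, // Must be true for keyterm boosting to be applied
  keyterms: JSON.stringify(["Keanu Reeves", "AssemblyAI", "Universal-2"]),
};

const url = `wss://streaming.assemblyai.com/v3/ws?${querystring.stringify(params)}`;
// Pass `url` to: new WebSocket(url, { headers: { Authorization: "YOUR-API-KEY" } })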

Important notes

  • Only final formatted transcripts receive keyterm boosting.
  • Keyterm phrases outside the 5-50 character range are ignored.
  • Requests containing more than 100 keyterms will result in an error (see the validation sketch after this list).
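Given these rules, it can be worth validating keyterms client-side before opening a session. Here is a minimal sketch; the sanitizeKeyterms helper is a hypothetical name, not part of any AssemblyAI SDK:

// Hypothetical helper: drop keyterms the service would ignore and fail
// fast on lists the service would reject.
function sanitizeKeyterms(keyterms) {
  const valid = keyterms.filter((term) => term.length >= 5 && term.length <= 50);
  if (valid.length > 100) {
    throw new Error(`Too many keyterms: ${valid.length} (maximum is 100 per session)`);
  }
  return valid;
}

// Usage when building connection params:
//   keyterms: JSON.stringify(sanitizeKeyterms(myTerms))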

Best practices

To maximize the effectiveness of keyterm boosting (an illustrative example follows this list):

  • Specify Unique Terminology: Include proper names, company names, technical terms, or vocabulary specific to your domain that might not be commonly recognized.
  • Exact Spelling and Capitalization: Provide keyterms with the precise spelling and capitalization you expect to see in the output transcript. This helps the system accurately identify the terms.
  • Avoid Common Words: Do not include single, common English words (e.g., “information”) as keyterms. The system is generally proficient with such words, and adding them as keyterms can be redundant.
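As an illustration of these guidelines (the terms below are examples, not recommendations):

// Good keyterm choices: unique, correctly spelled and capitalized
const keyterms = [
  "AssemblyAI", // company name, exact capitalization
  "Universal-2", // product name unlikely to be in general vocabulary
  "Keanu Reeves", // proper name, spelled as it should appear in the transcript
];

// Less effective choices:
// "information" -- a common English word the model already handles well
// "API" -- only 3 characters, below the 5-character minimum, so it is ignored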