Transcribe audio files with Streaming
This guide shows you how to transcribe WAV audio files with varying sample rates using our Streaming API.
Quickstart
Here is the complete AssemblyAI Python SDK script to transcribe a WAV audio file using the Streaming API.
import sys
from typing import Type

import assemblyai as aai
from assemblyai.streaming.v3 import (
    BeginEvent,
    StreamingClient,
    StreamingClientOptions,
    StreamingError,
    StreamingEvents,
    StreamingParameters,
    TerminationEvent,
    TurnEvent,
)

YOUR_API_KEY = "YOUR_API_KEY"  # Replace with your AssemblyAI API key
AUDIO_FILE = "audio.wav"  # Path to your audio file
SAMPLE_RATE = 48000  # Change to match the sample rate of your audio file
SAVE_TRANSCRIPT_TO_FILE = True  # Set to False to disable saving transcript to file
PLAY_AUDIO = True  # Set to False to disable audio playback

# Track session data for output file
session_data = {
    "session_id": None,
    "parameters": None,
    "audio_file": AUDIO_FILE,
    "audio_duration_seconds": None,
    "turns": [],
}


def on_begin(self: Type[StreamingClient], event: BeginEvent):
    """Called when the streaming connection has been established."""
    session_data["session_id"] = event.id
    print("Session ID:", event.id, "\n")


def on_turn(self: Type[StreamingClient], event: TurnEvent):
    """Called each time a transcript update (partial or final) is received."""
    # Skip empty transcripts
    if not event.transcript:
        return

    # Determine status label
    if not event.end_of_turn:
        status = "[Partial]"
    elif event.turn_is_formatted:
        status = "[Final (formatted)]"
    else:
        status = "[Final (unformatted)]"

    print(f"{status}: {event.transcript}")

    # Track final turns (formatted if formatting is enabled, otherwise just final)
    is_final = event.end_of_turn and (
        not streaming_params.format_turns or event.turn_is_formatted
    )
    if is_final:
        session_data["turns"].append(event.transcript)
        print()  # Add blank line after final formatted turn for cleaner output


def on_terminated(self: Type[StreamingClient], event: TerminationEvent):
    """Called when the session has ended."""
    session_data["audio_duration_seconds"] = event.audio_duration_seconds
    print(
        f"Session terminated: {event.audio_duration_seconds} seconds of audio processed"
    )


def on_error(self: Type[StreamingClient], error: StreamingError):
    """Called when a streaming error occurs."""
    print(f"Error occurred: {error}")


def save_transcript():
    """Save the transcript to a file in the current working directory.

    NOTE: the file is written relative to the CWD, not the script's
    directory (the open() call below uses a relative path).
    """
    from pathlib import Path

    # Get the audio file name (handles both absolute and relative paths)
    audio_name = Path(session_data["audio_file"]).stem

    # Generate filename: {file_name}_{session_id}.txt
    session_id = session_data["session_id"] or "unknown"
    output_file = f"{audio_name}_{session_id}.txt"

    with open(output_file, "w") as f:
        f.write(f"AssemblyAI Session ID: {session_data['session_id']}\n")
        f.write(f"Audio file: {session_data['audio_file']}\n")
        f.write(f"Audio duration: {session_data['audio_duration_seconds']} seconds\n")
        f.write(f"Parameters used: {session_data['parameters']}\n")
        f.write("See all available parameters and defaults at https://www.assemblyai.com/docs/api-reference/streaming-api/streaming-api#request.query\n\n")

        f.write("\nTranscription Output\n")
        for i, turn in enumerate(session_data["turns"], 1):
            f.write(f"[Turn #{i}]: {turn}\n")

    print(f"Transcript saved to {output_file}")


# Create the streaming client
client = StreamingClient(
    StreamingClientOptions(
        api_key=YOUR_API_KEY
    )
)

client.on(StreamingEvents.Begin, on_begin)
client.on(StreamingEvents.Turn, on_turn)
client.on(StreamingEvents.Termination, on_terminated)
client.on(StreamingEvents.Error, on_error)


def validate_audio_file(filepath: str, sample_rate: int):
    """Validate that the file is a readable mono WAV at the expected rate.

    Exits the process with a helpful ffmpeg conversion hint on failure.
    """
    import wave
    from pathlib import Path

    # Check file extension
    file_ext = Path(filepath).suffix.lower()
    if file_ext != ".wav":
        print(f"Error: Only WAV files are supported. Got: {file_ext}", file=sys.stderr)
        print(f"Convert your file to WAV using: ffmpeg -i {filepath} -ar {sample_rate} -ac 1 output.wav", file=sys.stderr)
        sys.exit(1)

    try:
        with wave.open(filepath, 'rb') as wav_file:
            if wav_file.getnchannels() != 1:
                print("Error: Only mono audio is supported", file=sys.stderr)
                print(f"Convert your file to mono using: ffmpeg -i {filepath} -ar {sample_rate} -ac 1 output.wav", file=sys.stderr)
                sys.exit(1)

            file_sample_rate = wav_file.getframerate()
            if file_sample_rate != sample_rate:
                print(f"Error: File sample rate ({file_sample_rate}) doesn't match expected rate ({sample_rate})", file=sys.stderr)
                print(f"Either update SAMPLE_RATE to {file_sample_rate}, or convert your file using: ffmpeg -i {filepath} -ar {sample_rate} -ac 1 output.wav", file=sys.stderr)
                sys.exit(1)
    except (FileNotFoundError, wave.Error, EOFError) as e:
        # A .wav extension doesn't guarantee valid WAV content; fail with a
        # clear message instead of an unhandled traceback.
        print(f"Error: Could not read {filepath} as a WAV file: {e}", file=sys.stderr)
        sys.exit(1)


def stream_file(filepath: str, sample_rate: int, play_audio: bool = False):
    """Yield the audio file in 50ms chunks, optionally playing audio.

    When playback is enabled, the blocking Stream.write() paces the chunks;
    otherwise a sleep keeps the stream real-time.
    """
    import time
    import wave

    chunk_duration = 0.05
    audio_player = None
    p = None

    if play_audio:
        try:
            import pyaudio
            p = pyaudio.PyAudio()
            with wave.open(filepath, 'rb') as wav_file:
                audio_player = p.open(
                    format=p.get_format_from_width(wav_file.getsampwidth()),
                    channels=wav_file.getnchannels(),
                    rate=wav_file.getframerate(),
                    output=True
                )
        except ImportError:
            print("Warning: pyaudio not installed. Audio playback disabled.", file=sys.stderr)
            print("Install with: pip install pyaudio", file=sys.stderr)
            play_audio = False
        except OSError as e:
            # pyaudio is installed but no usable output device (or the stream
            # couldn't be opened): degrade to silent streaming instead of crashing.
            print(f"Warning: could not open audio output ({e}). Audio playback disabled.", file=sys.stderr)
            if p is not None:
                p.terminate()
            p = None
            audio_player = None
            play_audio = False

    try:
        with wave.open(filepath, 'rb') as wav_file:
            frames_per_chunk = int(sample_rate * chunk_duration)

            while True:
                frames = wav_file.readframes(frames_per_chunk)

                if not frames:
                    break

                if audio_player:
                    audio_player.write(frames)
                else:
                    time.sleep(chunk_duration)

                yield frames
    finally:
        if audio_player:
            audio_player.stop_stream()
            audio_player.close()
            p.terminate()


# Validate audio file before connecting
validate_audio_file(AUDIO_FILE, SAMPLE_RATE)

file_stream = stream_file(
    filepath=AUDIO_FILE,
    sample_rate=SAMPLE_RATE,
    play_audio=PLAY_AUDIO,
)

# Configure streaming parameters
streaming_params = StreamingParameters(
    sample_rate=SAMPLE_RATE,
    format_turns=True,
    speech_model="universal-streaming-english",
)

# Store parameters for output file (dynamically capture all set parameters)
session_data["parameters"] = ", ".join(
    f"{k}={v}" for k, v in streaming_params.__dict__.items() if v is not None
)

# Warn if using default turn detection parameters
turn_params = ["end_of_turn_confidence_threshold", "min_end_of_turn_silence_when_confident", "max_turn_silence"]
if not any(getattr(streaming_params, p, None) is not None for p in turn_params):
    print("Warning: Using default turn detection parameters. For best results, fine-tune to your use case:")
    print("https://www.assemblyai.com/docs/universal-streaming/turn-detection#quick-start-configurations\n")

client.connect(streaming_params)

try:
    client.stream(file_stream)
finally:
    client.disconnect(terminate=True)
    if SAVE_TRANSCRIPT_TO_FILE:
        save_transcript()
Step-by-step guide
Before we begin, make sure you have an AssemblyAI account and an API key. You can sign up and get your API key from your dashboard.
Install and import packages
Install the AssemblyAI Python SDK and pyaudio for audio playback.
$ pip install assemblyai
$ pip install pyaudio
Import packages.
# Standard library
import sys
from typing import Type

# AssemblyAI SDK
import assemblyai as aai
from assemblyai.streaming.v3 import (
    BeginEvent,
    StreamingClient,
    StreamingClientOptions,
    StreamingError,
    StreamingEvents,
    StreamingParameters,
    TerminationEvent,
    TurnEvent,
)
Configure settings
Replace YOUR_API_KEY with your API key.
Set AUDIO_FILE to the relative or absolute path of your audio file, and set SAMPLE_RATE to match your file’s sample rate.
See Connect the client to configure all other stream parameters.
# --- Configuration: edit these before running ---
YOUR_API_KEY = "YOUR_API_KEY"  # Replace with your AssemblyAI API key
AUDIO_FILE = "audio.wav"  # Relative or absolute path to your audio file
SAMPLE_RATE = 48000  # Must match the sample rate of AUDIO_FILE
SAVE_TRANSCRIPT_TO_FILE = True  # Set to False to skip writing the transcript file
PLAY_AUDIO = True  # Set to False to stream silently

# Accumulates everything the output file needs while the session runs.
session_data = dict(
    session_id=None,
    parameters=None,
    audio_file=AUDIO_FILE,
    audio_duration_seconds=None,
    turns=[],
)
WebSocket Event Handlers
def on_begin(self: Type[StreamingClient], event: BeginEvent):
    """Handle the session-opened event: record and print the session ID."""
    session_data["session_id"] = event.id
    print("Session ID:", event.id, "\n")

def on_turn(self: Type[StreamingClient], event: TurnEvent):
    """Handle each transcript update (partial or final) from the server."""
    text = event.transcript
    if not text:
        return

    # Label the update by how final it is.
    if event.end_of_turn:
        status = "[Final (formatted)]" if event.turn_is_formatted else "[Final (unformatted)]"
    else:
        status = "[Partial]"

    print(f"{status}: {text}")

    # A turn is recorded once formatting (when requested) has been applied.
    formatting_done = (not streaming_params.format_turns) or event.turn_is_formatted
    if event.end_of_turn and formatting_done:
        session_data["turns"].append(text)
        print()  # visual separator between turns

def on_terminated(self: Type[StreamingClient], event: TerminationEvent):
    """Handle session termination: record duration and report it."""
    session_data["audio_duration_seconds"] = event.audio_duration_seconds
    print(
        f"Session terminated: {event.audio_duration_seconds} seconds of audio processed"
    )

def on_error(self: Type[StreamingClient], error: StreamingError):
    """Report streaming errors to stdout."""
    print(f"Error occurred: {error}")
Create the streaming client
# Create the streaming client and wire up all event handlers.
client = StreamingClient(StreamingClientOptions(api_key=YOUR_API_KEY))

for _event_type, _handler in (
    (StreamingEvents.Begin, on_begin),
    (StreamingEvents.Turn, on_turn),
    (StreamingEvents.Termination, on_terminated),
    (StreamingEvents.Error, on_error),
):
    client.on(_event_type, _handler)
Helper functions for streaming files
The following helper functions are used to validate audio files, stream audio in chunks, and save the transcript output:
- validate_audio_file() - Validates that the audio file is a mono WAV file with the expected sample rate.
- stream_file() - Streams the audio file in 50ms chunks, optionally playing audio through speakers.
- save_transcript() - Saves the transcript to a text file after the session ends.
def save_transcript():
    """Save the transcript to a file in the current working directory.

    NOTE: the file is written relative to the CWD, not the script's
    directory (the open() call below uses a relative path).
    """
    from pathlib import Path

    # Get the audio file name (handles both absolute and relative paths)
    audio_name = Path(session_data["audio_file"]).stem

    # Generate filename: {file_name}_{session_id}.txt
    session_id = session_data["session_id"] or "unknown"
    output_file = f"{audio_name}_{session_id}.txt"

    with open(output_file, "w") as f:
        f.write(f"AssemblyAI Session ID: {session_data['session_id']}\n")
        f.write(f"Audio file: {session_data['audio_file']}\n")
        f.write(f"Audio duration: {session_data['audio_duration_seconds']} seconds\n")
        f.write(f"Parameters used: {session_data['parameters']}\n")
        f.write("See all available parameters and defaults at https://www.assemblyai.com/docs/api-reference/streaming-api/streaming-api#request.query\n\n")

        f.write("\nTranscription Output\n")
        for i, turn in enumerate(session_data["turns"], 1):
            f.write(f"[Turn #{i}]: {turn}\n")

    print(f"Transcript saved to {output_file}")

def validate_audio_file(filepath: str, sample_rate: int):
    """Validate that the file is a readable mono WAV at the expected rate.

    Exits the process with a helpful ffmpeg conversion hint on failure.
    """
    import wave
    from pathlib import Path

    # Check file extension
    file_ext = Path(filepath).suffix.lower()
    if file_ext != ".wav":
        print(f"Error: Only WAV files are supported. Got: {file_ext}", file=sys.stderr)
        print(f"Convert your file to WAV using: ffmpeg -i {filepath} -ar {sample_rate} -ac 1 output.wav", file=sys.stderr)
        sys.exit(1)

    try:
        with wave.open(filepath, 'rb') as wav_file:
            if wav_file.getnchannels() != 1:
                print("Error: Only mono audio is supported", file=sys.stderr)
                print(f"Convert your file to mono using: ffmpeg -i {filepath} -ar {sample_rate} -ac 1 output.wav", file=sys.stderr)
                sys.exit(1)

            file_sample_rate = wav_file.getframerate()
            if file_sample_rate != sample_rate:
                print(f"Error: File sample rate ({file_sample_rate}) doesn't match expected rate ({sample_rate})", file=sys.stderr)
                print(f"Either update SAMPLE_RATE to {file_sample_rate}, or convert your file using: ffmpeg -i {filepath} -ar {sample_rate} -ac 1 output.wav", file=sys.stderr)
                sys.exit(1)
    except (FileNotFoundError, wave.Error, EOFError) as e:
        # A .wav extension doesn't guarantee valid WAV content; fail with a
        # clear message instead of an unhandled traceback.
        print(f"Error: Could not read {filepath} as a WAV file: {e}", file=sys.stderr)
        sys.exit(1)

def stream_file(filepath: str, sample_rate: int, play_audio: bool = False):
    """Yield the audio file in 50ms chunks, optionally playing audio.

    When playback is enabled, the blocking Stream.write() paces the chunks;
    otherwise a sleep keeps the stream real-time.
    """
    import time
    import wave

    chunk_duration = 0.05
    audio_player = None
    p = None

    if play_audio:
        try:
            import pyaudio
            p = pyaudio.PyAudio()
            with wave.open(filepath, 'rb') as wav_file:
                audio_player = p.open(
                    format=p.get_format_from_width(wav_file.getsampwidth()),
                    channels=wav_file.getnchannels(),
                    rate=wav_file.getframerate(),
                    output=True
                )
        except ImportError:
            print("Warning: pyaudio not installed. Audio playback disabled.", file=sys.stderr)
            print("Install with: pip install pyaudio", file=sys.stderr)
            play_audio = False
        except OSError as e:
            # pyaudio is installed but no usable output device (or the stream
            # couldn't be opened): degrade to silent streaming instead of crashing.
            print(f"Warning: could not open audio output ({e}). Audio playback disabled.", file=sys.stderr)
            if p is not None:
                p.terminate()
            p = None
            audio_player = None
            play_audio = False

    try:
        with wave.open(filepath, 'rb') as wav_file:
            frames_per_chunk = int(sample_rate * chunk_duration)

            while True:
                frames = wav_file.readframes(frames_per_chunk)

                if not frames:
                    break

                if audio_player:
                    audio_player.write(frames)
                else:
                    time.sleep(chunk_duration)

                yield frames
    finally:
        if audio_player:
            audio_player.stop_stream()
            audio_player.close()
            p.terminate()

# Validate audio file before connecting
validate_audio_file(AUDIO_FILE, SAMPLE_RATE)

file_stream = stream_file(
    filepath=AUDIO_FILE,
    sample_rate=SAMPLE_RATE,
    play_audio=PLAY_AUDIO,
)
Connect the client
A warning is printed if default turn detection parameters are used. This is fine for testing, but for best accuracy and optimal performance, see our recommended settings.
# Configure streaming parameters
streaming_params = StreamingParameters(
    sample_rate=SAMPLE_RATE,
    format_turns=True,
    speech_model="universal-streaming-english",
)

# Record every explicitly-set parameter so it can be echoed into the transcript file.
_set_params = [f"{k}={v}" for k, v in streaming_params.__dict__.items() if v is not None]
session_data["parameters"] = ", ".join(_set_params)

# Nudge the user toward tuning turn detection instead of relying on defaults.
_turn_detection_params = (
    "end_of_turn_confidence_threshold",
    "min_end_of_turn_silence_when_confident",
    "max_turn_silence",
)
if all(getattr(streaming_params, name, None) is None for name in _turn_detection_params):
    print("Warning: Using default turn detection parameters. For best results, fine-tune to your use case:")
    print("https://www.assemblyai.com/docs/universal-streaming/turn-detection#quick-start-configurations\n")

client.connect(streaming_params)
Stream the file
# Stream the file. The finally-block guarantees the session is torn down
# and the transcript is (optionally) written even if streaming raises.
try:
    client.stream(file_stream)
finally:
    client.disconnect(terminate=True)
    if SAVE_TRANSCRIPT_TO_FILE:
        save_transcript()
The session will terminate once the file is finished streaming. If SAVE_TRANSCRIPT_TO_FILE is enabled (default), the transcript will be saved to {audio_filename}_{session_id}.txt in the current working directory.
The AUDIO_FILE path can be either relative (e.g., audio.wav) or absolute (e.g., /path/to/audio.wav).
Example output
Here’s an example of what the console output looks like when streaming an audio file:
1 Warning: Using default turn detection parameters. For best results, fine-tune to your use case: 2 https://www.assemblyai.com/docs/universal-streaming/turn-detection#quick-start-configurations 3 4 Session ID: f37d7c4e-6be9-47ed-b6fc-7600fc78e34d 5 6 [Partial]: the 7 [Partial]: the quick 8 [Partial]: the quick brown 9 [Partial]: the quick brown fox 10 [Partial]: the quick brown fox jumps 11 [Partial]: the quick brown fox jumps over 12 [Partial]: the quick brown fox jumps over the 13 [Partial]: the quick brown fox jumps over the lazy 14 [Partial]: the quick brown fox jumps over the lazy dog 15 [Final (unformatted)]: the quick brown fox jumps over the lazy dog 16 [Final (formatted)]: The quick brown fox jumps over the lazy dog. 17 18 [Partial]: it 19 [Partial]: it is 20 [Partial]: it is a 21 [Partial]: it is a common 22 [Partial]: it is a common typing 23 [Partial]: it is a common typing test 24 [Final (unformatted)]: it is a common typing test 25 [Final (formatted)]: It is a common typing test. 26 27 Session terminated: 7.52 seconds of audio processed 28 Transcript saved to audio_f37d7c4e-6be9-47ed-b6fc-7600fc78e34d.txt
The output shows:
- Partial transcripts: Real-time updates as words are recognized
- Final (unformatted): The complete turn before formatting is applied
- Final (formatted): The final transcript with proper capitalization and punctuation