> ## Documentation Index
> Fetch the complete documentation index at: https://assemblyai.com/docs/llms.txt
> Use this file to discover all available pages before exploring further.

# Create Subtitles with Speaker Labels

## Quickstart

```python expandable theme={null}
import assemblyai as aai

# SETTINGS
aai.settings.api_key = "YOUR-API-KEY"
filename = "YOUR-FILE-NAME"
transcriber = aai.Transcriber(config=aai.TranscriptionConfig(speaker_labels=True))
transcript = transcriber.transcribe(filename)

# Maximum number of words per subtitle
max_words_per_subtitle = 6

# Color assignments for speakers
speaker_colors = {
    "A": "red",
    "B": "orange",
    "C": "yellow",
    "D": "yellowgreen",
    "E": "green",
    "F": "lightskyblue",
    "G": "purple",
    "H": "mediumpurple",
    "I": "pink",
    "J": "brown",
}

# Process transcription segments
def process_segments(segments):
    srt_content = ""
    subtitle_index = 1
    for segment in segments:
        speaker = segment.speaker
        color = speaker_colors.get(speaker, "black")  # Default color is black

        # Split text into words and group into chunks
        words = segment.words
        for i in range(0, len(words), max_words_per_subtitle):
            chunk = words[i:i + max_words_per_subtitle]
            start_time = chunk[0].start  # -1 indicates continuation
            end_time = chunk[-1].end
            srt_content += create_subtitle(subtitle_index, start_time, end_time, chunk, color)
            subtitle_index += 1

    return srt_content


# Create a single subtitle
def create_subtitle(index, start_time, end_time, words, color):
    text = ""
    for word in words:
        text += word.text + ' '
    start_srt = format_time(start_time)
    end_srt = format_time(end_time)
    return f"{index}\n{start_srt} --> {end_srt}\n<font color=\"{color}\">{text}</font>\n\n"

# Format time in SRT style
def format_time(milliseconds):
    hours, remainder = divmod(milliseconds, 3600000)
    minutes, remainder = divmod(remainder, 60000)
    seconds, milliseconds = divmod(remainder, 1000)
    return f"{int(hours):02}:{int(minutes):02}:{int(seconds):02},{int(milliseconds):03}"

# Generate SRT content
sentences = transcript.get_sentences()
srt_content = process_segments(sentences)

# Save to SRT file
with open(filename + '.srt', 'w') as file:
    file.write(srt_content)

print(f"SRT file generated: {filename}.srt")
```

This Colab will demonstrate how to use AssemblyAI's [Speaker Diarization](/pre-recorded-audio/label-speakers) model together to format subtitles according to their respective speaker.

## Step-by-step guide

Before we begin, make sure you have an AssemblyAI account and an API key. You can [sign up](https://www.assemblyai.com/dashboard/signup) for an AssemblyAI account and get your API key from your [dashboard](https://www.assemblyai.com/dashboard/home).

```bash theme={null}
pip install assemblyai
```

First, we will configure our API key as well as our file to be transcribed. Then, we decide on a number of words we want to have per subtitle.

Lastly, we transcribe our file.

```python theme={null}
import assemblyai as aai

# SETTINGS
aai.settings.api_key = "YOUR-API-KEY"
filename = "YOUR-FILE-NAME"
transcriber = aai.Transcriber(config=aai.TranscriptionConfig(speaker_labels=True))
transcript = transcriber.transcribe(filename)

# Maximum number of words per subtitle
max_words_per_subtitle = 6
```

## How the code works

`speaker_colors` is a dictionary that maps speaker identifiers (like "A", "B", "C", etc.) to specific colors. Each speaker in the transcription will be associated with a unique color in the subtitles.

When Speaker Diarization is enabled, sentences in our API response have a speaker code under the `speaker` key. We use the speaker code to determine the color of the subtitle text.

```python expandable theme={null}
# Color assignments for speakers
speaker_colors = {
    "A": "red",
    "B": "orange",
    "C": "yellow",
    "D": "yellowgreen",
    "E": "green",
    "F": "lightskyblue",
    "G": "purple",
    "H": "mediumpurple",
    "I": "pink",
    "J": "brown",
}

# Process transcription segments
def process_segments(segments):
    srt_content = ""
    subtitle_index = 1
    for segment in segments:
        speaker = segment.speaker
        color = speaker_colors.get(speaker, "black")  # Default color is black

        # Split text into words and group into chunks
        words = segment.words
        for i in range(0, len(words), max_words_per_subtitle):
            chunk = words[i:i + max_words_per_subtitle]
            start_time = chunk[0].start  # -1 indicates continuation
            end_time = chunk[-1].end
            srt_content += create_subtitle(subtitle_index, start_time, end_time, chunk, color)
            subtitle_index += 1

    return srt_content

# Create a single subtitle
def create_subtitle(index, start_time, end_time, words, color):
    text = ""
    for word in words:
        text += word.text + ' '
    start_srt = format_time(start_time)
    end_srt = format_time(end_time)
    return f"{index}\n{start_srt} --> {end_srt}\n<font color=\"{color}\">{text}</font>\n\n"

# Format time in SRT style
def format_time(milliseconds):
    hours, remainder = divmod(milliseconds, 3600000)
    minutes, remainder = divmod(remainder, 60000)
    seconds, milliseconds = divmod(remainder, 1000)
    return f"{int(hours):02}:{int(minutes):02}:{int(seconds):02},{int(milliseconds):03}"
```

Our last step is to generate and save our subtitle file!

```python theme={null}
# Generate SRT content
sentences = transcript.get_sentences()
srt_content = process_segments(sentences)

# Save to SRT file
with open(filename + '.srt', 'w') as file:
    file.write(srt_content)

print(f"SRT file generated: {filename}.srt")
```
