Custom Spelling

Custom Spelling lets you customize how words are spelled or formatted in the transcript.

Python
Python SDK
JavaScript
JavaScript SDK

To use Custom Spelling, include custom_spelling in your transcription parameters. The parameter should be a list of dictionaries, with each dictionary specifying a mapping from a word or phrase to a new spelling or format of a word: the from key lists the words or phrases to replace, and the to key gives the new spelling or format.

import requests
import time

# Base URL shared by every AssemblyAI REST endpoint used in this script.
base_url = "https://api.assemblyai.com"

# The API key is sent in the "authorization" header of each request.
headers = {
    "authorization": "<YOUR_API_KEY>"
}

# Upload the local audio file; the response JSON carries an "upload_url"
# that is used as the "audio_url" of the transcription request.
with open("./my-audio.mp3", "rb") as f:
    response = requests.post(base_url + "/v2/upload",
                             headers=headers,
                             data=f)

# Fail fast on an HTTP error status instead of running into a confusing
# KeyError when the error response has no "upload_url" field.
response.raise_for_status()

upload_url = response.json()["upload_url"]

# Transcription request body.
data = {
    "audio_url": upload_url,  # You can also use a URL to an audio or video file on the web
    "language_detection": True,
    # Each entry replaces any of the "from" words or phrases with the "to"
    # spelling.
    "custom_spelling": [
        {
            "from": ["Decarlo"],
            "to": "DeCarlo"
        },
        {
            # Same direction as the SDK examples on this page:
            # the transcribed word "Sequel" is rewritten as "SQL".
            "from": ["Sequel"],
            "to": "SQL"
        }
    ]
}

# Submit the transcription request; the response JSON carries the
# transcript "id" used to build the polling endpoint.
url = base_url + "/v2/transcript"
response = requests.post(url, json=data, headers=headers)
response.raise_for_status()  # surface HTTP errors before reading the body

transcript_id = response.json()['id']
polling_endpoint = base_url + "/v2/transcript/" + transcript_id

# Poll every 3 seconds until the transcript is completed or has failed.
while True:
    poll_response = requests.get(polling_endpoint, headers=headers)
    poll_response.raise_for_status()
    transcription_result = poll_response.json()

    if transcription_result['status'] == 'completed':
        print(f"Transcript ID: {transcript_id}")
        # Print the text too, so the effect of the custom spelling is visible.
        print(transcription_result['text'])
        break

    elif transcription_result['status'] == 'error':
        raise RuntimeError(f"Transcription failed: {transcription_result['error']}")

    else:
        time.sleep(3)

To use Custom Spelling, pass a dictionary to set_custom_spelling() on the transcription config. Each key-value pair specifies a mapping from a word or phrase to a new spelling or format of a word. The key specifies the new spelling or format, and the corresponding value lists the words or phrases you want to replace.

import assemblyai as aai

aai.settings.api_key = "<YOUR_API_KEY>"

# Transcribe a file from the web, or point at a local file instead:
# audio_file = "./local_file.mp3"
audio_file = "https://assembly.ai/wildfires.mp3"

# Keys are the spellings to output; values list the words or phrases
# that should be replaced by them.
custom_spelling = {
    "Gettleman": ["gettleman"],
    "SQL": ["Sequel"],
}

config = aai.TranscriptionConfig(language_detection=True)
config.set_custom_spelling(custom_spelling)

transcriber = aai.Transcriber(config=config)
transcript = transcriber.transcribe(audio_file)

# Stop with an explicit error instead of printing an empty transcript.
if transcript.status == "error":
    raise RuntimeError(f"Transcription failed: {transcript.error}")

print(transcript.text)

To use Custom Spelling, include custom_spelling in your transcription parameters. The parameter should be an array of objects, with each object specifying a mapping from a word or phrase to a new spelling or format of a word: the from key lists the words or phrases to replace, and the to key gives the new spelling or format.

import fs from "fs-extra";

// Root URL for every AssemblyAI REST endpoint used in this script.
const baseUrl = "https://api.assemblyai.com";

// The API key is sent in the "authorization" header of each request.
const headers = { authorization: "<YOUR_API_KEY>" };

// Read the local audio file into memory, then upload it.
const path = "./my-audio.mp3";
const audioData = await fs.readFile(path);

const uploadEndpoint = baseUrl + "/v2/upload";
let res = await fetch(uploadEndpoint, {
  method: "POST",
  headers,
  body: audioData,
});
if (!res.ok) {
  throw new Error(`Error: ${res.status}`);
}

// The upload response carries the URL of the uploaded file.
const uploadResponse = await res.json();
const { upload_url: uploadUrl } = uploadResponse;

// Transcription request body.
const data = {
  audio_url: uploadUrl, // You can also use a URL to an audio or video file on the web
  language_detection: true,
  // Each entry replaces any of the `from` words or phrases with the `to` spelling.
  custom_spelling: [
    {
      from: ["Decarlo"],
      to: "DeCarlo",
    },
    {
      // Same direction as the SDK examples on this page:
      // the transcribed word "Sequel" is rewritten as "SQL".
      from: ["Sequel"],
      to: "SQL",
    },
  ],
};

// Submit the transcription request as JSON.
const url = `${baseUrl}/v2/transcript`;
res = await fetch(url, {
  method: "POST",
  headers: { ...headers, "Content-Type": "application/json" },
  body: JSON.stringify(data),
});
if (!res.ok) {
  throw new Error(`Error: ${res.status}`);
}
const response = await res.json();

const transcriptId = response.id;
const pollingEndpoint = `${baseUrl}/v2/transcript/${transcriptId}`;

// Resolves after the given number of milliseconds.
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

// Poll every 3 seconds until the transcript is completed or has failed.
for (;;) {
  res = await fetch(pollingEndpoint, { headers });
  if (!res.ok) {
    throw new Error(`Error: ${res.status}`);
  }
  const transcriptionResult = await res.json();
  const { status } = transcriptionResult;

  if (status === "completed") {
    console.log(transcriptionResult.text);
    break;
  }
  if (status === "error") {
    throw new Error(`Transcription failed: ${transcriptionResult.error}`);
  }

  await sleep(3000);
}

import { AssemblyAI } from "assemblyai";

const client = new AssemblyAI({
  apiKey: "<YOUR_API_KEY>",
});

// const audioFile = './local_file.mp3'
const audioFile = "https://assembly.ai/wildfires.mp3";

const params = {
  audio: audioFile,
  language_detection: true,
  // Each entry replaces any of the `from` words or phrases with the `to` spelling.
  custom_spelling: [
    {
      from: ["Decarlo"],
      to: "DeCarlo",
    },
    {
      from: ["Sequel"],
      to: "SQL",
    },
  ],
};

// Transcribes the audio with the custom spelling applied and prints the text.
const run = async () => {
  const transcript = await client.transcripts.transcribe(params);

  // Report a failed transcription explicitly, as the other examples on this
  // page do, instead of printing an empty transcript.
  if (transcript.status === "error") {
    throw new Error(`Transcription failed: ${transcript.error}`);
  }

  console.log(transcript.text);
};

run();

The value in the to key is case-sensitive, but the value in the from key isn’t. Additionally, the to key must contain only one word, while the from key can contain multiple words.

Getting started

Features

API reference

Advanced

Guides