> ## Documentation Index
> Fetch the complete documentation index at: https://assemblyai.com/docs/llms.txt
> Use this file to discover all available pages before exploring further.

# Redact PII from Text Using LLM Gateway

This guide shows you how to use AssemblyAI's LLM Gateway to redact personally identifiable information (PII) from text. The example transcribes an audio file and then redacts the resulting transcript sentence by sentence.

## Quickstart

<Tabs groupId="language">
  <Tab language="python" title="Python" default>
    ```python expandable theme={null}
    import requests
    import time
    import json
    import re

    base_url = "https://api.assemblyai.com"
    # Replace <YOUR_API_KEY> with your AssemblyAI API key. The same header is
    # sent with the transcription requests and the LLM Gateway requests below.
    headers = {"authorization": "<YOUR_API_KEY>"}

    def generate_ner(transcript_text):
        """Extract PII-like named entities from text using LLM Gateway.

        Sends the text to the chat completions endpoint with a prompt asking
        for person names, organization names, email addresses, phone numbers,
        and full addresses, and parses the JSON object the model returns.

        Args:
            transcript_text: The text to scan for named entities.

        Returns:
            A list of non-empty entity strings. The list is empty when the
            model's reply cannot be parsed into the expected JSON object.

        Raises:
            requests.HTTPError: If LLM Gateway responds with an error status.
        """
        prompt = '''
        You will be given a transcript of a conversation or text. Your task is to generate named entities from the given transcript text.

        Please identify and extract the following named entities from the transcript:

        1. Person names
        2. Organization names
        3. Email addresses
        4. Phone numbers
        5. Full addresses

        When extracting these entities, make sure to return the exact spelling and formatting as they appear in the transcript. Do not modify or standardize the entities in any way.

        Present your results in a JSON format with a single field named "named_entities". This field should contain an array of strings, where each string is a named entity you've identified. For example:
        {
          "named_entities": ["John Doe", "Acme Corp", "john.doe@example.com", "123-456-7890", "123 Main St, Anytown, USA 12345"]
        }

        Important: Do not include any other information, explanations, or text in your response. Your output should consist solely of the JSON object containing the named entities.

        If you do not find any named entities of a particular type, simply return an empty array for the "named_entities" field.
        '''

        llm_gateway_data = {
            "model": "claude-sonnet-4-5-20250929",
            "messages": [
                {"role": "user", "content": f"{prompt}\n\nTranscript: {transcript_text}"}
            ],
            "max_tokens": 1000,
            "temperature": 0.0
        }

        response = requests.post(
            "https://llm-gateway.assemblyai.com/v1/chat/completions",
            headers=headers,
            json=llm_gateway_data
        )
        # Surface HTTP failures (bad key, rate limit, ...) directly instead of
        # as a KeyError on the missing "choices" field below.
        response.raise_for_status()

        result = response.json()["choices"][0]["message"]["content"]
        if not isinstance(result, str):
            result = ""

        # The model may wrap the JSON in a code fence or extra text despite the
        # prompt; keep only the outermost {...} span before parsing.
        start, end = result.find("{"), result.rfind("}")
        if start != -1 and end > start:
            result = result[start:end + 1]

        try:
            res_json = json.loads(result)
        except json.JSONDecodeError:
            res_json = {}

        # Guard against valid JSON that is not the expected object shape.
        if not isinstance(res_json, dict):
            return []
        named_entities = res_json.get('named_entities')
        if not isinstance(named_entities, list):
            return []

        # Callers pass each entity to str.replace, so drop non-string and
        # empty entries.
        return [entity for entity in named_entities if isinstance(entity, str) and entity]

    # Step 1: Transcribe audio
    # Upload the local file, then submit the returned URL for transcription.
    with open("./my-audio.mp3", "rb") as f:
        response = requests.post(base_url + "/v2/upload", headers=headers, data=f)
    response.raise_for_status()

    upload_url = response.json()["upload_url"]
    data = {"audio_url": upload_url}

    response = requests.post(base_url + "/v2/transcript", json=data, headers=headers)
    response.raise_for_status()
    transcript_id = response.json()['id']
    polling_endpoint = base_url + "/v2/transcript/" + transcript_id

    # Poll every 3 seconds until the transcript completes or fails.
    while True:
        response = requests.get(polling_endpoint, headers=headers)
        response.raise_for_status()
        transcription_result = response.json()
        if transcription_result['status'] == 'completed':
            break
        elif transcription_result['status'] == 'error':
            raise RuntimeError(f"Transcription failed: {transcription_result['error']}")
        else:
            time.sleep(3)

    # Step 2: Split transcript into sentences and redact PII
    transcript_text = transcription_result['text'] or ''
    # Split only where sentence-ending punctuation is followed by whitespace.
    # Splitting on every '.', '!' or '?' would cut emails, URLs, and dotted
    # numbers into pieces, so those entities would never be detected whole.
    sentences = re.split(r'(?<=[.!?])\s+', transcript_text)
    redacted_sentences = []

    for sentence in sentences:
        sentence = sentence.strip()
        if not sentence:
            continue

        generated_entities = generate_ner(sentence)
        redacted_sentence = sentence

        # Redact longer entities first: replacing "John" before "John Doe"
        # would leave "Doe" exposed.
        for entity in sorted(generated_entities, key=len, reverse=True):
            redacted_sentence = redacted_sentence.replace(entity, '#' * len(entity))

        redacted_sentences.append(redacted_sentence)
        print(redacted_sentence)

    # Sentences keep their original punctuation, so a single space rejoins them.
    redacted_transcript = ' '.join(redacted_sentences)

    print('\nFull redacted transcript:')
    print(redacted_transcript)
    ```
  </Tab>

  <Tab language="javascript" title="JavaScript">
    ```javascript expandable theme={null}
    import fs from "fs-extra";

    const baseUrl = "https://api.assemblyai.com";
    // Replace <YOUR_API_KEY> with your AssemblyAI API key. The same header is
    // sent with the transcription requests and the LLM Gateway requests below.
    const headers = { authorization: "<YOUR_API_KEY>" };

    /**
     * Extract PII-like named entities from text using LLM Gateway.
     *
     * Sends the text to the chat completions endpoint with a prompt asking for
     * person names, organization names, email addresses, phone numbers, and
     * full addresses, and parses the JSON object the model returns.
     *
     * @param {string} transcriptText - The text to scan for named entities.
     * @returns {Promise<string[]>} Non-empty entity strings; an empty array when
     *   the model's reply cannot be parsed into the expected JSON object.
     * @throws {Error} If LLM Gateway responds with a non-2xx status.
     */
    async function generateNer(transcriptText) {
      const prompt = `
        You will be given a transcript of a conversation or text. Your task is to generate named entities from the given transcript text.

        Please identify and extract the following named entities from the transcript:

        1. Person names
        2. Organization names
        3. Email addresses
        4. Phone numbers
        5. Full addresses

        When extracting these entities, make sure to return the exact spelling and formatting as they appear in the transcript. Do not modify or standardize the entities in any way.

        Present your results in a JSON format with a single field named "named_entities". This field should contain an array of strings, where each string is a named entity you've identified. For example:
        {
          "named_entities": ["John Doe", "Acme Corp", "john.doe@example.com", "123-456-7890", "123 Main St, Anytown, USA 12345"]
        }

        Important: Do not include any other information, explanations, or text in your response. Your output should consist solely of the JSON object containing the named entities.

        If you do not find any named entities of a particular type, simply return an empty array for the "named_entities" field.
        `;

      const llmGatewayData = {
        model: "claude-sonnet-4-5-20250929",
        messages: [
          { role: "user", content: `${prompt}\n\nTranscript: ${transcriptText}` },
        ],
        max_tokens: 1000,
        temperature: 0.0,
      };

      let res = await fetch("https://llm-gateway.assemblyai.com/v1/chat/completions", {
        method: "POST",
        headers: { ...headers, "Content-Type": "application/json" },
        body: JSON.stringify(llmGatewayData),
      });
      if (!res.ok) throw new Error(`Error: ${res.status}`);
      const response = await res.json();

      const content = response.choices[0].message.content;
      const result = typeof content === "string" ? content : "";

      // The model may wrap the JSON in a code fence or extra text despite the
      // prompt; keep only the outermost {...} span before parsing.
      const start = result.indexOf("{");
      const end = result.lastIndexOf("}");
      const jsonText = start !== -1 && end > start ? result.slice(start, end + 1) : result;

      let resJson;
      try {
        resJson = JSON.parse(jsonText);
      } catch (e) {
        resJson = null;
      }

      // JSON.parse can return null or a non-object; only accept an array of entities.
      const namedEntities = Array.isArray(resJson?.named_entities) ? resJson.named_entities : [];

      // Callers pass each entity to String.prototype.replaceAll, so drop
      // non-string and empty entries.
      return namedEntities.filter((entity) => typeof entity === "string" && entity.length > 0);
    }

    // Step 1: Transcribe audio
    // Upload the local file, then submit the returned URL for transcription.
    const fileData = await fs.readFile("./my-audio.mp3");
    let res = await fetch(baseUrl + "/v2/upload", {
      method: "POST",
      headers,
      body: fileData,
    });
    if (!res.ok) throw new Error(`Error: ${res.status}`);
    const uploadResponse = await res.json();

    const uploadUrl = uploadResponse.upload_url;
    const data = { audio_url: uploadUrl };

    res = await fetch(baseUrl + "/v2/transcript", {
      method: "POST",
      headers: { ...headers, "Content-Type": "application/json" },
      body: JSON.stringify(data),
    });
    if (!res.ok) throw new Error(`Error: ${res.status}`);
    const transcriptResponse = await res.json();
    const transcriptId = transcriptResponse.id;
    const pollingEndpoint = baseUrl + "/v2/transcript/" + transcriptId;

    // Poll every 3 seconds until the transcript completes or fails.
    let transcriptionResult;
    while (true) {
      res = await fetch(pollingEndpoint, { headers });
      if (!res.ok) throw new Error(`Error: ${res.status}`);
      transcriptionResult = await res.json();
      if (transcriptionResult.status === "completed") {
        break;
      } else if (transcriptionResult.status === "error") {
        throw new Error(`Transcription failed: ${transcriptionResult.error}`);
      } else {
        await new Promise((resolve) => setTimeout(resolve, 3000));
      }
    }

    // Step 2: Split transcript into sentences and redact PII
    const transcriptText = transcriptionResult.text ?? "";
    // Split only where sentence-ending punctuation is followed by whitespace.
    // Splitting on every ".", "!" or "?" would cut emails, URLs, and dotted
    // numbers into pieces, so those entities would never be detected whole.
    const sentences = transcriptText.split(/(?<=[.!?])\s+/);
    const redactedSentences = [];

    for (let sentence of sentences) {
      sentence = sentence.trim();
      if (!sentence) continue;

      const generatedEntities = await generateNer(sentence);
      let redactedSentence = sentence;

      // Redact longer entities first: replacing "John" before "John Doe"
      // would leave "Doe" exposed.
      const longestFirst = [...generatedEntities].sort((a, b) => b.length - a.length);
      for (const entity of longestFirst) {
        redactedSentence = redactedSentence.replaceAll(entity, "#".repeat(entity.length));
      }

      redactedSentences.push(redactedSentence);
      console.log(redactedSentence);
    }

    // Sentences keep their original punctuation, so a single space rejoins them.
    const redactedTranscript = redactedSentences.join(" ");

    console.log("\nFull redacted transcript:");
    console.log(redactedTranscript);
    ```
  </Tab>
</Tabs>

## Get Started

Before we begin, make sure you have an AssemblyAI account and an API key. You can [sign up](https://assemblyai.com/dashboard/signup) for an account and get your API key from your dashboard.

## Step-by-Step Instructions

### Install dependencies

Install the required packages:

<Tabs groupId="language">
  <Tab language="python" title="Python" default>
    ```bash theme={null}
    pip install requests
    ```
  </Tab>

  <Tab language="javascript" title="JavaScript">
    ```bash theme={null}
    npm install fs-extra
    ```
  </Tab>
</Tabs>

Import the required packages, then set the API base URL and your authorization header:

<Tabs groupId="language">
  <Tab language="python" title="Python" default>
    ```python theme={null}
    import requests
    import time
    import json
    import re

    base_url = "https://api.assemblyai.com"
    # Replace <YOUR_API_KEY> with your AssemblyAI API key. The same header is
    # sent with the transcription requests and the LLM Gateway requests below.
    headers = {"authorization": "<YOUR_API_KEY>"}
    ```
  </Tab>

  <Tab language="javascript" title="JavaScript">
    ```javascript theme={null}
    import fs from "fs-extra";

    const baseUrl = "https://api.assemblyai.com";
    // Replace <YOUR_API_KEY> with your AssemblyAI API key. The same header is
    // sent with the transcription requests and the LLM Gateway requests below.
    const headers = { authorization: "<YOUR_API_KEY>" };
    ```
  </Tab>
</Tabs>

<Tabs groupId="language">
  <Tab language="python" title="Python" default>
    Define a function `generate_ner` that uses LLM Gateway to identify named entities (person names, organizations, emails, phone numbers, addresses) in a given text.

    ```python expandable theme={null}
    def generate_ner(transcript_text):
        """Extract PII-like named entities from text using LLM Gateway.

        Sends the text to the chat completions endpoint with a prompt asking
        for person names, organization names, email addresses, phone numbers,
        and full addresses, and parses the JSON object the model returns.

        Args:
            transcript_text: The text to scan for named entities.

        Returns:
            A list of non-empty entity strings. The list is empty when the
            model's reply cannot be parsed into the expected JSON object.

        Raises:
            requests.HTTPError: If LLM Gateway responds with an error status.
        """
        prompt = '''
        You will be given a transcript of a conversation or text. Your task is to generate named entities from the given transcript text.

        Please identify and extract the following named entities from the transcript:

        1. Person names
        2. Organization names
        3. Email addresses
        4. Phone numbers
        5. Full addresses

        When extracting these entities, make sure to return the exact spelling and formatting as they appear in the transcript. Do not modify or standardize the entities in any way.

        Present your results in a JSON format with a single field named "named_entities". This field should contain an array of strings, where each string is a named entity you've identified. For example:
        {
          "named_entities": ["John Doe", "Acme Corp", "john.doe@example.com", "123-456-7890", "123 Main St, Anytown, USA 12345"]
        }

        Important: Do not include any other information, explanations, or text in your response. Your output should consist solely of the JSON object containing the named entities.

        If you do not find any named entities of a particular type, simply return an empty array for the "named_entities" field.
        '''

        llm_gateway_data = {
            "model": "claude-sonnet-4-5-20250929",
            "messages": [
                {"role": "user", "content": f"{prompt}\n\nTranscript: {transcript_text}"}
            ],
            "max_tokens": 1000,
            "temperature": 0.0
        }

        response = requests.post(
            "https://llm-gateway.assemblyai.com/v1/chat/completions",
            headers=headers,
            json=llm_gateway_data
        )
        # Surface HTTP failures (bad key, rate limit, ...) directly instead of
        # as a KeyError on the missing "choices" field below.
        response.raise_for_status()

        result = response.json()["choices"][0]["message"]["content"]
        if not isinstance(result, str):
            result = ""

        # The model may wrap the JSON in a code fence or extra text despite the
        # prompt; keep only the outermost {...} span before parsing.
        start, end = result.find("{"), result.rfind("}")
        if start != -1 and end > start:
            result = result[start:end + 1]

        try:
            res_json = json.loads(result)
        except json.JSONDecodeError:
            res_json = {}

        # Guard against valid JSON that is not the expected object shape.
        if not isinstance(res_json, dict):
            return []
        named_entities = res_json.get('named_entities')
        if not isinstance(named_entities, list):
            return []

        # Callers pass each entity to str.replace, so drop non-string and
        # empty entries.
        return [entity for entity in named_entities if isinstance(entity, str) and entity]
    ```
  </Tab>

  <Tab language="javascript" title="JavaScript">
    Define a function `generateNer` that uses LLM Gateway to identify named entities (person names, organizations, emails, phone numbers, addresses) in a given text.

    ```javascript expandable theme={null}
    /**
     * Extract PII-like named entities from text using LLM Gateway.
     *
     * Sends the text to the chat completions endpoint with a prompt asking for
     * person names, organization names, email addresses, phone numbers, and
     * full addresses, and parses the JSON object the model returns.
     *
     * @param {string} transcriptText - The text to scan for named entities.
     * @returns {Promise<string[]>} Non-empty entity strings; an empty array when
     *   the model's reply cannot be parsed into the expected JSON object.
     * @throws {Error} If LLM Gateway responds with a non-2xx status.
     */
    async function generateNer(transcriptText) {
      const prompt = `
        You will be given a transcript of a conversation or text. Your task is to generate named entities from the given transcript text.

        Please identify and extract the following named entities from the transcript:

        1. Person names
        2. Organization names
        3. Email addresses
        4. Phone numbers
        5. Full addresses

        When extracting these entities, make sure to return the exact spelling and formatting as they appear in the transcript. Do not modify or standardize the entities in any way.

        Present your results in a JSON format with a single field named "named_entities". This field should contain an array of strings, where each string is a named entity you've identified. For example:
        {
          "named_entities": ["John Doe", "Acme Corp", "john.doe@example.com", "123-456-7890", "123 Main St, Anytown, USA 12345"]
        }

        Important: Do not include any other information, explanations, or text in your response. Your output should consist solely of the JSON object containing the named entities.

        If you do not find any named entities of a particular type, simply return an empty array for the "named_entities" field.
        `;

      const llmGatewayData = {
        model: "claude-sonnet-4-5-20250929",
        messages: [
          { role: "user", content: `${prompt}\n\nTranscript: ${transcriptText}` },
        ],
        max_tokens: 1000,
        temperature: 0.0,
      };

      let res = await fetch("https://llm-gateway.assemblyai.com/v1/chat/completions", {
        method: "POST",
        headers: { ...headers, "Content-Type": "application/json" },
        body: JSON.stringify(llmGatewayData),
      });
      if (!res.ok) throw new Error(`Error: ${res.status}`);
      const response = await res.json();

      const content = response.choices[0].message.content;
      const result = typeof content === "string" ? content : "";

      // The model may wrap the JSON in a code fence or extra text despite the
      // prompt; keep only the outermost {...} span before parsing.
      const start = result.indexOf("{");
      const end = result.lastIndexOf("}");
      const jsonText = start !== -1 && end > start ? result.slice(start, end + 1) : result;

      let resJson;
      try {
        resJson = JSON.parse(jsonText);
      } catch (e) {
        resJson = null;
      }

      // JSON.parse can return null or a non-object; only accept an array of entities.
      const namedEntities = Array.isArray(resJson?.named_entities) ? resJson.named_entities : [];

      // Callers pass each entity to String.prototype.replaceAll, so drop
      // non-string and empty entries.
      return namedEntities.filter((entity) => typeof entity === "string" && entity.length > 0);
    }
    ```
  </Tab>
</Tabs>

Transcribe an audio file using the AssemblyAI API:

<Tabs groupId="language">
  <Tab language="python" title="Python" default>
    ```python theme={null}
    # Upload the local file; the response contains a URL the transcription API can read.
    with open("./my-audio.mp3", "rb") as f:
        response = requests.post(base_url + "/v2/upload", headers=headers, data=f)
    # Raise on HTTP errors (e.g. an invalid API key) instead of failing later
    # with a KeyError on the missing field.
    response.raise_for_status()

    upload_url = response.json()["upload_url"]
    data = {"audio_url": upload_url}  # You can also use a URL to an audio or video file on the web

    response = requests.post(base_url + "/v2/transcript", json=data, headers=headers)
    response.raise_for_status()
    transcript_id = response.json()['id']
    polling_endpoint = base_url + "/v2/transcript/" + transcript_id

    # Poll every 3 seconds until the transcript completes or fails.
    while True:
        response = requests.get(polling_endpoint, headers=headers)
        response.raise_for_status()
        transcription_result = response.json()
        if transcription_result['status'] == 'completed':
            break
        elif transcription_result['status'] == 'error':
            raise RuntimeError(f"Transcription failed: {transcription_result['error']}")
        else:
            time.sleep(3)
    ```
  </Tab>

  <Tab language="javascript" title="JavaScript">
    ```javascript expandable theme={null}
    // Upload the local file; the response contains a URL the transcription API can read.
    const fileData = await fs.readFile("./my-audio.mp3");
    let res = await fetch(baseUrl + "/v2/upload", { method: "POST", headers, body: fileData });
    if (!res.ok) throw new Error(`Error: ${res.status}`);
    const uploadResponse = await res.json();

    const uploadUrl = uploadResponse.upload_url;
    const data = { audio_url: uploadUrl }; // You can also use a URL to an audio or video file on the web

    // Submit the transcription job.
    res = await fetch(baseUrl + "/v2/transcript", {
      method: "POST",
      headers: { ...headers, "Content-Type": "application/json" },
      body: JSON.stringify(data),
    });
    if (!res.ok) throw new Error(`Error: ${res.status}`);
    const transcriptResponse = await res.json();
    const transcriptId = transcriptResponse.id;
    const pollingEndpoint = baseUrl + "/v2/transcript/" + transcriptId;

    // Poll every 3 seconds until the transcript completes or fails.
    let transcriptionResult;
    for (;;) {
      res = await fetch(pollingEndpoint, { headers });
      if (!res.ok) throw new Error(`Error: ${res.status}`);
      transcriptionResult = await res.json();

      const { status } = transcriptionResult;
      if (status === "completed") break;
      if (status === "error") {
        throw new Error(`Transcription failed: ${transcriptionResult.error}`);
      }

      await new Promise((resolve) => setTimeout(resolve, 3000));
    }
    ```
  </Tab>
</Tabs>

<Tabs groupId="language">
  <Tab language="python" title="Python" default>
    Split the transcript into sentences, identify the named entities in each sentence using `generate_ner`, and replace every occurrence of an entity with a run of `#` characters of the same length:

    ```python theme={null}
    transcript_text = transcription_result['text'] or ''
    # Split only where sentence-ending punctuation is followed by whitespace.
    # Splitting on every '.', '!' or '?' would cut emails, URLs, and dotted
    # numbers into pieces, so those entities would never be detected whole.
    sentences = re.split(r'(?<=[.!?])\s+', transcript_text)
    redacted_sentences = []

    for sentence in sentences:
        sentence = sentence.strip()
        if not sentence:
            continue

        generated_entities = generate_ner(sentence)
        redacted_sentence = sentence

        # Redact longer entities first: replacing "John" before "John Doe"
        # would leave "Doe" exposed.
        for entity in sorted(generated_entities, key=len, reverse=True):
            redacted_sentence = redacted_sentence.replace(entity, '#' * len(entity))

        redacted_sentences.append(redacted_sentence)
        print(redacted_sentence)

    # Sentences keep their original punctuation, so a single space rejoins them.
    redacted_transcript = ' '.join(redacted_sentences)
    ```
  </Tab>

  <Tab language="javascript" title="JavaScript">
    Split the transcript into sentences, identify the named entities in each sentence using `generateNer`, and replace every occurrence of an entity with a run of `#` characters of the same length:

    ```javascript theme={null}
    const transcriptText = transcriptionResult.text ?? "";
    // Split only where sentence-ending punctuation is followed by whitespace.
    // Splitting on every ".", "!" or "?" would cut emails, URLs, and dotted
    // numbers into pieces, so those entities would never be detected whole.
    const sentences = transcriptText.split(/(?<=[.!?])\s+/);
    const redactedSentences = [];

    for (let sentence of sentences) {
      sentence = sentence.trim();
      if (!sentence) continue;

      const generatedEntities = await generateNer(sentence);
      let redactedSentence = sentence;

      // Redact longer entities first: replacing "John" before "John Doe"
      // would leave "Doe" exposed.
      const longestFirst = [...generatedEntities].sort((a, b) => b.length - a.length);
      for (const entity of longestFirst) {
        redactedSentence = redactedSentence.replaceAll(entity, "#".repeat(entity.length));
      }

      redactedSentences.push(redactedSentence);
      console.log(redactedSentence);
    }

    // Sentences keep their original punctuation, so a single space rejoins them.
    const redactedTranscript = redactedSentences.join(" ");
    ```
  </Tab>
</Tabs>

Print the fully redacted transcript:

<Tabs groupId="language">
  <Tab language="python" title="Python" default>
    ```python theme={null}
    print('\nFull redacted transcript:')
    print(redacted_transcript)
    ```
  </Tab>

  <Tab language="javascript" title="JavaScript">
    ```javascript theme={null}
    console.log("\nFull redacted transcript:");
    console.log(redactedTranscript);
    ```
  </Tab>
</Tabs>
