Prompt A Structured Q&A Response Using LLM Gateway

This cookbook will demonstrate how to use AssemblyAI’s LLM Gateway framework to prompt a structured question and answer response.

Quickstart

1import requests
2import time
3import xml.etree.ElementTree as ET
4
5API_KEY = "YOUR_API_KEY"
6audio_url = "https://storage.googleapis.com/aai-web-samples/meeting.mp4"
7
8# -------------------------------
9# Step 1: Transcribe the audio
10# -------------------------------
11transcript_request = requests.post(
12 "https://api.assemblyai.com/v2/transcript",
13 headers={"authorization": API_KEY, "content-type": "application/json"},
14 json={"audio_url": audio_url},
15)
16
17transcript_id = transcript_request.json()["id"]
18
19# Poll for completion
20while True:
21 polling_response = requests.get(
22 f"https://api.assemblyai.com/v2/transcript/{transcript_id}",
23 headers={"authorization": API_KEY},
24 )
25 status = polling_response.json()["status"]
26
27 if status == "completed":
28 transcript_text = polling_response.json()["text"]
29 break
30 elif status == "error":
31 raise RuntimeError(f"Transcription failed: {polling_response.json()['error']}")
32 else:
33 print(f"Transcription status: {status}")
34 time.sleep(3)
35
36# -------------------------------
37# Step 2: Build question helper functions
38# -------------------------------
39def construct_question(question):
40 question_str = f"Question: {question['question']}"
41
42 if question.get("context"):
43 question_str += f"\nContext: {question['context']}"
44
45 # Default answer_format
46 if not question.get("answer_format"):
47 question["answer_format"] = "short sentence"
48
49 question_str += f"\nAnswer Format: {question['answer_format']}"
50
51 if question.get("answer_options"):
52 options_str = ", ".join(question["answer_options"])
53 question_str += f"\nOptions: {options_str}"
54
55 return question_str + "\n"
56
57
58def escape_xml_characters(xml_string):
59 return xml_string.replace("&", "&amp;")
60
61
62# -------------------------------
63# Step 3: Define questions
64# -------------------------------
65questions = [
66 {
67 "question": "What are the top level KPIs for engineering?",
68 "context": "KPI stands for key performance indicator",
69 "answer_format": "short sentence",
70 },
71 {
72 "question": "How many days has it been since the data team has gotten updated metrics?",
73 "answer_options": ["1", "2", "3", "4", "5", "6", "7", "more than 7"],
74 },
75 {"question": "What are the future plans for the project?"},
76]
77
78question_str = "\n".join(construct_question(q) for q in questions)
79
80# -------------------------------
81# Step 4: Build the LLM prompt
82# -------------------------------
83prompt = f"""You are an expert at giving accurate answers to questions about texts.
84No preamble.
85Given the series of questions, answer the questions.
86Each question may follow up with answer format, answer options, and context for each question.
87It is critical that you follow the answer format and answer options for each question.
88When context is provided with a question, refer to it when answering the question.
89You are useful, true and concise, and write in perfect English.
90Only the question is allowed between the <question> tag. Do not include the answer format, options, or question context in your response.
91Only text is allowed between the <question> and <answer> tags.
92XML tags are not allowed between the <question> and <answer> tags.
93End your response with a closing </responses> tag.
94For each question-answer pair, format your response according to the template provided below:
95
96Template for response:
97<responses>
98 <response>
99 <question>The question</question>
100 <answer>Your answer</answer>
101 </response>
102 <response>
103 ...
104 </response>
105 ...
106</responses>
107
108These are the questions:
109{question_str}
110
111Transcript:
112{transcript_text}
113"""
114
115# -------------------------------
116# Step 5: Query LLM Gateway
117# -------------------------------
118headers = {"authorization": API_KEY}
119
120response = requests.post(
121 "https://llm-gateway.assemblyai.com/v1/chat/completions",
122 headers=headers,
123 json={
124 "model": "claude-sonnet-4-5-20250929",
125 "messages": [{"role": "user", "content": prompt}],
126 "max_tokens": 2000,
127 },
128)
129
130response_json = response.json()
131llm_output = response_json["choices"][0]["message"]["content"]
132
133# -------------------------------
134# Step 6: Parse and print XML response
135# -------------------------------
136clean_response = escape_xml_characters(llm_output).strip()
137
138try:
139 root = ET.fromstring(clean_response)
140 for resp in root.findall("response"):
141 question = resp.find("question").text
142 answer = resp.find("answer").text
143 print(f"Question: {question}")
144 print(f"Answer: {answer}\n")
145except ET.ParseError as e:
146 print("Could not parse XML response.")
147 print("Raw model output:\n", llm_output)

Getting Started

Before we begin, make sure you have an AssemblyAI account and an API key. You can sign up for an AssemblyAI account and get your API key from your dashboard.

Find more details on the current LLM Gateway pricing in the AssemblyAI pricing page.

Step-by-Step Instructions

In this guide, we will prompt LLM Gateway with a structured Q&A format and generate an XML response.

First, let’s import the necessary libraries and set our API key.

1import requests
2import time
3import xml.etree.ElementTree as ET
4
5API_KEY = "YOUR_API_KEY"

Next, we’ll use AssemblyAI to transcribe a file and save our transcript.

1audio_url = "https://storage.googleapis.com/aai-web-samples/meeting.mp4"
2
3# -------------------------------
4# Step 1: Transcribe the audio
5# -------------------------------
6transcript_request = requests.post(
7 "https://api.assemblyai.com/v2/transcript",
8 headers={"authorization": API_KEY, "content-type": "application/json"},
9 json={"audio_url": audio_url},
10)
11
12transcript_id = transcript_request.json()["id"]
13
14# Poll for completion
15while True:
16 polling_response = requests.get(
17 f"https://api.assemblyai.com/v2/transcript/{transcript_id}",
18 headers={"authorization": API_KEY},
19 )
20 status = polling_response.json()["status"]
21
22 if status == "completed":
23 transcript_text = polling_response.json()["text"]
24 break
25 elif status == "error":
26 raise RuntimeError(f"Transcription failed: {polling_response.json()['error']}")
27 else:
28 print(f"Transcription status: {status}")
29 time.sleep(3)

Construct a formatted string to structure the questions. This includes the question text, context, an answer format, and any answer options, then returns the formatted string.

1# -------------------------------
2# Step 2: Build question helper functions
3# -------------------------------
4def construct_question(question):
5 question_str = f"Question: {question['question']}"
6
7 if question.get("context"):
8 question_str += f"\nContext: {question['context']}"
9
10 # Default answer_format
11 if not question.get("answer_format"):
12 question["answer_format"] = "short sentence"
13
14 question_str += f"\nAnswer Format: {question['answer_format']}"
15
16 if question.get("answer_options"):
17 options_str = ", ".join(question["answer_options"])
18 question_str += f"\nOptions: {options_str}"
19
20 return question_str + "\n"
21
22def escape_xml_characters(xml_string):
23 return xml_string.replace("&", "&amp;")

Define a list of questions. For each question, you can define additional context and specify either an answer_format or a list of answer_options.

1# -------------------------------
2# Step 3: Define questions
3# -------------------------------
4questions = [
5 {
6 "question": "What are the top level KPIs for engineering?",
7 "context": "KPI stands for key performance indicator",
8 "answer_format": "short sentence",
9 },
10 {
11 "question": "How many days has it been since the data team has gotten updated metrics?",
12 "answer_options": ["1", "2", "3", "4", "5", "6", "7", "more than 7"],
13 },
14 {"question": "What are the future plans for the project?"},
15]

Construct the formatted question string for all the questions and build the LLM prompt.

1question_str = '\n'.join(construct_question(q) for q in questions)

Provide detailed instructions to prompt LLM Gateway to answer a series of questions. This also defines a structured XML template for the responses.

1# -------------------------------
2# Step 4: Build the LLM prompt
3# -------------------------------
4prompt = f"""You are an expert at giving accurate answers to questions about texts.
5No preamble.
6Given the series of questions, answer the questions.
7Each question may follow up with answer format, answer options, and context for each question.
8It is critical that you follow the answer format and answer options for each question.
9When context is provided with a question, refer to it when answering the question.
10You are useful, true and concise, and write in perfect English.
11Only the question is allowed between the <question> tag. Do not include the answer format, options, or question context in your response.
12Only text is allowed between the <question> and <answer> tags.
13XML tags are not allowed between the <question> and <answer> tags.
14End your response with a closing </responses> tag.
15For each question-answer pair, format your response according to the template provided below:
16
17Template for response:
18<responses>
19 <response>
20 <question>The question</question>
21 <answer>Your answer</answer>
22 </response>
23 <response>
24 ...
25 </response>
26 ...
27</responses>
28
29These are the questions:
30{question_str}
31
32Transcript:
33{transcript_text}
34"""
35
36# -------------------------------
37# Step 5: Query LLM Gateway
38# -------------------------------
39headers = {"authorization": API_KEY}
40
41response = requests.post(
42 "https://llm-gateway.assemblyai.com/v1/chat/completions",
43 headers=headers,
44 json={
45 "model": "claude-sonnet-4-5-20250929",
46 "messages": [{"role": "user", "content": prompt}],
47 "max_tokens": 2000,
48 },
49)
50
51response_json = response.json()
52llm_output = response_json["choices"][0]["message"]["content"]
53
54# -------------------------------
55# Step 6: Parse and print XML response
56# -------------------------------
57clean_response = escape_xml_characters(llm_output).strip()
58
59try:
60 root = ET.fromstring(clean_response)
61 for resp in root.findall("response"):
62 question = resp.find("question").text
63 answer = resp.find("answer").text
64 print(f"Question: {question}")
65 print(f"Answer: {answer}\n")
66except ET.ParseError as e:
67 print("Could not parse XML response.")
68 print("Raw model output:\n", llm_output)

Getting Started

Before we begin, make sure you have an AssemblyAI account and an API key. You can sign up for an AssemblyAI account and get your API key from your dashboard.

Find more details on the current LLM Gateway pricing in the AssemblyAI pricing page.

Step-by-Step Instructions

In this guide, we will prompt LLM Gateway with a structured Q&A format and generate an XML response.

First, let’s import the necessary libraries and set our API key.

1import requests
2import time
3import xml.etree.ElementTree as ET
4
5API_KEY = "YOUR_API_KEY"

Next, we’ll use AssemblyAI to transcribe a file and save our transcript.

1audio_url = "https://storage.googleapis.com/aai-web-samples/meeting.mp4"
2
3# -------------------------------
4# Step 1: Transcribe the audio
5# -------------------------------
6transcript_request = requests.post(
7 "https://api.assemblyai.com/v2/transcript",
8 headers={"authorization": API_KEY, "content-type": "application/json"},
9 json={"audio_url": audio_url},
10)
11
12transcript_id = transcript_request.json()["id"]
13
14# Poll for completion
15while True:
16 polling_response = requests.get(
17 f"https://api.assemblyai.com/v2/transcript/{transcript_id}",
18 headers={"authorization": API_KEY},
19 )
20 status = polling_response.json()["status"]
21
22 if status == "completed":
23 transcript_text = polling_response.json()["text"]
24 break
25 elif status == "error":
26 raise RuntimeError(f"Transcription failed: {polling_response.json()['error']}")
27 else:
28 print(f"Transcription status: {status}")
29 time.sleep(3)

Construct a formatted string to structure the questions. This includes the question text, context, an answer format, and any answer options, then returns the formatted string.

1# -------------------------------
2# Step 2: Build question helper functions
3# -------------------------------
4def construct_question(question):
5 question_str = f"Question: {question['question']}"
6
7 if question.get("context"):
8 question_str += f"\nContext: {question['context']}"
9
10 if not question.get("answer_format"):
11 question["answer_format"] = "short sentence"
12
13 question_str += f"\nAnswer Format: {question['answer_format']}"
14
15 if question.get("answer_options"):
16 options_str = ", ".join(question["answer_options"])
17 question_str += f"\nOptions: {options_str}"
18
19 return question_str + "\n"
20
21def escape_xml_characters(xml_string):
22 return xml_string.replace("&", "&amp;")

Define a list of questions. For each question, you can define additional context and specify either an answer_format or a list of answer_options.

1# -------------------------------
2# Step 3: Define questions
3# -------------------------------
4questions = [
5 {
6 "question": "What are the top level KPIs for engineering?",
7 "context": "KPI stands for key performance indicator",
8 "answer_format": "short sentence",
9 },
10 {
11 "question": "How many days has it been since the data team has gotten updated metrics?",
12 "answer_options": ["1", "2", "3", "4", "5", "6", "7", "more than 7"],
13 },
14 {"question": "What are the future plans for the project?"},
15]

Construct the formatted question string for all the questions and build the LLM prompt.

1question_str = '\n'.join(construct_question(q) for q in questions)

Provide detailed instructions to prompt LLM Gateway to answer a series of questions. This also defines a structured XML template for the responses.

1# -------------------------------
2# Step 4: Build the LLM prompt
3# -------------------------------
4prompt = f"""You are an expert at giving accurate answers to questions about texts.
5No preamble.
6Given the series of questions, answer the questions.
7Each question may follow up with answer format, answer options, and context for each question.
8It is critical that you follow the answer format and answer options for each question.
9When context is provided with a question, refer to it when answering the question.
10You are useful, true and concise, and write in perfect English.
11Only the question is allowed between the <question> tag. Do not include the answer format, options, or question context in your response.
12Only text is allowed between the <question> and <answer> tags.
13XML tags are not allowed between the <question> and <answer> tags.
14End your response with a closing </responses> tag.
15For each question-answer pair, format your response according to the template provided below:
16
17Template for response:
18<responses>
19 <response>
20 <question>The question</question>
21 <answer>Your answer</answer>
22 </response>
23 <response>
24 ...
25 </response>
26 ...
27</responses>
28
29These are the questions:
30{question_str}
31
32Transcript:
33{transcript_text}
34"""

Prompt the LLM Gateway model and return the response.

1# -------------------------------
2# Step 5: Query LLM Gateway
3# -------------------------------
4headers = {"authorization": API_KEY}
5
6response = requests.post(
7 "https://llm-gateway.assemblyai.com/v1/chat/completions",
8 headers=headers,
9 json={
10 "model": "claude-sonnet-4-5-20250929",
11 "messages": [{"role": "user", "content": prompt}],
12 "max_tokens": 2000,
13 },
14)
15
16response_json = response.json()
17llm_output = response_json["choices"][0]["message"]["content"]

Clean the XML output and print the question and answer pairs.

1# -------------------------------
2# Step 6: Parse and print XML response
3# -------------------------------
4clean_response = escape_xml_characters(llm_output).strip()
5
6try:
7 root = ET.fromstring(clean_response)
8 for resp in root.findall("response"):
9 question = resp.find("question").text
10 answer = resp.find("answer").text
11 print(f"Question: {question}")
12 print(f"Answer: {answer}\n")
13except ET.ParseError as e:
14 print("Could not parse XML response.")
15 print("Raw model output:\n", llm_output)