Prompt A Structured Q&A Response Using LLM Gateway

This cookbook will demonstrate how to use AssemblyAI’s LLM Gateway framework to prompt a structured question and answer response.

Quickstart

1import requests
2import time
3import xml.etree.ElementTree as ET
4
5API_KEY = "YOUR_API_KEY"
6audio_url = "https://storage.googleapis.com/aai-web-samples/meeting.mp4"
7
8# -------------------------------
9# Step 1: Transcribe the audio
10# -------------------------------
11transcript_request = requests.post(
12 "https://api.assemblyai.com/v2/transcript",
13 headers={"authorization": API_KEY, "content-type": "application/json"},
14 json={"audio_url": audio_url},
15)
16
17transcript_id = transcript_request.json()["id"]
18
19# Poll for completion
20while True:
21 polling_response = requests.get(
22 f"https://api.assemblyai.com/v2/transcript/{transcript_id}",
23 headers={"authorization": API_KEY},
24 )
25 status = polling_response.json()["status"]
26
27 if status == "completed":
28 transcript_text = polling_response.json()["text"]
29 break
30 elif status == "error":
31 raise RuntimeError(f"Transcription failed: {polling_response.json()['error']}")
32 else:
33 print(f"Transcription status: {status}")
34 time.sleep(3)
35
36# -------------------------------
37# Step 2: Build question helper functions
38# -------------------------------
39def construct_question(question):
40 question_str = f"Question: {question['question']}"
41
42 if question.get("context"):
43 question_str += f"\nContext: {question['context']}"
44
45 # Default answer_format
46 if not question.get("answer_format"):
47 question["answer_format"] = "short sentence"
48
49 question_str += f"\nAnswer Format: {question['answer_format']}"
50
51 if question.get("answer_options"):
52 options_str = ", ".join(question["answer_options"])
53 question_str += f"\nOptions: {options_str}"
54
55 return question_str + "\n"
56
57
58def escape_xml_characters(xml_string):
59 return xml_string.replace("&", "&amp;")
60
61
62# -------------------------------
63# Step 3: Define questions
64# -------------------------------
65questions = [
66 {
67 "question": "What are the top level KPIs for engineering?",
68 "context": "KPI stands for key performance indicator",
69 "answer_format": "short sentence",
70 },
71 {
72 "question": "How many days has it been since the data team has gotten updated metrics?",
73 "answer_options": ["1", "2", "3", "4", "5", "6", "7", "more than 7"],
74 },
75 {"question": "What are the future plans for the project?"},
76]
77
78question_str = "\n".join(construct_question(q) for q in questions)
79
80# -------------------------------
81# Step 4: Build the LLM prompt
82# -------------------------------
83prompt = f"""You are an expert at giving accurate answers to questions about texts.
84No preamble.
85Given the series of questions, answer the questions.
86Each question may follow up with answer format, answer options, and context for each question.
87It is critical that you follow the answer format and answer options for each question.
88When context is provided with a question, refer to it when answering the question.
89You are useful, true and concise, and write in perfect English.
90Only the question is allowed between the <question> tag. Do not include the answer format, options, or question context in your response.
91Only text is allowed between the <question> and <answer> tags.
92XML tags are not allowed between the <question> and <answer> tags.
93End your response with a closing </responses> tag.
94For each question-answer pair, format your response according to the template provided below:
95
96Template for response:
97<responses>
98 <response>
99 <question>The question</question>
100 <answer>Your answer</answer>
101 </response>
102 <response>
103 ...
104 </response>
105 ...
106</responses>
107
108These are the questions:
109{question_str}
110
111Transcript:
112{transcript_text}
113"""
114
115# -------------------------------
116# Step 5: Query LLM Gateway
117# -------------------------------
118headers = {"authorization": API_KEY}
119
120response = requests.post(
121 "https://llm-gateway.assemblyai.com/v1/chat/completions",
122 headers=headers,
123 json={
124 "model": "claude-sonnet-4-5-20250929",
125 "messages": [{"role": "user", "content": prompt}],
126 "max_tokens": 2000,
127 },
128)
129
130response_json = response.json()
131llm_output = response_json["choices"][0]["message"]["content"]
132
133# -------------------------------
134# Step 6: Parse and print XML response
135# -------------------------------
136clean_response = escape_xml_characters(llm_output).strip()
137
138try:
139 root = ET.fromstring(clean_response)
140 for resp in root.findall("response"):
141 question = resp.find("question").text
142 answer = resp.find("answer").text
143 print(f"Question: {question}")
144 print(f"Answer: {answer}\n")
145except ET.ParseError as e:
146 print("Could not parse XML response.")
147 print("Raw model output:\n", llm_output)

Getting Started

Before we begin, make sure you have an AssemblyAI account and an API key. You can sign up for an AssemblyAI account and get your API key from your dashboard.

Find more details on the current LLM Gateway pricing in the AssemblyAI pricing page.

Step-by-Step Instructions

In this guide, we will prompt LLM Gateway with a structured Q&A format and generate an XML response.

First, let’s import the necessary libraries and set our API key.

1import requests
2import time
3import xml.etree.ElementTree as ET
4
5API_KEY = "YOUR_API_KEY"

Next, we’ll use AssemblyAI to transcribe a file and save our transcript.

1audio_url = "https://storage.googleapis.com/aai-web-samples/meeting.mp4"
2
3# -------------------------------
4# Step 1: Transcribe the audio
5# -------------------------------
6transcript_request = requests.post(
7 "https://api.assemblyai.com/v2/transcript",
8 headers={"authorization": API_KEY, "content-type": "application/json"},
9 json={"audio_url": audio_url},
10)
11
12transcript_id = transcript_request.json()["id"]
13
14# Poll for completion
15while True:
16 polling_response = requests.get(
17 f"https://api.assemblyai.com/v2/transcript/{transcript_id}",
18 headers={"authorization": API_KEY},
19 )
20 status = polling_response.json()["status"]
21
22 if status == "completed":
23 transcript_text = polling_response.json()["text"]
24 break
25 elif status == "error":
26 raise RuntimeError(f"Transcription failed: {polling_response.json()['error']}")
27 else:
28 print(f"Transcription status: {status}")
29 time.sleep(3)

Construct a formatted string to structure the questions. This includes the question text, context, an answer format, and any answer options, then returns the formatted string.

1# -------------------------------
2# Step 2: Build question helper functions
3# -------------------------------
4def construct_question(question):
5 question_str = f"Question: {question['question']}"
6
7 if question.get("context"):
8 question_str += f"\nContext: {question['context']}"
9
10 # Default answer_format
11 if not question.get("answer_format"):
12 question["answer_format"] = "short sentence"
13
14 question_str += f"\nAnswer Format: {question['answer_format']}"
15
16 if question.get("answer_options"):
17 options_str = ", ".join(question["answer_options"])
18 question_str += f"\nOptions: {options_str}"
19
20 return question_str + "\n"
21
22def escape_xml_characters(xml_string):
23 return xml_string.replace("&", "&amp;")

Define a list of questions. For each question, you can define additional context and specify either an answer_format or a list of answer_options.

1# -------------------------------
2# Step 3: Define questions
3# -------------------------------
4questions = [
5 {
6 "question": "What are the top level KPIs for engineering?",
7 "context": "KPI stands for key performance indicator",
8 "answer_format": "short sentence",
9 },
10 {
11 "question": "How many days has it been since the data team has gotten updated metrics?",
12 "answer_options": ["1", "2", "3", "4", "5", "6", "7", "more than 7"],
13 },
14 {"question": "What are the future plans for the project?"},
15]

Construct the formatted question string for all the questions and build the LLM prompt.

1question_str = '\n'.join(construct_question(q) for q in questions)

Provide detailed instructions to prompt LLM Gateway to answer a series of questions. This also defines a structured XML template for the responses.

1# -------------------------------
2# Step 4: Build the LLM prompt
3# -------------------------------
4prompt = f"""You are an expert at giving accurate answers to questions about texts.
5No preamble.
6Given the series of questions, answer the questions.
7Each question may follow up with answer format, answer options, and context for each question.
8It is critical that you follow the answer format and answer options for each question.
9When context is provided with a question, refer to it when answering the question.
10You are useful, true and concise, and write in perfect English.
11Only the question is allowed between the <question> tag. Do not include the answer format, options, or question context in your response.
12Only text is allowed between the <question> and <answer> tags.
13XML tags are not allowed between the <question> and <answer> tags.
14End your response with a closing </responses> tag.
15For each question-answer pair, format your response according to the template provided below:
16
17Template for response:
18<responses>
19 <response>
20 <question>The question</question>
21 <answer>Your answer</answer>
22 </response>
23 <response>
24 ...
25 </response>
26 ...
27</responses>
28
29These are the questions:
30{question_str}
31
32Transcript:
33{transcript_text}
34"""
35
36# -------------------------------
37# Step 5: Query LLM Gateway
38# -------------------------------
39headers = {"authorization": API_KEY}
40
41response = requests.post(
42 "https://llm-gateway.assemblyai.com/v1/chat/completions",
43 headers=headers,
44 json={
45 "model": "claude-sonnet-4-5-20250929",
46 "messages": [{"role": "user", "content": prompt}],
47 "max_tokens": 2000,
48 },
49)
50
51response_json = response.json()
52llm_output = response_json["choices"][0]["message"]["content"]
53
54# -------------------------------
55# Step 6: Parse and print XML response
56# -------------------------------
57clean_response = escape_xml_characters(llm_output).strip()
58
59try:
60 root = ET.fromstring(clean_response)
61 for resp in root.findall("response"):
62 question = resp.find("question").text
63 answer = resp.find("answer").text
64 print(f"Question: {question}")
65 print(f"Answer: {answer}\n")
66except ET.ParseError as e:
67 print("Could not parse XML response.")
68 print("Raw model output:\n", llm_output)

Getting Started

Before we begin, make sure you have an AssemblyAI account and an API key. You can sign up for an AssemblyAI account and get your API key from your dashboard.

Find more details on the current LLM Gateway pricing in the AssemblyAI pricing page.

Step-by-Step Instructions

In this guide, we will prompt LLM Gateway with a structured Q&A format and generate an XML response.

First, let’s import the necessary libraries and set our API key.

1import requests
2import time
3import xml.etree.ElementTree as ET
4
5API_KEY = "YOUR_API_KEY"

Next, we’ll use AssemblyAI to transcribe a file and save our transcript.

1audio_url = "https://storage.googleapis.com/aai-web-samples/meeting.mp4"
2
3# -------------------------------
4# Step 1: Transcribe the audio
5# -------------------------------
6transcript_request = requests.post(
7 "https://api.assemblyai.com/v2/transcript",
8 headers={"authorization": API_KEY, "content-type": "application/json"},
9 json={"audio_url": audio_url},
10)
11
12transcript_id = transcript_request.json()["id"]
13
14# Poll for completion
15while True:
16 polling_response = requests.get(
17 f"https://api.assemblyai.com/v2/transcript/{transcript_id}",
18 headers={"authorization": API_KEY},
19 )
20 status = polling_response.json()["status"]
21
22 if status == "completed":
23 transcript_text = polling_response.json()["text"]
24 break
25 elif status == "error":
26 raise RuntimeError(f"Transcription failed: {polling_response.json()['error']}")
27 else:
28 print(f"Transcription status: {status}")
29 time.sleep(3)

Construct a formatted string to structure the questions. This includes the question text, context, an answer format, and any answer options, then returns the formatted string.

1# -------------------------------
2# Step 2: Build question helper functions
3# -------------------------------
4def construct_question(question):
5 question_str = f"Question: {question['question']}"
6
7 if question.get("context"):
8 question_str += f"\nContext: {question['context']}"
9
10 if not question.get("answer_format"):
11 question["answer_format"] = "short sentence"
12
13 question_str += f"\nAnswer Format: {question['answer_format']}"
14
15 if question.get("answer_options"):
16 options_str = ", ".join(question["answer_options"])
17 question_str += f"\nOptions: {options_str}"
18
19 return question_str + "\n"
20
21def escape_xml_characters(xml_string):
22 return xml_string.replace("&", "&amp;")

Define a list of questions. For each question, you can define additional context and specify either an answer_format or a list of answer_options.

1# -------------------------------
2# Step 3: Define questions
3# -------------------------------
4questions = [
5 {
6 "question": "What are the top level KPIs for engineering?",
7 "context": "KPI stands for key performance indicator",
8 "answer_format": "short sentence",
9 },
10 {
11 "question": "How many days has it been since the data team has gotten updated metrics?",
12 "answer_options": ["1", "2", "3", "4", "5", "6", "7", "more than 7"],
13 },
14 {"question": "What are the future plans for the project?"},
15]

Construct the formatted question string for all the questions and build the LLM prompt.

1question_str = '\n'.join(construct_question(q) for q in questions)

Provide detailed instructions to prompt LLM Gateway to answer a series of questions. This also defines a structured XML template for the responses.

1# -------------------------------
2# Step 4: Build the LLM prompt
3# -------------------------------
4prompt = f"""You are an expert at giving accurate answers to questions about texts.
5No preamble.
6Given the series of questions, answer the questions.
7Each question may follow up with answer format, answer options, and context for each question.
8It is critical that you follow the answer format and answer options for each question.
9When context is provided with a question, refer to it when answering the question.
10You are useful, true and concise, and write in perfect English.
11Only the question is allowed between the <question> tag. Do not include the answer format, options, or question context in your response.
12Only text is allowed between the <question> and <answer> tags.
13XML tags are not allowed between the <question> and <answer> tags.
14End your response with a closing </responses> tag.
15For each question-answer pair, format your response according to the template provided below:
16
17Template for response:
18<responses>
19 <response>
20 <question>The question</question>
21 <answer>Your answer</answer>
22 </response>
23 <response>
24 ...
25 </response>
26 ...
27</responses>
28
29These are the questions:
30{question_str}
31
32Transcript:
33{transcript_text}
34"""

Prompt the LLM Gateway model and return the response.

1# -------------------------------
2# Step 5: Query LLM Gateway
3# -------------------------------
4headers = {"authorization": API_KEY}
5
6response = requests.post(
7 "https://llm-gateway.assemblyai.com/v1/chat/completions",
8 headers=headers,
9 json={
10 "model": "claude-sonnet-4-5-20250929",
11 "messages": [{"role": "user", "content": prompt}],
12 "max_tokens": 2000,
13 },
14)
15
16response_json = response.json()
17llm_output = response_json["choices"][0]["message"]["content"]

Clean the XML output and print the question and answer pairs.

1# -------------------------------
2# Step 6: Parse and print XML response
3# -------------------------------
4clean_response = escape_xml_characters(llm_output).strip()
5
6try:
7 root = ET.fromstring(clean_response)
8 for resp in root.findall("response"):
9 question = resp.find("question").text
10 answer = resp.find("answer").text
11 print(f"Question: {question}")
12 print(f"Answer: {answer}\n")
13except ET.ParseError as e:
14 print("Could not parse XML response.")
15 print("Raw model output:\n", llm_output)