Prompt testing
With the OpenAI API
!pip install openai
from openai import OpenAI
# API_KEY is not assigned anywhere in the visible source; it has to exist in
# the session before this line runs, otherwise this raises NameError.
client = OpenAI(api_key=API_KEY)
from pathlib import Path
# Directory that is globbed for "*.json" annotation files further down.
JSONPATH = Path("/Users/joregan/Playing/hsi/annotations/final_resolved")
# Values of "current_topic" for which process_item(item) returns an empty dict.
SKIPPABLES = ["room"]
def process_item(item):
    """Reduce one annotation entry to its topic and snippet.

    Args:
        item: Mapping with a "high_level" mapping and a "snippet" value.
            "high_level" must contain "current_topic" and either
            "utterance_type" or "topic_name".

    Returns:
        ``{"topic": ..., "snippet": ...}``, or an empty dict when the entry is
        labelled "conversation_generic" or its topic is listed in SKIPPABLES.

    Raises:
        KeyError: If one of the expected keys is missing.
    """
    high_level = item["high_level"]
    # Entries without "utterance_type" carry the same label under "topic_name".
    utt_key = "utterance_type" if "utterance_type" in high_level else "topic_name"
    if high_level[utt_key] == "conversation_generic":
        return {}
    current = high_level["current_topic"]
    if current in SKIPPABLES:
        return {}
    # The key is "topic" (no trailing colon): that is the field name the
    # prompts describe, and the numbering step accepts either spelling.
    return {"topic": current, "snippet": item["snippet"]}
import json
# alldata holds one record per annotation file; filepointer holds the matching
# file stems at the same indices.
alldata = []
filepointer = []
for jsonfile in JSONPATH.glob("*.json"):
    stem = jsonfile.stem
    # Person and room are taken from the 2nd and 4th "_"-separated stem fields.
    name_fields = stem.split("_")
    person = name_fields[1]
    room = name_fields[3]
    with open(jsonfile) as handle:
        annotations = json.load(handle)
    reduced = (process_item(entry) for entry in annotations.values())
    # process_item returns {} for entries to drop; keep only non-empty results.
    kept = [entry for entry in reduced if entry]
    alldata.append({"person": person, "room": room, "items": kept})
    filepointer.append(stem)

# Keep a copy of the filtered input on disk.
with open("/tmp/hsi.json", "w") as handle:
    json.dump(alldata, handle, indent=4)
# Use the first loaded file as the sample for prompt experiments.
test = alldata[0]
# The prompt tells the model the data is JSON and wraps it in a json fence, so
# serialise it with json.dumps. Interpolating the dict directly would insert
# its Python repr (single-quoted strings), which is not JSON.
PROMPT = f"""
Below is some data from a conversation. The data is in JSON format.
The data contains a list of items, each with a topic and a snippet. The topic is the subject of the conversation, and the snippet is a short excerpt from the conversation.
Continue the conversation by writing a new snippet that is relevant to the topic. The new snippet should contain a reference to the topic and should be a natural continuation of the conversation.
The new snippet should be in the same format as the original snippets, and should be a short paragraph of text, of between 30 and 80 words.
There should only be one reference to the topic in the new snippet. It should be a natural part of the conversation, and should not be forced or awkward. It should be marked in boldface using markdown syntax. Only the determiner should be marked; boldface is marked by a single asterisk. The reference should be to the item itself, not to an abstract property related to the item: for example, in "the style of that painting", only "that painting" is a valid reference to the item: "the style" is not a valid reference to the item.
The reference to the topic should be suitable for instructing a person to point at it in a conversation. It can be a noun phrase, or a pronoun, but it should be clear and unambiguous. If the reference is a noun phrase, it should include a determiner, such as "this" or "that". Do not count articles ("the", "a", "an") as determiners, we are only interested in demonstrative determiners. If the reference is a pronoun, it should be clear what it refers to. Be sure to vary the references so there are results with each type of reference.
If another topic is mentioned in the new snippet and the spatial relationship is very clear, it is ok to reuse that spatial reference.
'If you want, I can stop by the kiosk and get *a newspaper* for tomorrow.' is a bad example for a snippet, because it does not contain to the item itself, but to another item of that category; also it includes an article "a" instead of a demonstrative determiner.
Create as many new snippets as there are items in the data.
```json
{json.dumps(test, indent=4)}
```
"""
# Send the prompt as a single user turn.
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": PROMPT},
            ],
        }
    ],
)
# Bare expressions are notebook-style displays; they do nothing in a script.
response
response_text = response.choices[0].message.content
response_text
def clean_response(response_text):
    """Strip Markdown code fences from a model reply and parse what is left.

    Args:
        response_text: Raw reply text, optionally wrapped in "```json" / "```"
            fences.

    Returns:
        The parsed object (a list or dict for the replies this file expects).

    Raises:
        ValueError, SyntaxError: If the text is neither valid JSON nor a
            Python literal.
    """
    import ast
    import json

    payload = response_text.replace("```json", "").replace("```", "").strip()
    # SECURITY: this used to be eval(payload), which would execute arbitrary
    # code contained in the model's reply. JSON is tried first; replies that
    # mirror Python's repr (single-quoted strings) are still accepted through
    # ast.literal_eval, which only evaluates literals.
    try:
        return json.loads(payload)
    except json.JSONDecodeError:
        return ast.literal_eval(payload)
# Parse the reply obtained from the preceding request.
clean_response(response_text)


def _single_user_turn(text):
    """Return a fresh one-message chat payload carrying *text* as user input."""
    return [{"role": "user", "content": [{"type": "text", "text": text}]}]


# The unchanged PROMPT is sent three more times to compare samples.
response = client.chat.completions.create(
    model="gpt-4o", messages=_single_user_turn(PROMPT)
)
response
response_text = response.choices[0].message.content
clean_response(response_text)

# This sample is stored under `esponse` / `esponse_text` (sic), so `response`
# and `response_text` from the request above are left untouched by it.
esponse = client.chat.completions.create(
    model="gpt-4o", messages=_single_user_turn(PROMPT)
)
esponse
esponse_text = esponse.choices[0].message.content
clean_response(esponse_text)

response = client.chat.completions.create(
    model="gpt-4o", messages=_single_user_turn(PROMPT)
)
response
response_text = response.choices[0].message.content
# The same reply is parsed three times in a row.
clean_response(response_text)
clean_response(response_text)
clean_response(response_text)
# Second, bullet-point wording of the prompt. The sample is serialised with
# json.dumps so that the fenced block really contains JSON; interpolating the
# dict directly would insert its Python repr (single-quoted strings).
PROMPT = f"""
You are given a JSON list of conversation items. Each item has a "topic" and a "snippet" (a short excerpt).
Your task is to write a **new snippet** for each topic:
- The new snippet must naturally continue the conversation and be **30–80 words** long.
- It must include exactly **one reference** to the **specific topic**, using a demonstrative determiner ("this", "that", "these", "those") or a clear pronoun.
- **Only** the determiner should be marked in bold using markdown (e.g., *this painting*).
- Do not refer to a general category (e.g., "a newspaper"); refer directly to the topic.
- References must sound natural, not forced. Vary between noun phrases and pronouns where possible.
- If another topic is clearly mentioned in relation to the main one, it’s acceptable to reuse spatial references.
- Create **one new snippet per item**.
Bad example:
'If you want, I can stop by the kiosk and get *a newspaper* for tomorrow.' → Incorrect: refers to another item, uses an article instead of a demonstrative determiner.
---
```json
{json.dumps(test, indent=4)}
```
"""
# The reply is kept under `esponse` / `esponse_text` (sic), so `response` and
# `response_text` from earlier requests are not overwritten here.
esponse = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": PROMPT},
            ],
        }
    ],
)
esponse_text = esponse.choices[0].message.content
clean_response(esponse_text)
def get_prompt(item):
    """Build the snippet-generation prompt for one record.

    The fixed instructions come first; *item* is appended as JSON with
    4-space indentation inside a fenced ``json`` block.
    """
    instructions = """
You are given a JSON list of conversation items. Each item has a "topic" and a "snippet" (a short excerpt).
Your task is to write a **new snippet** for each topic:
- The new snippet must naturally continue the conversation and be **30–80 words** long.
- It must include exactly **one reference** to the **specific topic**, using a demonstrative determiner ("this", "that", "these", "those") or a clear pronoun.
- **Only** the determiner should be marked in bold using markdown (e.g., *this painting*).
- Do not refer to a general category (e.g., "a newspaper"); refer directly to the topic.
- References must sound natural, not forced. Vary between noun phrases and pronouns where possible.
- If another topic is clearly mentioned in relation to the main one, it’s acceptable to reuse spatial references.
- Create **one new snippet per item**.
Bad example:
'If you want, I can stop by the kiosk and get *a newspaper* for tomorrow.' → Incorrect: refers to another item, uses an article instead of a demonstrative determiner.
---
"""
    payload = json.dumps(item, indent=4)
    return instructions + "```json\n" + payload + "\n```\n"
# Generated snippets, keyed by "<file stem>__<index into alldata>".
all_results = {}


def process_item(itemid, max_retries=3):
    """Request new snippets for ``alldata[itemid]`` and record them.

    From this point on the name ``process_item`` refers to this function; the
    earlier one-argument definition is shadowed.

    The reply is parsed with clean_response. A dict reply contributes its
    "items" value, a list reply contributes itself; either is appended as one
    element to ``all_results[key]["items"]``.

    Retry behaviour: the request is repeated only after a failure, and the
    function returns as soon as one reply has been stored. The flattened
    source leaves the original nesting of the final ``return`` ambiguous; the
    parameter name and the log messages indicate this is the intended reading.

    Args:
        itemid: Index into ``alldata`` / ``filepointer``.
        max_retries: Maximum number of request attempts.

    Returns:
        None. Results are stored in the module-level ``all_results``.
    """
    current = alldata[itemid]
    filename = filepointer[itemid]
    prompt = get_prompt(current)
    allreskey = f"{filename}__{itemid}"
    if allreskey not in all_results:
        all_results[allreskey] = {
            "person": current["person"],
            "room": current["room"],
            "filename": filename,
            "items": [],
        }
    current_result = all_results[allreskey]
    for attempt in range(max_retries):
        try:
            response = client.chat.completions.create(
                model="gpt-4o",
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": prompt},
                        ],
                    }
                ],
            )
            response_data = clean_response(response.choices[0].message.content)
            if isinstance(response_data, dict):
                new_items = response_data["items"]
            elif isinstance(response_data, list):
                new_items = response_data
            else:
                # Previously such a reply was dropped without any message.
                raise TypeError(
                    f"unexpected reply type {type(response_data).__name__}"
                )
        except Exception as e:
            # Deliberately broad: request errors, unparsable replies and
            # replies of the wrong shape are all handled by trying again.
            print(f"⚠️ Error on attempt {attempt + 1}: {e}")
            if attempt == max_retries - 1:
                print("Max retries reached. Skipping this item.")
        else:
            current_result["items"].append(new_items)
            return
# Query the model for every loaded record, then save whatever was collected.
for itemid, _record in enumerate(alldata):
    process_item(itemid)
with open("/tmp/allresults.json", "w") as results_file:
    json.dump(all_results, results_file, indent=4)
def make_single(listof_lists):
    """Flatten one level of nesting when the input is a list of lists.

    Only the first element is inspected: if it is not exactly a ``list``, the
    argument is returned unchanged (the same object). An empty argument gives
    a new empty list.
    """
    if len(listof_lists) == 0:
        return []
    if type(listof_lists[0]) is not list:
        return listof_lists
    return [entry for group in listof_lists for entry in group]
# redo collects the keys whose stored "items" list is empty; numered_all is
# the flat list of generated snippets, each with its own id.
redo = []
numered_all = []
for key, result in all_results.items():
    collected = result["items"]
    if collected == []:
        redo.append(key)
    flattened = make_single(collected)
    if flattened == []:
        continue
    for count, item in enumerate(flattened):
        # Accept the alternative field names "new_snippet" and "topic:" by
        # copying them onto the expected keys (this mutates the stored item).
        if "snippet" not in item and "new_snippet" in item:
            item["snippet"] = item["new_snippet"]
        if "topic" not in item and "topic:" in item:
            item["topic"] = item["topic:"]
        numered_all.append(
            {
                "person": result["person"],
                "room": result["room"],
                "filename": result["filename"],
                "id": f"{key}__{count}",
                "topic": item["topic"],
                "snippet": item["snippet"],
            }
        )
with open("/tmp/numered_all.json", "w") as numbered_file:
    json.dump(numered_all, numbered_file, indent=4)
# NOTE(review): this request used to pass `prompt`, but in the visible source
# that name is only ever assigned as a local inside process_item, so it is
# undefined at module level. Assumed intent: the prompt for the sample record
# `test`, built the same way process_item builds its prompt. Confirm.
prompt = get_prompt(test)
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
            ],
        }
    ],
)