From 9614597392633c65e80a90dbc175ff0dfeaa1914 Mon Sep 17 00:00:00 2001
From: "@nheuser" <nheuser@uni-koblenz.de>
Date: Fri, 28 Jun 2024 12:58:02 +0200
Subject: [PATCH] updated data structure

---
 PythonAPI/.gitignore                  |  2 +-
 PythonAPI/getResults.py               | 32 ++++++++++++++++++++++-----
 PythonAPI/prompts/prompt_deutsch.txt  | 26 ++++++++++++++++++++++
 PythonAPI/prompts/prompt_englisch.txt | 26 ++++++++++++++++++++++
 PythonAPI/sendRequest.py              | 27 +++++++++++-----------
 5 files changed, 94 insertions(+), 19 deletions(-)
 create mode 100755 PythonAPI/prompts/prompt_deutsch.txt
 create mode 100755 PythonAPI/prompts/prompt_englisch.txt

diff --git a/PythonAPI/.gitignore b/PythonAPI/.gitignore
index 9243bb0..1b34d42 100644
--- a/PythonAPI/.gitignore
+++ b/PythonAPI/.gitignore
@@ -1,5 +1,5 @@
 /inputData
 /resultsAlgo
-/requests?.jsonl
+/requests
 /batchId.txt
 /resultsAPI.jsonl
\ No newline at end of file
diff --git a/PythonAPI/getResults.py b/PythonAPI/getResults.py
index 9320c62..1b89333 100644
--- a/PythonAPI/getResults.py
+++ b/PythonAPI/getResults.py
@@ -2,11 +2,35 @@ import json
 from openai import OpenAI
 
 client = OpenAI()
 
+REPEAT = 3
+
+def compareAIAnswers(fileId):
+    resultsAPI = client.files.content(fileId)
+    resultsAPIList = []
+    with open("resultsAPI.jsonl", "w") as resultAPIFile:
+        resultAPIFile.write(resultsAPI.text)
+    for line in resultsAPI.text.splitlines():
+        resultsAPIList.append(json.loads(line.strip()))
+    with open("resultsOfMembership.txt") as resultsAlgo:
+        json_object = resultsAlgo.read()
+        resultsAlgoDict = json.loads(json_object)
+    comparingResultsDict = {}
+    for r in resultsAPIList:
+        id = str(int(r["custom_id"].split('-')[-1]) // REPEAT)
+        algoResult = resultsAlgoDict[id]
+        apiResult = r
+        apiResultDict = json.loads(apiResult["response"]["body"]["choices"][0]["message"]["content"])
+        i = 0
+        for b in apiResultDict:
+            comparingResultsDict[id] = (b == algoResult[i])
+            i += 1
+
+
 if __name__ == "__main__":
     with open("batchId.txt") as batchIdFile:
-        batchId = batchIdFile.read()
-    batchResult = client.batches.retrieve(batch_id=batchId)
+        batch_Id = batchIdFile.read()
+    batchResult = client.batches.retrieve(batch_id=batch_Id)
     match batchResult.status:
         case "validating":
             print("Request is being validated")
@@ -18,9 +42,7 @@ if __name__ == "__main__":
             print("Results are being prepared")
         case "completed":
             print("Request has been completed")
-            results = client.files.content(batchResult.output_file_id)
-            with open("resultsAPI.jsonl", "w") as resultAPI:
-                resultAPI.write(results)
+            compareAIAnswers(batchResult.output_file_id)
         case "expired":
             print("the results couldn't be completed in time")
        case "cancelling":
diff --git a/PythonAPI/prompts/prompt_deutsch.txt b/PythonAPI/prompts/prompt_deutsch.txt
new file mode 100755
index 0000000..67287dc
--- /dev/null
+++ b/PythonAPI/prompts/prompt_deutsch.txt
@@ -0,0 +1,26 @@
+Du bekommst eine formale Grammatik G = (V, T, R, S), wobei V die Menge der Variablen, T die Menge der Terminalsymbole, R die Menge der Produktionsregeln und S das Startsymbol ist. Die Grammatik erzeugt dabei eine Sprache L(G). Außerdem bekommst du eine Liste mit Wörtern, für die du jeweils prüfen sollst, ob das Wort Teil von L(G) ist. Wenn das Wort Teil von L(G) ist, gib True und eine Liste zurück, bei der jedes Element einem Ableitungsschritt des Wortes mit den Produktionsregeln aus R entspricht. Es muss eine Linksableitung angegeben werden. Das heißt, pro Schritt darf immer nur die am weitesten links stehende Variable durch die Conclusio einer Regel ersetzt werden, die die besagte Variable als Prämisse hat. Es ist nicht erlaubt, mehr als eine Variable pro Schritt zu ersetzen. Wenn das Wort nicht Teil von L(G) ist, gib False und eine leere Liste zurück. Gib die Antworten ausschließlich, wie in den folgenden Beispielen gezeigt, als JSON-Objekt zurück; es dürfen keine weiteren Angaben in deiner Antwort vorkommen:
+1. Beispiel:
+Anfrage des Benutzers:
+Die Grammatik lautet:
+G = (V, T, R, S)
+V = {S, A, B}
+T = {a, b, c}
+R = {S -> cS, S -> bA, S -> a, A -> aS, A -> aB, B -> c}
+Die Liste der Wörter ist:
+["babaa", "cb"]
+
+Deine Antwort:
+[{True: [ "S", "bA", "baS", "babA", "babaS", "babaa"]}, {False: []}]
+
+2. Beispiel:
+Anfrage des Benutzers:
+Die Grammatik lautet:
+G = (V, T, R, S)
+V = {S, A, B, C}
+T = {a, b, c}
+R = {S -> aA, S -> CB, A -> Sc, A -> AACA, B -> cCS, B -> CC, C -> ε, C -> bb}
+Die Liste der Wörter ist:
+["acbbc", ""]
+
+Deine Antwort:
+[{True: ["S", "aA", "aSc", "aCBc", "aBc", "acCSc", "acbbSc", "acbbCBc", "acbbBc", "acbbCCc", "acbbCc", "acbbc"]}, {True: ["S", "CB", "B", "CC", "C", ""]}]
\ No newline at end of file
diff --git a/PythonAPI/prompts/prompt_englisch.txt b/PythonAPI/prompts/prompt_englisch.txt
new file mode 100755
index 0000000..a54c3e2
--- /dev/null
+++ b/PythonAPI/prompts/prompt_englisch.txt
@@ -0,0 +1,26 @@
+You will receive a formal grammar G = (N, Σ, P, S), where N is the set of nonterminal symbols, Σ is the set of terminal symbols, P is the set of production rules and S is the start symbol. This grammar generates the language L(G). In addition, you will receive a list of words, for which you must check whether they are part of L(G) or not. If a word is part of L(G), you must return True and a list where each element is a step used to derive the word with the production rules of P. The derivation must be a leftmost derivation, where in each step only the leftmost variable is substituted with the right-hand side of a rule that has said variable as its left-hand side. It is not allowed to substitute more than one variable per step. If the word is not part of L(G), then you must return False and an empty list. Your answer must only include JSON syntax as shown in the following examples:
+1. Example:
+Message of the user:
+The grammar is defined as:
+G = (N, Σ, P, S)
+N = {S, A, B}
+Σ = {a, b, c}
+P = {S -> cS, S -> bA, S -> a, A -> aS, A -> aB, B -> c}
+The list of words is:
+["babaa", "cb"]
+
+Your answer:
+[{True: [ "S", "bA", "baS", "babA", "babaS", "babaa"]}, {False: []}]
+
+2. Example:
+Message of the user:
+The grammar is defined as:
+G = (N, Σ, P, S)
+N = {S, A, B, C}
+Σ = {a, b, c}
+P = {S -> aA, S -> CB, A -> Sc, A -> AACA, B -> cCS, B -> CC, C -> ε, C -> bb}
+The list of words is:
+["acbbc", ""]
+
+Your answer:
+[{True: ["S", "aA", "aSc", "aCBc", "aBc", "acCSc", "acbbSc", "acbbCBc", "acbbBc", "acbbCCc", "acbbCc", "acbbc"]}, {True: ["S", "CB", "B", "CC", "C", ""]}]
diff --git a/PythonAPI/sendRequest.py b/PythonAPI/sendRequest.py
index 3a52c98..92cbf6b 100644
--- a/PythonAPI/sendRequest.py
+++ b/PythonAPI/sendRequest.py
@@ -4,24 +4,25 @@ from openai import OpenAI
 client = OpenAI()
 GPT_MODEL = "gpt-3.5-turbo-0125"
 REPEAT = 3
+r_id = 0
 if __name__ == "__main__":
     with open('grammarWordsPairs.txt') as grammarWordsPairs:
-        jsonArray = grammarWordsPairs.read()
-    array = json.loads(jsonArray)
-    with open("prompt_deutsch.txt", encoding="utf-8") as prompt:
+        json_object = grammarWordsPairs.read()
+    dict = json.loads(json_object)
+    with open("prompts/prompt_deutsch.txt", encoding="utf-8") as prompt:
         systemMessage = prompt.read()
-    inputTemplate = {"custom_id": None, "method": "POST", "url": "/v1/chat/completions", "body": {"model": GPT_MODEL, "messages": [{"role": "system", "content": systemMessage}, {"role": "user", "content": None}]}}
+    inputTemplate = {"custom_id": None, "method": "POST", "url": "/v1/chat/completions", "body": {"model": GPT_MODEL, "messages": [{"role": "system", "content": systemMessage}, {"role": "user", "content": None}], "response_format": {"type": "json_object"}}}
     requestId = 0
-    for e in array:
-        for key in e:
-            userMessage = f"Die Grammatik lautet:\n{key}\nDie Liste der Wörter lautet:\n{e[key]}"
-            inputTemplate["body"]["messages"][1]["content"] = userMessage
-            for i in range(REPEAT):
-                inputTemplate["custom_id"] = f"request-{requestId}"
-                requestId += 1
-                with open('requests.jsonl', 'a', encoding="utf-8") as requests:
-                    requests.write(json.dumps(inputTemplate) + '\n')
+    for key in dict:
+        userMessage = f"Die Grammatik lautet:\n{key}\nDie Liste der Wörter lautet:\n{dict[key]}"
+        inputTemplate["body"]["messages"][1]["content"] = userMessage
+        for i in range(REPEAT):
+            inputTemplate["custom_id"] = f"request-{requestId}"
+            requestId += 1
+            request_file_name = f"requests{r_id}"
+            with open(request_file_name, 'a', encoding="utf-8") as requests:
+                requests.write(json.dumps(inputTemplate) + '\n')
 """
 with open("requests.jsonl") as allRequests:
     fileObject = client.files.create(
         file=allRequests,
--
GitLab
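
Note (not part of the patch): the commented-out block at the end of sendRequest.py is cut off by the hunk above. The sketch below shows one way the request file written by sendRequest.py could be uploaded and turned into a Batch API job whose id is stored in batchId.txt for getResults.py to poll. The file name "requests0" and the helper name submit_batch are assumptions for illustration only, not part of the repository.

# Sketch only: submitting one generated request file as a batch job.
# Assumes the OpenAI Python SDK v1.x and a request file named "requests0",
# matching the f"requests{r_id}" naming used in this patch.
from openai import OpenAI

client = OpenAI()

def submit_batch(request_file_name: str = "requests0") -> str:
    # Upload the JSONL file with purpose "batch" so a batch job can reference it.
    with open(request_file_name, "rb") as allRequests:
        fileObject = client.files.create(file=allRequests, purpose="batch")
    # Create the batch against the same endpoint used in the request template.
    batch = client.batches.create(
        input_file_id=fileObject.id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
    )
    # Persist the batch id so getResults.py can poll it later via batchId.txt.
    with open("batchId.txt", "w") as batchIdFile:
        batchIdFile.write(batch.id)
    return batch.id

if __name__ == "__main__":
    print(submit_batch())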
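
Because each grammar/word-list pair is sent REPEAT times with consecutive custom_ids, the batch output can be grouped back per grammar before it is compared against resultsOfMembership.txt. Below is a minimal sketch of that grouping, assuming the resultsAPI.jsonl layout produced by getResults.py in this patch; the helper name group_answers is illustrative and not part of the repository.

# Sketch only: collecting the REPEAT answers that belong to the same grammar/word-list pair.
import json
from collections import defaultdict

REPEAT = 3  # must match the value used in sendRequest.py

def group_answers(results_path: str = "resultsAPI.jsonl") -> dict:
    answers_by_grammar = defaultdict(list)
    with open(results_path, encoding="utf-8") as resultsFile:
        for line in resultsFile:
            record = json.loads(line.strip())
            # custom_id has the form "request-<n>"; n // REPEAT identifies the grammar,
            # the same key scheme compareAIAnswers uses for resultsOfMembership.txt.
            grammar_id = str(int(record["custom_id"].split("-")[-1]) // REPEAT)
            content = record["response"]["body"]["choices"][0]["message"]["content"]
            answers_by_grammar[grammar_id].append(content)
    return dict(answers_by_grammar)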