From 9614597392633c65e80a90dbc175ff0dfeaa1914 Mon Sep 17 00:00:00 2001
From: "@nheuser" <nheuser@uni-koblenz.de>
Date: Fri, 28 Jun 2024 12:58:02 +0200
Subject: [PATCH] updated data structure

---
 PythonAPI/.gitignore                  |  2 +-
 PythonAPI/getResults.py               | 32 ++++++++++++++++++++++-----
 PythonAPI/prompts/prompt_deutsch.txt  | 26 ++++++++++++++++++++++
 PythonAPI/prompts/prompt_englisch.txt | 26 ++++++++++++++++++++++
 PythonAPI/sendRequest.py              | 27 +++++++++++-----------
 5 files changed, 94 insertions(+), 19 deletions(-)
 create mode 100755 PythonAPI/prompts/prompt_deutsch.txt
 create mode 100755 PythonAPI/prompts/prompt_englisch.txt

diff --git a/PythonAPI/.gitignore b/PythonAPI/.gitignore
index 9243bb0..1b34d42 100644
--- a/PythonAPI/.gitignore
+++ b/PythonAPI/.gitignore
@@ -1,5 +1,5 @@
 /inputData
 /resultsAlgo
-/requests?.jsonl
+/requests
 /batchId.txt
 /resultsAPI.jsonl
\ No newline at end of file
diff --git a/PythonAPI/getResults.py b/PythonAPI/getResults.py
index 9320c62..1b89333 100644
--- a/PythonAPI/getResults.py
+++ b/PythonAPI/getResults.py
@@ -2,11 +2,35 @@ import json
 from openai import OpenAI
 
 client = OpenAI()
 
+REPEAT = 3
+
+def compareAIAnswers(fileId):
+    resultsAPI = client.files.content(fileId)
+    resultsAPIList = []
+    with open("resultsAPI.jsonl", "w") as resultAPIFile:
+        resultAPIFile.write(resultsAPI.text)
+    for line in resultsAPI.text.splitlines():
+        resultsAPIList.append(json.loads(line.strip()))
+    with open("resultsOfMembership.txt") as resultsAlgo:
+        json_object = resultsAlgo.read()
+        resultsAlgoDict = json.loads(json_object)
+    comparingResultsDict = {}
+    for r in resultsAPIList:
+        id = str(int(r["custom_id"].split('-')[-1]) // REPEAT)
+        algoResult = resultsAlgoDict[id]
+        apiResult = r
+        apiResultDict = json.loads(apiResult["response"]["body"]["choices"][0]["message"]["content"])
+        i = 0
+        for b in apiResultDict:
+            comparingResultsDict[id] = (b == algoResult[i])
+            i += 1
+
+
 if __name__ == "__main__":
     with open("batchId.txt") as batchIdFile:
-        batchId = batchIdFile.read()
-    batchResult = client.batches.retrieve(batch_id=batchId)
+        batch_Id = batchIdFile.read()
+    batchResult = client.batches.retrieve(batch_id=batch_Id)
     match batchResult.status:
         case "validating":
             print("Request is being validated")
@@ -18,9 +42,7 @@ if __name__ == "__main__":
             print("Results are being prepared")
         case "completed":
             print("Request has been completed")
-            results = client.files.content(batchResult.output_file_id)
-            with open("resultsAPI.jsonl", "w") as resultAPI:
-                resultAPI.write(results)
+            compareAIAnswers(batchResult.output_file_id)
         case "expired":
             print("the results couldn't be completed in time")
        case "cancelling":
diff --git a/PythonAPI/prompts/prompt_deutsch.txt b/PythonAPI/prompts/prompt_deutsch.txt
new file mode 100755
index 0000000..67287dc
--- /dev/null
+++ b/PythonAPI/prompts/prompt_deutsch.txt
@@ -0,0 +1,26 @@
+Du bekommst eine formale Grammatik G = (V, T, R, S), wobei V die Menge der Variablen, T die Menge der Terminalsymbole, R die Menge der Produktionsregeln und S das Startsymbol ist. Die Grammatik erzeugt dabei eine Sprache L(G). Außerdem bekommst du eine Liste mit Wörtern, für die du jeweils prüfen sollst, ob das Wort Teil von L(G) ist. Wenn das Wort Teil von L(G) ist, gib True und eine Liste zurück, bei der jedes Element einem Ableitungsschritt des Wortes mit den Produktionsregeln aus R entspricht. Es muss eine Linksableitung angegeben werden. Das heißt, pro Schritt darf immer nur die am weitesten links stehende Variable durch die Conclusio einer Regel ersetzt werden, die die besagte Variable als Prämisse hat. Es ist nicht erlaubt, mehr als eine Variable pro Schritt zu ersetzen. Wenn das Wort nicht Teil von L(G) ist, gib False und eine leere Liste zurück. Gib die Antworten ausschließlich, wie in den folgenden Beispielen gezeigt, als JSON-Objekt zurück; es dürfen keine weiteren Angaben in deiner Antwort vorkommen:
+1. Beispiel:
+Anfrage des Benutzers:
+Die Grammatik lautet:
+G = (V, T, R, S)
+V = {S, A, B}
+T = {a, b, c}
+R = {S -> cS, S -> bA, S -> a, A -> aS, A -> aB, B -> c}
+Die Liste der Wörter ist:
+["babaa", "cb"]
+
+Deine Antwort:
+[{True: [ "S", "bA", "baS", "babA", "babaS", "babaa"]}, {False: []}]
+
+2. Beispiel:
+Anfrage des Benutzers:
+Die Grammatik lautet:
+G = (V, T, R, S)
+V = {S, A, B, C}
+T = {a, b, c}
+R = {S -> aA, S -> CB, A -> Sc, A -> AACA, B -> cCS, B -> CC, C -> ε, C -> bb}
+Die Liste der Wörter ist:
+["acbbc", ""]
+
+Deine Antwort:
+[{True: ["S", "aA", "aSc", "aCBc", "aBc", "acCSc", "acbbSc", "acbbCBc", "acbbBc", "acbbCCc", "acbbCc", "acbbc"]}, {True: ["S", "CB", "B", "CC", "C", ""]}]
\ No newline at end of file
diff --git a/PythonAPI/prompts/prompt_englisch.txt b/PythonAPI/prompts/prompt_englisch.txt
new file mode 100755
index 0000000..a54c3e2
--- /dev/null
+++ b/PythonAPI/prompts/prompt_englisch.txt
@@ -0,0 +1,26 @@
+You will receive a formal grammar G = (N, Σ, P, S), where N is the set of nonterminal symbols, Σ is the set of terminal symbols, P is the set of production rules and S is the start symbol. This grammar generates the language L(G). In addition, you will receive a list of words, for which you must check whether they are part of L(G) or not. If a word is part of L(G), you must return True and a list where each element is a step used to derive the word with the production rules of P. The derivation must be a leftmost derivation, where in each step only the leftmost variable is substituted with the right-hand side of a rule that has said variable as its left-hand side. It is not allowed to substitute more than one variable per step. If the word is not part of L(G), then you must return False and an empty list. Your answer must only include JSON syntax as shown in the following examples:
+1. Example:
+Message of the user:
+The grammar is defined as:
+G = (N, Σ, P, S)
+N = {S, A, B}
+Σ = {a, b, c}
+P = {S -> cS, S -> bA, S -> a, A -> aS, A -> aB, B -> c}
+The list of words is:
+["babaa", "cb"]
+
+Your answer:
+[{True: [ "S", "bA", "baS", "babA", "babaS", "babaa"]}, {False: []}]
+
+2. Example:
+Message of the user:
+The grammar is defined as:
+G = (N, Σ, P, S)
+N = {S, A, B, C}
+Σ = {a, b, c}
+P = {S -> aA, S -> CB, A -> Sc, A -> AACA, B -> cCS, B -> CC, C -> ε, C -> bb}
+The list of words is:
+["acbbc", ""]
+
+Your answer:
+[{True: ["S", "aA", "aSc", "aCBc", "aBc", "acCSc", "acbbSc", "acbbCBc", "acbbBc", "acbbCCc", "acbbCc", "acbbc"]}, {True: ["S", "CB", "B", "CC", "C", ""]}]
diff --git a/PythonAPI/sendRequest.py b/PythonAPI/sendRequest.py
index 3a52c98..92cbf6b 100644
--- a/PythonAPI/sendRequest.py
+++ b/PythonAPI/sendRequest.py
@@ -4,24 +4,25 @@ from openai import OpenAI
 client = OpenAI()
 GPT_MODEL = "gpt-3.5-turbo-0125"
 REPEAT = 3
+r_id = 0
 if __name__ == "__main__":
     with open('grammarWordsPairs.txt') as grammarWordsPairs:
-        jsonArray = grammarWordsPairs.read()
-    array = json.loads(jsonArray)
-    with open("prompt_deutsch.txt", encoding="utf-8") as prompt:
+        json_object = grammarWordsPairs.read()
+    dict = json.loads(json_object)
+    with open("prompts/prompt_deutsch.txt", encoding="utf-8") as prompt:
         systemMessage = prompt.read()
-    inputTemplate = {"custom_id": None, "method": "POST", "url": "/v1/chat/completions", "body": {"model": GPT_MODEL, "messages": [{"role": "system", "content": systemMessage}, {"role": "user", "content": None}]}}
+    inputTemplate = {"custom_id": None, "method": "POST", "url": "/v1/chat/completions", "body": {"model": GPT_MODEL, "messages": [{"role": "system", "content": systemMessage}, {"role": "user", "content": None}], "response_format": {"type": "json_object"}}}
     requestId = 0
-    for e in array:
-        for key in e:
-            userMessage = f"Die Grammatik lautet:\n{key}\nDie Liste der Wörter lautet:\n{e[key]}"
-            inputTemplate["body"]["messages"][1]["content"] = userMessage
-            for i in range(REPEAT):
-                inputTemplate["custom_id"] = f"request-{requestId}"
-                requestId += 1
-                with open('requests.jsonl', 'a', encoding="utf-8") as requests:
-                    requests.write(json.dumps(inputTemplate) + '\n')
+    for key in dict:
+        userMessage = f"Die Grammatik lautet:\n{key}\nDie Liste der Wörter lautet:\n{dict[key]}"
+        inputTemplate["body"]["messages"][1]["content"] = userMessage
+        for i in range(REPEAT):
+            inputTemplate["custom_id"] = f"request-{requestId}"
+            requestId += 1
+            request_file_name = f"requests{r_id}"
+            with open(request_file_name, 'a', encoding="utf-8") as requests:
+                requests.write(json.dumps(inputTemplate) + '\n')
 """
 with open("requests.jsonl") as allRequests:
     fileObject = client.files.create(
         file=allRequests,
--
GitLab
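
Note (not part of the patch): the commented-out block at the end of sendRequest.py is cut off by the hunk above. The sketch below shows one way the request file written by sendRequest.py could be uploaded and turned into a Batch API job whose id is stored in batchId.txt for getResults.py to poll. The file name "requests0" and the helper name submit_batch are assumptions for illustration only, not part of the repository.

# Sketch only: submitting one generated request file as a batch job.
# Assumes the OpenAI Python SDK v1.x and a request file named "requests0",
# matching the f"requests{r_id}" naming used in this patch.
from openai import OpenAI

client = OpenAI()

def submit_batch(request_file_name: str = "requests0") -> str:
    # Upload the JSONL file with purpose "batch" so a batch job can reference it.
    with open(request_file_name, "rb") as allRequests:
        fileObject = client.files.create(file=allRequests, purpose="batch")
    # Create the batch against the same endpoint used in the request template.
    batch = client.batches.create(
        input_file_id=fileObject.id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
    )
    # Persist the batch id so getResults.py can poll it later via batchId.txt.
    with open("batchId.txt", "w") as batchIdFile:
        batchIdFile.write(batch.id)
    return batch.id

if __name__ == "__main__":
    print(submit_batch())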
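
Because each grammar/word-list pair is sent REPEAT times with consecutive custom_ids, the batch output can be grouped back per grammar before it is compared against resultsOfMembership.txt. Below is a minimal sketch of that grouping, assuming the resultsAPI.jsonl layout produced by getResults.py in this patch; the helper name group_answers is illustrative and not part of the repository.

# Sketch only: collecting the REPEAT answers that belong to the same grammar/word-list pair.
import json
from collections import defaultdict

REPEAT = 3  # must match the value used in sendRequest.py

def group_answers(results_path: str = "resultsAPI.jsonl") -> dict:
    answers_by_grammar = defaultdict(list)
    with open(results_path, encoding="utf-8") as resultsFile:
        for line in resultsFile:
            record = json.loads(line.strip())
            # custom_id has the form "request-<n>"; n // REPEAT identifies the grammar,
            # the same key scheme compareAIAnswers uses for resultsOfMembership.txt.
            grammar_id = str(int(record["custom_id"].split("-")[-1]) // REPEAT)
            content = record["response"]["body"]["choices"][0]["message"]["content"]
            answers_by_grammar[grammar_id].append(content)
    return dict(answers_by_grammar)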