From f9ca5ffafb2b958b4214c2bdb8aa3ca3c46f7676 Mon Sep 17 00:00:00 2001
From: "@nheuser" <nheuser@uni-koblenz.de>
Date: Sat, 29 Jun 2024 20:05:48 +0200
Subject: [PATCH] begin of get_result.py and some data structure changes

---
 PythonAPI/.gitignore      |   7 ++-
 PythonAPI/get_results.py  | 122 +++++++++++++++++++++++++++-----------
 PythonAPI/main.py         |   3 +-
 PythonAPI/send_request.py |  27 ++++-----
 4 files changed, 105 insertions(+), 54 deletions(-)

diff --git a/PythonAPI/.gitignore b/PythonAPI/.gitignore
index a37081b..dd33708 100644
--- a/PythonAPI/.gitignore
+++ b/PythonAPI/.gitignore
@@ -1,6 +1,9 @@
-/inputData
+/input_data
 /results_algo
 /requests
 /batchId.txt
 /resultsAPI.jsonl
-/__pycache__
\ No newline at end of file
+/__pycache__
+/error
+/output
+/check
\ No newline at end of file
diff --git a/PythonAPI/get_results.py b/PythonAPI/get_results.py
index f859df1..aceec13 100644
--- a/PythonAPI/get_results.py
+++ b/PythonAPI/get_results.py
@@ -3,48 +3,100 @@ from openai import OpenAI
 
 client = OpenAI()
 
-def compareAIAnswers(fileId):
-    resultsAPI = client.files.content(fileId)
-    resultsAPIList = []
-    with open("resultsAPI.jsonl", "rw") as resultAPIFile:
-        resultAPIFile.write(resultsAPI)
-        for line in resultAPIFile:
-            resultsAPIList.append(json.loads(line.strip()))
-    with open("resultsOfMembership.txt") as resultsAlgo:
-        json_object = resultsAlgo.read()
-    resultsAlgoDict = json.loads(json_object)
-    comparingResultsDict = {}
-    for r in resultsAPIList:
-        id = str(int(r["custom_id"].split('-')[-1]) // REPEAT)
-        algoResult = resultsAlgoDict[id]
-        apiResult = json.loads(r)
-        apiResultDict = json.loads(apiResult["response"]["body"]["choices"][0]["message"]["content"])
-        i = 0
-        for b in apiResultDict:
-            comparingResultsDict[id] = (b == algoResult[i])
-            i += 1
-
+def get_batch_id(type, r_id):
+    path = f"requests/t{type}/batch_id{r_id}.txt"
+    with open(path) as batch_id_file:
+        batch_id = batch_id_file.read()
+    return batch_id
 
+def get_batch_object(type, r_id):
+    batch_id = get_batch_id(type, r_id)
+    batch_object = client.batches.retrieve(
+        batch_id=batch_id
+    )
+    return batch_object
 
-if __name__ == "__main__":
-    with open("batchId.txt") as batchIdFile:
-        batch_Id = batchIdFile.read()
-    batchResult = client.batches.retrieve(batch_id=batch_Id)
-    match batchResult.status:
-        case "validating":
+def try_retrieve_batch(type, r_id):
+    batch_object = get_batch_object(type, r_id)
+    batch_status = batch_object.status
+    match batch_status:
+        case 'validating':
             print("Request is being validated")
-        case "failed":
+        case 'failed':
             print("Request has failed")
-        case "in_progress":
+        case 'in_progress':
             print("Request is being processed")
-        case "finalizing":
+        case 'finalizing':
             print("Results are being prepared")
-        case "completed":
+        case 'completed':
             print("Request has been completed")
-            compareAIAnswers(batchResult.output_file_id)
-        case "expired":
+            output_file_id = batch_object.output_file_id
+            error_file_id = batch_object.error_file_id
+            retrieve_batch(output_file_id, error_file_id, type, r_id)
+        case 'expired':
             print("the results couldn't be completed in time")
-        case "cancelling":
+        case 'cancelling':
             print("the request is being cancelled")
-        case "cancelled":
+        case 'cancelled':
             print("the request has been cancelled")
+
+def retrieve_batch(output_file_id, error_file_id, type, r_id):
+    batch_content = client.files.content(output_file_id)
+    path = f"output/t{type}/output{r_id}.jsonl"
+    with open(path, 'w') as output_file:
+        output_file.write(batch_content.text)
+    batch_error = client.files.content(error_file_id)
+    path = f"error/t{type}/error{r_id}.jsonl"
+    with open(path, 'w') as error_file:
+        error_file.write(batch_error.text)
+
+def get_output_list(type, r_id):
+    output_list = []
+    path = f"output/t{type}/output{r_id}.jsonl"
+    with open(path) as output_file:
+        for line in output_file:
+            record = json.loads(line)
+            output_list.append(record)
+    return output_list
+
+def get_key(custom_id, repeat):
+    n = int(custom_id.split('-')[-1])
+    n //= repeat
+    return str(n)
+
+def get_results_algo(type, r_id):
+    path = f"results_algo/t{type}/results_algo_{r_id}.txt"
+    with open(path) as results_algo_file:
+        results_algo_json = results_algo_file.read()
+    return json.loads(results_algo_json)
+
+def get_grammar(result_algo_key, input_dict):
+    i = 0
+    for grammar in input_dict:
+        if str(i) == result_algo_key:
+            return grammar
+        i += 1
+
+def check_b_output(output_list, repeat, type, r_id):
+    results_algo_dict = get_results_algo(type, r_id)
+    check_b = {}
+    for o in output_list:
+        results_algo_key = get_key(o['custom_id'], repeat)
+        if results_algo_key not in check_b:
+            check_b[results_algo_key] = []
+        content_json = o['response']['body']['choices'][0]['message']['content']
+        content_dict = json.loads(content_json)
+        results_algo = results_algo_dict[results_algo_key]
+        i = 0
+        compare_output_results = []
+        for b in content_dict:
+            compare_output_results.append(b == str(results_algo[i]))
+            i += 1
+        check_b.get(results_algo_key).append(compare_output_results)
+    path = f"check/t{type}/check_b{r_id}.txt"
+    with open(path, 'w') as check_b_file:
+        check_b_file.write(json.dumps(check_b))
+    return check_b
+
+def check_d_output(output_list, repeat, type, r_id, input_dict):
+    pass  # TODO: implement comparison for type-d outputs
\ No newline at end of file
diff --git a/PythonAPI/main.py b/PythonAPI/main.py
index f9d480c..f89c6da 100644
--- a/PythonAPI/main.py
+++ b/PythonAPI/main.py
@@ -1,3 +1,4 @@
 import send_request
+import json
 
-send_request.create_request_file(3, 2, 0, 3, 3, 4)
\ No newline at end of file
+send_request.create_request_file(3, 2, 0)
\ No newline at end of file
diff --git a/PythonAPI/send_request.py b/PythonAPI/send_request.py
index 494f24c..7c1b691 100644
--- a/PythonAPI/send_request.py
+++ b/PythonAPI/send_request.py
@@ -4,15 +4,12 @@ from openai import OpenAI
 client = OpenAI()
 GPT_MODEL = "gpt-3.5-turbo-0125"
 
-def read_input_data(type, r_id , mode, maxConclusios, maxConclusioSize = 0):
-    path = f"inputData/t{type}/input{r_id}_c"
-    if type == 2:
-        path += f"{maxConclusios}_cs{maxConclusioSize}_m{mode}.txt"
-    else:
-        path += f"cs{maxConclusios}_m{mode}.txt"
+def read_input_data(type, r_id):
+    path = f"input_data/t{type}/input{r_id}.txt"
     with open(path) as input_file:
-        input_json = input_file.read()
-    input_dict = json.loads(input_json)
+        input = input_file.read()
+    input_json = input.split('!')[-1]
+    input_dict = json.loads(input_json.strip())
     return input_dict
 
 def read_system_message(german=True):
@@ -49,16 +46,14 @@ def build_request_file(input_dict, system_message, repeat, type, r_id):
             with open(path, "a") as requests_file:
                 requests_file.write(json.dumps(request_template, ensure_ascii=False) + "\n")
 
-def create_request_file(repeat, type, r_id, mode, maxConclusios, maxConclusioSize=0):
-    if type == 2:
-        input_dict = read_input_data(type, r_id, mode, maxConclusios, maxConclusioSize)
-    else:
-        input_dict = read_input_data(type, r_id, mode, maxConclusios)
+def create_request_file(repeat, type, r_id):
+    input_dict = read_input_data(type, r_id)
     system_message = read_system_message()
     build_request_file(input_dict, system_message, repeat, type, r_id)
+    return input_dict
 
-def run_batch(type, r_id, mode):
-    path_r = f"requests/t{type}/requests{r_id}_{mode}.jsonl"
+def run_batch(type, r_id):
+    path_r = f"requests/t{type}/requests{r_id}.jsonl"
     with open(path_r) as request_file:
         file_object = client.files.create(
             file=request_file,
@@ -70,6 +65,6 @@ def run_batch(type, r_id, mode):
         endpoint="/v1/chat/completions",
         completion_window="24h"
     )
-    path_id = f"requests/t{type}/batch_id{r_id}_{mode}.txt"
+    path_id = f"requests/t{type}/batch_id{r_id}.txt"
     with open(path_id, "w") as batch_id_file:
         batch_id_file.write(batch_object.id)
-- 
GitLab