From f9ca5ffafb2b958b4214c2bdb8aa3ca3c46f7676 Mon Sep 17 00:00:00 2001 From: "@nheuser" <nheuser@uni-koblenz.de> Date: Sat, 29 Jun 2024 20:05:48 +0200 Subject: [PATCH] begin of get_result.py and some data structure changes --- PythonAPI/.gitignore | 7 ++- PythonAPI/get_results.py | 122 +++++++++++++++++++++++++++----------- PythonAPI/main.py | 3 +- PythonAPI/send_request.py | 27 ++++----- 4 files changed, 105 insertions(+), 54 deletions(-) diff --git a/PythonAPI/.gitignore b/PythonAPI/.gitignore index a37081b..dd33708 100644 --- a/PythonAPI/.gitignore +++ b/PythonAPI/.gitignore @@ -1,6 +1,9 @@ -/inputData +/input_data /results_algo /requests /batchId.txt /resultsAPI.jsonl -/__pycache__ \ No newline at end of file +/__pycache__ +/error +/output +/check \ No newline at end of file diff --git a/PythonAPI/get_results.py b/PythonAPI/get_results.py index f859df1..aceec13 100644 --- a/PythonAPI/get_results.py +++ b/PythonAPI/get_results.py @@ -3,48 +3,100 @@ from openai import OpenAI client = OpenAI() -def compareAIAnswers(fileId): - resultsAPI = client.files.content(fileId) - resultsAPIList = [] - with open("resultsAPI.jsonl", "rw") as resultAPIFile: - resultAPIFile.write(resultsAPI) - for line in resultAPIFile: - resultsAPIList.append(json.loads(line.strip())) - with open("resultsOfMembership.txt") as resultsAlgo: - json_object = resultsAlgo.read() - resultsAlgoDict = json.loads(json_object) - comparingResultsDict = {} - for r in resultsAPIList: - id = str(int(r["custom_id"].split('-')[-1]) // REPEAT) - algoResult = resultsAlgoDict[id] - apiResult = json.loads(r) - apiResultDict = json.loads(apiResult["response"]["body"]["choices"][0]["message"]["content"]) - i = 0 - for b in apiResultDict: - comparingResultsDict[id] = (b == algoResult[i]) - i += 1 - +def get_batch_id(type, r_id): + path = f"requests/t{type}/batch_id{r_id}.txt" + with open(path) as batch_id_file: + batch_id = batch_id_file.read() + return batch_id +def get_batch_object(type, r_id): + batch_id = get_batch_id(type, r_id) + batch_object = client.batches.retrieve( + batch_id=batch_id + ) + return batch_object -if __name__ == "__main__": - with open("batchId.txt") as batchIdFile: - batch_Id = batchIdFile.read() - batchResult = client.batches.retrieve(batch_id=batch_Id) - match batchResult.status: - case "validating": +def try_retrieve_batch(type, r_id): + batch_object = get_batch_object(type, r_id) + batch_status = batch_object.status + match batch_status: + case 'validating': print("Request is being validated") - case "failed": + case 'failed': print("Request has failed") - case "in_progress": + case 'in_progress': print("Request is being processed") - case "finalizing": + case 'finalizing': print("Results are being prepared") - case "completed": + case 'completed': print("Request has been completed") - compareAIAnswers(batchResult.output_file_id) - case "expired": + output_file_id = batch_object.output_file_id + error_file_id = batch_object.error_file_id + retrieve_batch(output_file_id, error_file_id, type, r_id) + case 'expired': print("the results couldn't be completed in time") - case "cancelling": + case 'cancelling': print("the request is being cancelled") - case "cancelled": + case 'cancelled': print("the request has been cancelled") + +def retrieve_batch(output_file_id, error_file_id, type, r_id): + batch_content = client.files.content(output_file_id) + path = f"output/t{type}/output{r_id}.jsonl" + with open(path, 'w') as output_file: + output_file.write(batch_content) + batch_error = client.files.content(error_file_id) + path = f"error/t{type}/error{r_id}.jsonl" + with open(path, 'w') as error_file: + error_file.write(batch_error) + +def get_output_list(type, r_id): + output_list = [] + path = f"output/t{type}/output{r_id}.jsonl" + with open(path) as output_file: + for line in output_file: + dict = json.loads(line) + output_list.append(dict) + return output_list + +def get_key(custom_id, repeat): + n = int(custom_id.split('-')[-1]) + n //= repeat + return str(n) + +def get_results_algo(type, r_id): + path = f"results_algo/t{type}/results_algo_{r_id}.txt" + with open(path) as results_algo_file: + results_algo_json = results_algo_file.read() + return json.loads(results_algo_json) + +def get_grammar(result_algo_key, input_dict): + i = 0 + for grammar in input_dict: + if i == result_algo_key: + return grammar + i += 1 + +def check_b_output(output_list, repeat, type, r_id): + results_algo_dict = get_results_algo(type, r_id) + check_b = {} + for o in output_list: + results_algo_key = get_key(o['custom_id'], repeat) + if not check_b.get(results_algo_key): + check_b[results_algo_key] = [] + content_json = o['response']['body']['choices'][0]['message']['content'] + content_dict = json.loads(content_json) + results_algo = results_algo_dict[results_algo_key] + i = 0 + compare_output_results = [] + for b in content_dict: + compare_output_results.append(b == str(results_algo[i])) + i += 1 + check_b.get(results_algo_key).append(compare_output_results) + path = f"check/t{type}/check_b{r_id}.txt" + with open(path, 'w') as check_b_file: + check_b_file.write(json.dumps(check_b)) + return check_b + +def check_d_output(output_list, repeat, type, r_id, input_dict) + \ No newline at end of file diff --git a/PythonAPI/main.py b/PythonAPI/main.py index f9d480c..f89c6da 100644 --- a/PythonAPI/main.py +++ b/PythonAPI/main.py @@ -1,3 +1,4 @@ import send_request +import json -send_request.create_request_file(3, 2, 0, 3, 3, 4) \ No newline at end of file +send_request.create_request_file(3, 2, 0) \ No newline at end of file diff --git a/PythonAPI/send_request.py b/PythonAPI/send_request.py index 494f24c..7c1b691 100644 --- a/PythonAPI/send_request.py +++ b/PythonAPI/send_request.py @@ -4,15 +4,12 @@ from openai import OpenAI client = OpenAI() GPT_MODEL = "gpt-3.5-turbo-0125" -def read_input_data(type, r_id , mode, maxConclusios, maxConclusioSize = 0): - path = f"inputData/t{type}/input{r_id}_c" - if type == 2: - path += f"{maxConclusios}_cs{maxConclusioSize}_m{mode}.txt" - else: - path += f"cs{maxConclusios}_m{mode}.txt" +def read_input_data(type, r_id): + path = f"input_data/t{type}/input{r_id}.txt" with open(path) as input_file: - input_json = input_file.read() - input_dict = json.loads(input_json) + input = input_file.read() + input_json = input.split('!')[-1] + input_dict = json.loads(input_json.strip()) return input_dict def read_system_message(german=True): @@ -49,16 +46,14 @@ def build_request_file(input_dict, system_message, repeat, type, r_id): with open(path, "a") as requests_file: requests_file.write(json.dumps(request_template, ensure_ascii=False) + "\n") -def create_request_file(repeat, type, r_id, mode, maxConclusios, maxConclusioSize=0): - if type == 2: - input_dict = read_input_data(type, r_id, mode, maxConclusios, maxConclusioSize) - else: - input_dict = read_input_data(type, r_id, mode, maxConclusios) +def create_request_file(repeat, type, r_id): + input_dict = read_input_data(type, r_id) system_message = read_system_message() build_request_file(input_dict, system_message, repeat, type, r_id) + return input_dict -def run_batch(type, r_id, mode): - path_r = f"requests/t{type}/requests{r_id}_{mode}.jsonl" +def run_batch(type, r_id): + path_r = f"requests/t{type}/requests{r_id}.jsonl" with open(path_r) as request_file: file_object = client.files.create( file=request_file, @@ -70,6 +65,6 @@ def run_batch(type, r_id, mode): endpoint="/v1/chat/completions", completion_window="24h" ) - path_id = f"requests/t{type}/batch_id{r_id}_{mode}.txt" + path_id = f"requests/t{type}/batch_id{r_id}.txt" with open(path_id, "w") as batch_id_file: batch_id_file.write(batch_object.id) -- GitLab