first commit

This commit is contained in:
jultrishyyy
2025-02-20 20:38:50 +00:00
commit ae1cc41f21
15 changed files with 119048 additions and 0 deletions

45
evaluate/count_pass.py Normal file
View File

@@ -0,0 +1,45 @@
import json
import pandas as pd
from collections import defaultdict
# Load the JSON file
file_path = "solutions.json" # Adjust this path based on your local directory
with open(file_path, "r") as f:
data = json.load(f)
# Initialize a dictionary to store the structured results
structured_results = defaultdict(lambda: defaultdict(lambda: {"total": 0, "pass": 0, "syntax_error": 0, "functional_error": 0}))
# Process the data to count various results per LLM and type
for llm, categories in data.items():
for category, modules in categories.items():
for module in modules:
for solution in module.get("solutions", []):
structured_results[category][llm]["total"] += 1
pass_info = solution.get("pass", "")
if pass_info == "true":
structured_results[category][llm]["pass"] += 1
elif "Detected error while running simulation" in pass_info:
structured_results[category][llm]["syntax_error"] += 1
# Functional error count
structured_results[category][llm]["functional_error"] = (
structured_results[category][llm]["total"]
- structured_results[category][llm]["syntax_error"]
- structured_results[category][llm]["pass"]
)
# Create a DataFrame from the structured results
df_restructured = pd.DataFrame.from_dict(
{category: {llm: f"{counts['pass']} | {counts['functional_error']} | {counts['syntax_error']}" for llm, counts in llms.items()}
for category, llms in structured_results.items()},
orient="index"
)
# Save to a CSV file
csv_output_path = "solution_pass_analysis.csv" # Adjust the path as needed
df_restructured.to_csv(csv_output_path)
print(f"CSV file saved at: {csv_output_path}")
# print(df_restructured)