first commit

2025-02-20 20:38:50 +00:00
commit ae1cc41f21
15 changed files with 119048 additions and 0 deletions
--- a/evaluate/count_pass.py
+++ b/evaluate/count_pass.py
@@ -0,0 +1,45 @@
+import json
+import pandas as pd
+from collections import defaultdict
+
+# Summarise pass / functional-error / syntax-error counts per category and LLM.
+# Input layout read below: {llm: {category: [{"solutions": [{"pass": ...}]}]}}.
+file_path = "solutions.json"  # Adjust this path based on your local directory
+with open(file_path, "r", encoding="utf-8") as f:  # JSON text is UTF-8 (RFC 8259)
+    data = json.load(f)
+
+# structured_results[category][llm] -> outcome counters
+structured_results = defaultdict(lambda: defaultdict(lambda: {"total": 0, "pass": 0, "syntax_error": 0, "functional_error": 0}))
+
+for llm, categories in data.items():
+    for category, modules in categories.items():
+        for module in modules:
+            for solution in module.get("solutions", []):
+                # Looked up per solution so groups without solutions create no entry.
+                counts = structured_results[category][llm]
+                counts["total"] += 1
+
+                pass_info = solution.get("pass", "")
+                # A JSON boolean true counts as a pass, like the string "true".
+                if pass_info is True or pass_info == "true":
+                    counts["pass"] += 1
+                # isinstance guard: `in` raises TypeError on null/boolean values.
+                elif isinstance(pass_info, str) and "Detected error while running simulation" in pass_info:
+                    counts["syntax_error"] += 1
+
+# Whatever neither passed nor matched the error text is a functional error;
+# derived once per (category, LLM) after counting, not after every solution.
+for llm_counts in structured_results.values():
+    for counts in llm_counts.values():
+        counts["functional_error"] = counts["total"] - counts["pass"] - counts["syntax_error"]
+
+# Rows are categories, columns are LLMs, cells are "pass | functional | syntax".
+df_restructured = pd.DataFrame.from_dict(
+    {category: {llm: f"{counts['pass']} | {counts['functional_error']} | {counts['syntax_error']}" for llm, counts in llms.items()}
+     for category, llms in structured_results.items()},
+    orient="index"
+)
+
+csv_output_path = "solution_pass_analysis.csv"  # Adjust the path as needed
+df_restructured.to_csv(csv_output_path)
+print(f"CSV file saved at: {csv_output_path}")