first commit

commit ae1cc41f21
Author: jultrishyyy
Date: 2025-02-20 20:38:50 +00:00

15 changed files with 119048 additions and 0 deletions

109
README.md Normal file

@@ -0,0 +1,109 @@
# FPGA Resource Usage Benchmarking for Verilog Solutions
This repository contains the code for benchmarking FPGA resource usage for Verilog solutions generated by different LLMs.
## Simulation and Synthesis Tools
The simulation tool for functional correctness tests and the synthesis tool for obtaining resource usage are both based on **Vivado**, so please install **Vivado** before running the framework. The scripts locate Vivado through a `vivado` environment variable that should point to the directory containing `vivado.bat` (see `functional_correctness.py` and `resource_usage.py`). If you wish to use other tools, modify the relevant Python scripts accordingly.
Some dependencies are listed in `requirements.txt`, which you can install using:
```sh
pip install -r requirements.txt
```
## Benchmark Dataset (`problems.json`)
The `problems.json` file contains our benchmark dataset, formatted as follows:
```json
{
"Combinational Logic": [
{
"module": "parity_8bit",
"Problem": "Implement a Verilog module that computes the parity of an 8-bit input vector. The output should be 1 if the number of '1's in the input is odd, and 0 otherwise.",
"Module header": "module parity_8bit (\n input [7:0] in,\n output out\n);",
"Testbench": "`timescale 1ns / 1ps\n\nmodule parity_8bit_tb; ..."
}
],
"Finite State Machines": []
}
```
You can use this dataset to generate solutions and run functional correctness checks for any LLMs you want to evaluate.
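For example, a minimal Python sketch (assuming `problems.json` is in the working directory) that iterates over the benchmark problems:
```python
import json

# Load the benchmark and walk over every problem (field names follow the format above).
with open("problems.json", "r", encoding="utf-8") as f:
    problems = json.load(f)

for category, items in problems.items():
    for item in items:
        # item["Problem"] is the prompt, item["Module header"] fixes the interface,
        # and item["Testbench"] is the Vivado testbench used for checking.
        print(category, item["module"])
```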
## Experimental Results (`solutions.json` Format)
The `solutions` directory contains our experimental results, formatted as follows:
```json
{
"gpt-3.5-turbo": {
"Combinational Logic": [
{
"module": "parity_8bit",
"solutions": [
{
"solution": "module parity_8bit (input [7:0] in, output out); assign out = in[0] ^ in[1] ^ in[2] ^ in[3] ^ in[4] ^ in[5] ^ in[6] ^ in[7]; endmodule",
"pass": "true",
"resource usage": {
"optimized": {
"LUT": 2,
"FF": 0,
"DSP": 0,
"BRAM": 0,
"IO": 9
},
"primitives": {
"LUT": 2,
"FF": 0,
"DSP": 0,
"BRAM": 0,
"IO": 9
}
}
}
]
}
],
"Finite State Machines": [
{
"module": "fsm_3state",
"solutions": []
}
]
},
"gpt-4o":{}
}
```
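As a quick illustration of consuming this format, here is a minimal sketch (not part of the framework) that counts passing solutions per model:
```python
import json

# Count how many recorded solutions passed, per model, using the structure shown above.
with open("solutions.json", "r", encoding="utf-8") as f:
    results = json.load(f)

for model, categories in results.items():
    total = passed = 0
    for modules in categories.values():
        for entry in modules:
            for sol in entry.get("solutions", []):
                total += 1
                passed += sol.get("pass", "").strip().lower() == "true"
    print(f"{model}: {passed}/{total} solutions passed")
```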
## Quick Run Instructions
To quickly run the benchmarking process, copy `solutions.json` from the `solutions` directory to the same directory as `setup.py`, then execute:
```sh
python setup.py -generate_solutions gpt-4o 5 your_openai_api_key -functional_correctness -resource_usage
```
This command will:
1. Generate 5 solutions for each problem using `gpt-4o`.
2. Run the functional correctness check on every generated solution.
3. Obtain the resource usage report (LUT, FF, DSP, BRAM, and IO counts) for each passing solution.
The standard script currently supports OpenAI's GPT models. If you want to test other LLMs, modify `generate_solutions.py` accordingly, for example as sketched below.
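For instance, if your target provider exposes an OpenAI-compatible endpoint, one possible tweak is to point the client used in `call_LLMs` at that endpoint (the base URL and key below are only placeholders):
```python
from openai import OpenAI

# Hypothetical example: reuse the existing OpenAI client against an
# OpenAI-compatible endpoint; replace the placeholder URL and key.
client = OpenAI(api_key="your_api_key", base_url="https://your-provider.example/v1")
```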
## Running Functional and Resource Usage Tests on Custom Solutions
You can also run the functional test and resource usage analysis on your own solutions. Ensure that your `solutions.json` follows the format above and place it in the same directory as `setup.py`, then execute:
```sh
python setup.py -functional_correctness -resource_usage
```
## Running Individual Tests
To run the **functional correctness check** alone:
```sh
python setup.py -functional_correctness
```
To run **resource usage analysis** alone:
```sh
python setup.py -resource_usage
```

45
evaluate/count_pass.py Normal file

@@ -0,0 +1,45 @@
import json
import pandas as pd
from collections import defaultdict
# Load the JSON file
file_path = "solutions.json" # Adjust this path based on your local directory
with open(file_path, "r") as f:
data = json.load(f)
# Initialize a dictionary to store the structured results
structured_results = defaultdict(lambda: defaultdict(lambda: {"total": 0, "pass": 0, "syntax_error": 0, "functional_error": 0}))
# Process the data to count various results per LLM and type
for llm, categories in data.items():
for category, modules in categories.items():
for module in modules:
for solution in module.get("solutions", []):
structured_results[category][llm]["total"] += 1
pass_info = solution.get("pass", "")
if pass_info == "true":
structured_results[category][llm]["pass"] += 1
elif "Detected error while running simulation" in pass_info:
structured_results[category][llm]["syntax_error"] += 1
# Functional error count
structured_results[category][llm]["functional_error"] = (
structured_results[category][llm]["total"]
- structured_results[category][llm]["syntax_error"]
- structured_results[category][llm]["pass"]
)
# Create a DataFrame from the structured results
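# Each cell in the CSV is formatted as "pass | functional_error | syntax_error".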
df_restructured = pd.DataFrame.from_dict(
{category: {llm: f"{counts['pass']} | {counts['functional_error']} | {counts['syntax_error']}" for llm, counts in llms.items()}
for category, llms in structured_results.items()},
orient="index"
)
# Save to a CSV file
csv_output_path = "solution_pass_analysis.csv" # Adjust the path as needed
df_restructured.to_csv(csv_output_path)
print(f"CSV file saved at: {csv_output_path}")
# print(df_restructured)

32
evaluate/count_resource.py Normal file

@@ -0,0 +1,32 @@
import json
import pandas as pd
from collections import defaultdict
# Load the JSON file
file_path = "solutions.json"
with open(file_path, "r") as f:
data = json.load(f)
# Initialize a dictionary to store the minimal LUT usage for each module and LLM
lut_results = defaultdict(lambda: defaultdict(lambda: float("inf")))
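# Modules with no synthesized passing solution keep the initial value inf, which is what appears in the output CSV.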
# Process the data to extract the minimum LUT usage per module per LLM
for llm, categories in data.items():
for category, modules in categories.items():
for module_data in modules:
module_name = module_data["module"].replace("_", " ") # Replace underscores with spaces
for solution in module_data.get("solutions", []):
if "resource usage" in solution and "optimized" in solution["resource usage"]:
lut_count = solution["resource usage"]["optimized"].get("LUT", float("inf"))
# Store the minimum LUT usage
lut_results[module_name][llm] = min(lut_results[module_name][llm], lut_count)
# Convert the dictionary into a DataFrame
df_lut = pd.DataFrame.from_dict(lut_results, orient="index")
# Save to a CSV file
csv_output_path = "solution_resource_analysis.csv"
df_lut.to_csv(csv_output_path)
# Print the CSV file path
print(f"CSV file saved at: {csv_output_path}")

136
evaluate/plot_pass.py Normal file

@@ -0,0 +1,136 @@
import json
import matplotlib.pyplot as plt
import re
import seaborn as sns
import pandas as pd
# --- Utility Functions ---
def compute_module_pass(solution_list, k):
"""
Check the first k solutions for a module.
Return 1 if at least one of them has a "pass" value (after stripping and lowercasing) equal to "true",
otherwise return 0.
"""
for sol in solution_list[:k]:
if sol.get("pass", "").strip().lower() == "true":
return 1
return 0
def compute_pass_at_k_for_modules(modules, k):
"""
Given a list of modules (each module is expected to have a "solutions" list),
compute the fraction of modules that pass@k.
"""
total = len(modules)
if total == 0:
return 0
passed = sum(compute_module_pass(mod["solutions"], k) for mod in modules)
return passed / total
def compute_overall_pass_at_k(llm_data, ks):
"""
Given one LLM's data (a dict mapping category names to lists of modules),
compute the overall pass@k (over all modules in all categories).
Returns a dictionary mapping each k to the pass@k value.
"""
all_modules = []
for cat, modules in llm_data.items():
all_modules.extend(modules)
overall = {}
for k in ks:
overall[k] = compute_pass_at_k_for_modules(all_modules, k)
return overall
def compute_category_pass_at_k(llm_data, ks):
"""
For each category (type) in one LLM, compute pass@k.
Returns a dictionary mapping category names to a dictionary of k -> pass@k.
"""
cat_results = {}
for cat, modules in llm_data.items():
k_dict = {}
for k in ks:
k_dict[k] = compute_pass_at_k_for_modules(modules, k)
cat_results[cat] = k_dict
return cat_results
# --- Main processing and plotting ---
# Choose the k values you want to evaluate pass@k for:
ks = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
# Load the JSON file.
input_json_file = "solutions.json" # adjust filename if necessary
with open(input_json_file, "r") as f:
data = json.load(f)
# We'll store our computed pass@k results per LLM in a dictionary.
llm_results = {}
for llm, llm_data in data.items():
overall = compute_overall_pass_at_k(llm_data, ks)
categories = compute_category_pass_at_k(llm_data, ks)
llm_results[llm] = {
"overall": overall,
"categories": categories
}
# --- Plot Overall Pass@k for each LLM ---
plt.figure(figsize=(10, 6))
for llm, res in llm_results.items():
plt.plot(ks, [res["overall"][k] for k in ks], marker='o', label=llm)
# plt.xticks(ks) # Ensure all values from 1 to 15 are shown
# plt.xlabel("k", fontsize=14)
# plt.ylabel("Overall Pass@k", fontsize=14)
# plt.title("Overall Pass@k across k for each LLM", fontsize=16) # Larger title
# plt.legend(loc="upper left", bbox_to_anchor=(1, 1)) # Legend outside the plot
# plt.grid(True)
# plt.tight_layout()
# plt.savefig("./figures/overall_pass_at_k.png")
# plt.show()
# --- Plot Per-Category Pass@k for all LLMs, one heatmap per k ---
for k in ks:
    # Collect pass@k for every (category, LLM) pair at this k.
    category_pass_k = {}
    for llm, res in llm_results.items():
        for cat, kdict in res["categories"].items():
            category_pass_k.setdefault(cat, {})[llm] = kdict[k]
    # Convert to a DataFrame with categories as rows and LLMs as columns.
    df_heatmap = pd.DataFrame.from_dict(category_pass_k, orient="index")
    # Plot heatmap
    plt.figure(figsize=(10, 6))
    sns.heatmap(df_heatmap, annot=True, cmap="Blues", linewidths=0.5, fmt=".2f")
    plt.title(f"Pass@{k} Heatmap for Each LLM Across Categories", fontsize=16, fontweight="bold")
    plt.xlabel("LLM", fontsize=14, fontweight="bold")
    plt.ylabel("Category", fontsize=14, fontweight="bold")
    plt.xticks(rotation=45, ha="right", fontsize=12)
    plt.yticks(fontsize=12)
    plt.tight_layout()
    heatmap_path = f"./figures/per_category_pass_k{k}_heatmap.png"
    plt.savefig(heatmap_path)
    plt.close()
# --- (Optional) Print the computed results ---
print("Overall Pass@k per LLM:")
for llm, res in llm_results.items():
print(f"{llm}: {res['overall']}")
print("\nPer-Category Pass@k per LLM:")
for llm, res in llm_results.items():
print(f"{llm}:")
for cat, kdict in res["categories"].items():
print(f" {cat}: {kdict}")

13
solution_pass_analysis.csv Normal file

@@ -0,0 +1,13 @@
,gpt-3.5-turbo,gpt-4,gpt-4o,gpt-o1-mini,llama3.1-405B,qwen-max,qwen-plus,qwen2.5-coder-32B-instruct,codestral
Combinational Logic,112 | 5 | 3,117 | 3 | 0,120 | 0 | 0,118 | 1 | 1,115 | 2 | 3,117 | 2 | 1,109 | 1 | 10,112 | 2 | 6,120 | 0 | 0
Finite State Machines,23 | 15 | 22,32 | 22 | 6,31 | 24 | 5,39 | 18 | 3,31 | 24 | 5,34 | 26 | 0,27 | 23 | 10,39 | 10 | 11,36 | 6 | 18
Mathematical Functions,13 | 19 | 43,6 | 39 | 30,36 | 10 | 29,46 | 24 | 5,7 | 6 | 62,26 | 27 | 22,20 | 26 | 29,5 | 8 | 62,0 | 3 | 72
Basic Arithmetic Operations,37 | 2 | 36,63 | 8 | 4,66 | 9 | 0,68 | 4 | 3,43 | 2 | 30,38 | 22 | 15,27 | 13 | 35,54 | 6 | 15,62 | 13 | 0
Bitwise and Logical Operations,35 | 0 | 25,55 | 0 | 5,58 | 2 | 0,59 | 0 | 1,52 | 0 | 8,47 | 0 | 13,33 | 11 | 16,36 | 0 | 24,55 | 0 | 5
Pipelining,0 | 59 | 16,11 | 54 | 10,26 | 49 | 0,15 | 38 | 22,7 | 38 | 30,15 | 32 | 28,16 | 26 | 33,21 | 31 | 23,6 | 56 | 13
Polynomial Evaluation,19 | 3 | 53,69 | 0 | 6,74 | 1 | 0,68 | 5 | 2,58 | 6 | 11,55 | 2 | 18,28 | 5 | 42,65 | 7 | 3,69 | 6 | 0
Machine Learning,31 | 3 | 41,60 | 8 | 7,60 | 13 | 2,73 | 1 | 1,45 | 28 | 2,63 | 12 | 0,61 | 12 | 2,57 | 2 | 16,64 | 8 | 3
Financial Computing,9 | 23 | 28,21 | 22 | 17,29 | 13 | 18,20 | 20 | 20,11 | 21 | 28,28 | 15 | 17,15 | 12 | 33,16 | 7 | 37,17 | 23 | 20
Encryption,30 | 0 | 15,30 | 2 | 13,25 | 20 | 0,30 | 0 | 15,26 | 0 | 19,25 | 9 | 11,30 | 1 | 14,30 | 0 | 15,30 | 0 | 15
Physics,45 | 3 | 12,57 | 0 | 3,53 | 4 | 3,54 | 5 | 1,41 | 11 | 8,49 | 7 | 4,40 | 17 | 3,38 | 15 | 7,55 | 2 | 3
Climate,8 | 15 | 37,21 | 30 | 9,41 | 11 | 8,41 | 15 | 4,24 | 23 | 13,38 | 19 | 3,19 | 31 | 10,32 | 14 | 14,28 | 19 | 13

57
solution_resource_analysis.csv Normal file

@@ -0,0 +1,57 @@
,gpt-3.5-turbo,gpt-4,gpt-4o,gpt-o1-mini,llama3.1-405B,qwen-max,qwen-plus,qwen2.5-coder-32B-instruct,codestral
parity 8bit,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0
mux4to1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
majority,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
bin to gray,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0
eq comparator,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0
decoder 2to4,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0
seven segment decoder,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0
priority encoder,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fsm 3state,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
traffic light,1.0,1.0,2.0,0.0,0.0,2.0,3.0,2.0,inf
elevator controller,3.0,3.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0
vending machine,1.0,1.0,1.0,1.0,2.0,1.0,1.0,2.0,1.0
int sqrt,inf,inf,68.0,177.0,inf,64.0,229.0,173.0,inf
fibonacci,inf,56.0,1.0,56.0,56.0,56.0,inf,inf,inf
mod exp,inf,inf,4466.0,4669.0,inf,1911.0,1678.0,inf,inf
power,inf,79.0,74.0,93.0,inf,93.0,93.0,93.0,inf
log2 int,inf,inf,inf,10.0,20.0,inf,inf,12.0,inf
add 8bit,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0
mult 4bit,16.0,16.0,16.0,16.0,16.0,16.0,16.0,16.0,16.0
abs diff,12.0,12.0,14.0,12.0,12.0,inf,12.0,12.0,12.0
modulo op,82.0,82.0,82.0,82.0,111.0,inf,inf,inf,inf
subtract 8bit,8.0,8.0,8.0,8.0,inf,inf,inf,8.0,8.0
bitwise ops,16.0,16.0,16.0,16.0,16.0,16.0,16.0,16.0,16.0
left shift,10.0,10.0,10.0,10.0,10.0,12.0,12.0,10.0,10.0
bitwise not,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0
rotate left,inf,12.0,12.0,12.0,12.0,12.0,inf,12.0,12.0
pipelined adder,inf,0.0,16.0,inf,0.0,inf,0.0,15.0,inf
pipelined multiplier,inf,inf,77.0,70.0,56.0,inf,70.0,inf,inf
pipelined accumulator,inf,inf,inf,inf,27.0,inf,inf,inf,inf
pipelined max finder,inf,0.0,24.0,0.0,24.0,24.0,24.0,24.0,24.0
pipelined fir,inf,inf,inf,inf,inf,inf,inf,inf,inf
polynomial 1,61.0,61.0,61.0,61.0,61.0,61.0,61.0,61.0,61.0
polynomial 2,49.0,49.0,0.0,91.0,0.0,91.0,0.0,91.0,49.0
polynomial 3,77.0,77.0,77.0,77.0,77.0,77.0,77.0,77.0,77.0
polynomial 4,64.0,33.0,96.0,11.0,108.0,108.0,26.0,18.0,33.0
polynomial 5,inf,0.0,213.0,59.0,16.0,213.0,16.0,16.0,16.0
matrix vector mult,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
relu,8.0,8.0,8.0,8.0,8.0,16.0,8.0,8.0,16.0
gradient descent,47.0,47.0,47.0,47.0,47.0,47.0,47.0,47.0,47.0
mse loss,inf,216.0,64.0,64.0,216.0,64.0,216.0,64.0,64.0
conv2d,inf,0.0,0.0,0.0,inf,0.0,0.0,0.0,0.0
compound interest,inf,13060.0,10135.0,10135.0,52950.0,9247.0,inf,10135.0,52950.0
ddm,inf,815.0,inf,inf,inf,inf,inf,inf,inf
present value,107946.0,107946.0,107946.0,107946.0,107946.0,107946.0,107946.0,107946.0,107946.0
currency converter,inf,inf,0.0,0.0,25.0,0.0,inf,inf,inf
caesar cipher,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0
modular add cipher,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0
feistel cipher,inf,inf,inf,inf,inf,inf,inf,inf,inf
free fall distance,6.0,6.0,64.0,6.0,6.0,64.0,67.0,64.0,6.0
kinetic energy,70.0,70.0,54.0,54.0,54.0,54.0,54.0,54.0,54.0
potential energy,6.0,6.0,84.0,0.0,6.0,6.0,6.0,6.0,6.0
wavelength,81.0,81.0,81.0,81.0,81.0,81.0,81.0,81.0,81.0
carbon footprint,174.0,121.0,110.0,92.0,121.0,121.0,110.0,110.0,110.0
heat index,16.0,16.0,201.0,16.0,195.0,16.0,124.0,201.0,201.0
air quality index,inf,inf,128.0,104.0,inf,104.0,116.0,128.0,128.0
solar radiation average,inf,inf,44.0,44.0,44.0,44.0,inf,44.0,inf

Binary file not shown (image, 84 KiB)

131
functional_correctness.py Normal file

@@ -0,0 +1,131 @@
import json
import os
import re
import subprocess
# File paths
SOLUTIONS_FILE = "solutions.json"
PROBLEMS_FILE = "problems.json"
TEMP_VERILOG_FILE = "temp.v"
TEMP_TESTBENCH_FILE = "testbench.v"
TCL_SCRIPT_FILE = "run_testbench.tcl"
def write_tcl(top_module):
    # Generate the Tcl script for Vivado; top_module is the testbench's top module.
    tcl_commands = f"""
create_project temp_project ./temp_project -force -part xc7z020clg400-1
set_property source_mgmt_mode All [current_project]
add_files {TEMP_VERILOG_FILE}
add_files -fileset sim_1 {TEMP_TESTBENCH_FILE}
set_property top {top_module} [get_filesets sim_1]
launch_simulation -simset sim_1 -mode behavioral
run 3000ns
close_sim
exit
"""
    # Write the Tcl script
    with open(TCL_SCRIPT_FILE, "w", encoding="utf-8") as file:
        file.write(tcl_commands)
# Function to extract the top module name from the testbench
def extract_top_module_name(testbench_file):
with open(testbench_file, 'r', encoding="utf-8") as file:
for line in file:
match = re.search(r'\s*module\s+(\w+)\s*;', line)
if match:
print(match.group(1))
return match.group(1) # Extract module name
return None # Return None if no module found
def run_functional_correctness():
# Load JSON files
with open(SOLUTIONS_FILE, "r", encoding="utf-8") as file:
solutions_data = json.load(file)
with open(PROBLEMS_FILE, "r", encoding="utf-8") as file:
problems_data = json.load(file)
# Map module names to their testbenches
module_testbenches = {}
for category, problems in problems_data.items():
for problem in problems:
module_name = problem.get("module")
testbench_code = problem.get("Testbench")
if module_name and testbench_code:
module_testbenches[module_name] = testbench_code
# print(module_testbenches.keys())
# Get Vivado path from environment variable
vivado_path = os.environ.get("vivado")
if not vivado_path:
raise EnvironmentError("Vivado environment variable not set.")
vivado_path = os.path.join(vivado_path, "vivado.bat")
# Iterate over solutions and test them
for model, categories in solutions_data.items():
for category, modules in categories.items():
for module_entry in modules:
module_name = module_entry["module"]
# print(module_name)
# print(module_name in module_testbenches.keys())
if module_name not in module_testbenches:
print(f"Skipping {module_name}: No testbench found.")
continue
testbench_code = module_testbenches[module_name]
solutions = module_entry["solutions"]
# Iterate over all solutions
for solution_entry in solutions:
verilog_code = solution_entry["solution"]
# Write the Verilog design to a file
with open(TEMP_VERILOG_FILE, "w", encoding="utf-8") as f:
f.write(verilog_code)
# Write the testbench to a file
with open(TEMP_TESTBENCH_FILE, "w", encoding="utf-8") as f:
f.write(testbench_code)
# Extract the top module name
top_module = extract_top_module_name(TEMP_TESTBENCH_FILE)
if not top_module:
print(f"Error: Could not extract top module from {module_name}. Skipping...")
solution_entry["pass"] = "Error: Could not extract top module."
continue
print(f"Testing module: {module_name} (Top Module: {top_module})")
                    write_tcl(top_module)
# Run Vivado in batch mode
print(f"Running Vivado simulation for {module_name}...")
process = subprocess.run([vivado_path, "-mode", "batch", "-source", TCL_SCRIPT_FILE], capture_output=True, text=True)
# Capture output logs
output_log = process.stdout + "\n" + process.stderr
print(output_log)
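                    # The testbench is expected to print "All tests passed" on success;
                    # anything else is treated as a failure below.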
test_passed = "All tests passed" in output_log
# Determine pass/fail status
if test_passed:
solution_entry["pass"] = "true"
else:
# Extract relevant error messages
                        error_lines = "\n".join(line for line in output_log.split("\n") if "error" in line.lower() or "fail" in line.lower())
solution_entry["pass"] = error_lines if error_lines else "Test failed somehow"
print(f"Test result for {module_name}: {'PASS' if test_passed else 'FAIL'}")
# Save results after testing each module
with open(SOLUTIONS_FILE, "w", encoding="utf-8") as file:
json.dump(solutions_data, file, indent=4)
print("All tests completed.")

111
generate_solutions.py Normal file

@@ -0,0 +1,111 @@
import json
import os
import re
from openai import OpenAI
def load_prompt_data(filepath: str) -> dict:
"""
Loads the prompt data from JSON.
"""
with open(filepath, "r", encoding="utf-8") as f:
return json.load(f)
def load_solutions(filepath: str) -> dict:
"""
Loads the existing solutions JSON, or returns a default if file not found.
"""
if os.path.exists(filepath):
with open(filepath, "r", encoding="utf-8") as f:
return json.load(f)
return {}
def save_solutions(filepath: str, solutions: dict):
"""
Saves the solutions dictionary to the solutions.json file (pretty-printed).
"""
with open(filepath, "w", encoding="utf-8") as f:
json.dump(solutions, f, indent=4)
def call_LLMs(client, model: str, problem: str, module_header: str) -> str:
"""
Calls the OpenAI chat completion endpoint with the given prompt.
"""
prompt = f"""
Here we assume the SystemVerilog is not supported, so don't use the SystemVerilog syntax, such as break statement.
Please write a Verilog module that solves the following problem efficiently, using the exact module header below:
Problem:
{problem}
Module header (must not be changed):
{module_header}
Remember to return only the JSON format:
{{
"solution": "<verilog code>"
}}
"""
try:
response = client.chat.completions.create(
messages=[
{"role": "system", "content": "You are a helpful Verilog coding assistant. Please return a JSON object with a key 'solution' containing the Verilog code."},
{"role": "user", "content": prompt}
],
model=model,
max_tokens=3000,
temperature=1.5,
top_p=0.75,
)
response_content = response.choices[0].message.content.strip()
return response_content
except Exception as e:
print("Error:", str(e))
return json.dumps({"solution": f"Error: {str(e)}"})
def generate_solutions(api_key: str, model_name: str, k: int, prompt_json_file: str = "problems.json", solutions_json_file: str = "solutions.json"):
"""
Generates Verilog solutions for problems using an LLM.
"""
# Initialize OpenAI client
client = OpenAI(api_key=api_key)
# Load the problem data
prompt_data = load_prompt_data(prompt_json_file)
# Load or initialize solutions data
solutions_data = load_solutions(solutions_json_file)
if model_name not in solutions_data:
solutions_data[model_name] = {}
for _ in range(k):
for category, problems in prompt_data.items():
if category not in solutions_data[model_name]:
solutions_data[model_name][category] = []
for item in problems:
problem_statement = item.get("Problem", "")
module_header = item.get("Module header", "")
module_name = item.get("module")
response_json_str = call_LLMs(client, model_name, problem_statement, module_header)
                # Strip a possible Markdown code fence around the reply without touching its contents.
                response_json_str = re.sub(r"^```(?:json)?\s*|\s*```$", "", response_json_str.strip())
try:
response_json = json.loads(response_json_str)
verilog_code = response_json.get("solution", "")
except json.JSONDecodeError:
print(response_json_str)
verilog_code = "Error: Invalid JSON response"
print(f"Processing module: {module_name}")
category_list = solutions_data[model_name][category]
module_entry = next((entry for entry in category_list if entry.get("module") == module_name), None)
if module_entry is None:
module_entry = {"module": module_name, "solutions": []}
category_list.append(module_entry)
module_entry["solutions"].append({"solution": verilog_code, "pass": ""})
save_solutions(solutions_json_file, solutions_data)

362
problems.json Normal file

File diff suppressed because one or more lines are too long

4
requirements.txt Normal file

@@ -0,0 +1,4 @@
matplotlib==3.10.0
openai==1.63.2
pandas==2.2.3
seaborn==0.13.2

194
resource_usage.py Normal file

@@ -0,0 +1,194 @@
import json
import subprocess
import os
import re
def extract_module_name(verilog_code):
"""
Extract the module name from the Verilog code.
Assumes the module declaration is of the form:
module <module_name> (
Returns the module name as a string, or None if not found.
"""
match = re.search(r'\bmodule\s+(\w+)', verilog_code)
if match:
return match.group(1)
return None
def parse_optimized(lines):
"""
Extract resource usage numbers from the main (optimized) report sections.
Returns a dictionary with keys: LUT, FF, DSP, BRAM, IO.
"""
optimized = {"LUT": None, "FF": None, "DSP": None, "BRAM": None, "IO": None}
for line in lines:
m = re.search(r'\|\s*Slice LUTs\*?\s*\|\s*(\d+)', line)
if m:
optimized["LUT"] = int(m.group(1))
m = re.search(r'\|\s*Slice Registers\s*\|\s*(\d+)', line)
if m:
optimized["FF"] = int(m.group(1))
m = re.search(r'\|\s*DSPs\s*\|\s*(\d+)', line)
if m:
optimized["DSP"] = int(m.group(1))
m = re.search(r'\|\s*Block RAM Tile\s*\|\s*(\d+)', line)
if m:
optimized["BRAM"] = int(m.group(1))
m = re.search(r'\|\s*Bonded IOB\s*\|\s*(\d+)', line)
if m:
optimized["IO"] = int(m.group(1))
return optimized
def extract_primitives_section(lines):
"""
Extracts all lines between the "7. Primitives" header and the "8. Black Boxes" header.
"""
start_marker = "7. Primitives"
end_marker = "8. Black Boxes"
start_idx = None
end_idx = None
for idx, line in enumerate(lines):
if start_idx is None and start_marker in line and (idx + 1 < len(lines) and "------" in lines[idx + 1]):
start_idx = idx
elif start_idx is not None and end_marker in line and (idx + 1 < len(lines) and "------" in lines[idx + 1]):
end_idx = idx
break
if start_idx is None or end_idx is None:
return []
return lines[start_idx:end_idx]
def parse_primitives_section(lines):
"""
Parses the primitives section lines to accumulate resource usage.
Returns a dictionary with keys: LUT, FF, DSP, BRAM, IO.
In this example:
- For LUT: sums up any primitive whose name starts with "LUT" (e.g., LUT2, LUT3, ...)
- For IO: sums the usage of IBUF and OBUF.
"""
resources = {"LUT": 0, "FF": 0, "DSP": 0, "BRAM": 0, "IO": 0}
for line in lines:
stripped_line = line.strip()
if not stripped_line.startswith("|"):
continue
parts = stripped_line.split("|")
if len(parts) < 4:
continue
ref_name = parts[1].strip()
used_str = parts[2].strip()
try:
used = int(used_str)
except ValueError:
continue
if ref_name.startswith("LUT"):
resources["LUT"] += used
if ref_name in ("IBUF", "OBUF"):
resources["IO"] += used
# (Add additional processing for FF, DSP, BRAM if necessary.)
return resources
def run_synthesis(solution_code):
"""
Writes the given Verilog solution to a temporary file,
creates a Tcl script for Vivado to run synthesis and generate a utilization report,
runs Vivado in batch mode, and parses the resource usage report.
Returns a dictionary with keys "optimized" and "primitives" containing resource usage.
"""
# Write the Verilog code to a temporary file.
verilog_file = "temp.v"
with open(verilog_file, "w") as f:
f.write(solution_code)
# Extract the module name from the solution code.
top_module = extract_module_name(solution_code)
print(top_module)
if top_module is None:
print("Could not extract module name; using 'temp_top' as a default.")
top_module = "temp_top"
vivado_project = "temp_project"
tcl_script = "synthesis_script.tcl"
# Get the Vivado installation path from the environment variable.
vivado_path_env = os.environ.get("vivado")
if vivado_path_env is None:
print("Error: 'vivado' environment variable is not set.")
return None
vivado_path = os.path.join(vivado_path_env, "vivado.bat")
# Create the Vivado Tcl script.
tcl_commands = f"""
create_project {vivado_project} -force -part xc7z020clg400-1
add_files {verilog_file}
set_property top {top_module} [current_fileset]
# Run synthesis only (no simulation)
synth_design -top {top_module}
# Generate resource utilization report
report_utilization -file resource_usage.rpt
quit
"""
with open(tcl_script, "w") as file:
file.write(tcl_commands)
# Run Vivado in batch mode using the generated Tcl script.
try:
result = subprocess.run(
[vivado_path, "-mode", "batch", "-source", tcl_script],
capture_output=True, text=True, check=True
)
except subprocess.CalledProcessError as e:
print("Synthesis failed:", e)
return None
print(result.stdout)
# Check for the success message in the output.
if "Finished Writing Synthesis Report" in result.stdout:
# Read the resource utilization report.
with open("resource_usage.rpt", "r") as f:
report_lines = f.readlines()
optimized_resources = parse_optimized(report_lines)
primitives_section = extract_primitives_section(report_lines)
primitives_resources = (parse_primitives_section(primitives_section)
if primitives_section else {})
return {"optimized": optimized_resources, "primitives": primitives_resources}
else:
print("Synthesis did not complete successfully.")
return None
def run_resource_usage():
# Load the original JSON.
input_json_file = "solutions.json" # Update this file name if needed.
with open(input_json_file, "r") as f:
data = json.load(f)
    # Traverse all top-level models (e.g., "gpt-4o") and all of their categories.
for top_key, top_value in data.items():
# print(top_value.keys())
# exit()
# top_value should be a dict with categories (e.g., "Combinational Logic", "Finite State Machines", etc.)
for category, module_list in top_value.items():
# if category == "Combinational Logic":
# continue
for module in module_list:
for sol in module["solutions"]:
if sol.get("pass", "").strip().lower() == "true":
solution_code = sol["solution"]
print(f"Running synthesis for module '{module['module']}' in category '{category}'")
resource_usage = run_synthesis(solution_code)
if resource_usage:
sol["resource usage"] = resource_usage
else:
sol["resource usage"] = {"optimized": {}, "primitives": {}}
else:
sol["resource usage"] = {"optimized": {}, "primitives": {}}
# Write the updated JSON (with resource usage added) to a new file.
output_json_file = "solutions.json"
with open(output_json_file, "w") as f:
json.dump(data, f, indent=4)
print(f"Updated JSON written to {output_json_file}")

42
setup.py Normal file

@@ -0,0 +1,42 @@
import argparse
import subprocess
from generate_solutions import generate_solutions
from functional_correctness import run_functional_correctness
from resource_usage import run_resource_usage
def main():
    parser = argparse.ArgumentParser(description="Command-line interface for Verilog solution generation and evaluation.")
    parser.add_argument("-generate_solutions", nargs=3, metavar=("MODEL_NAME", "K", "API_KEY"), help="Generate Verilog solutions using the specified model, number of solutions per problem, and API key.")
    parser.add_argument("-functional_correctness", action="store_true", help="Run functional correctness evaluation.")
    parser.add_argument("-resource_usage", action="store_true", help="Run resource usage evaluation.")
    args = parser.parse_args()
    # Each stage runs at most once, in order: generation, functional check, resource usage.
    if args.generate_solutions:
        model_name, k, api_key = args.generate_solutions
        generate_solutions(api_key, model_name, int(k))
    if args.functional_correctness:
        run_functional_correctness()
        subprocess.run(["python", "./evaluate/count_pass.py"])
        subprocess.run(["python", "./evaluate/plot_pass.py"])
    if args.resource_usage:
        run_resource_usage()
        subprocess.run(["python", "./evaluate/count_resource.py"])
if __name__ == "__main__":
main()

60
solutions/sample.json Normal file

@@ -0,0 +1,60 @@
{
"gpt-3.5-turbo": {
"Combinational Logic": [
{
"module": "parity_8bit",
"solutions": [
{
"solution": "module parity_8bit (input [7:0] in, output out); assign out = in[0] ^ in[1] ^ in[2] ^ in[3] ^ in[4] ^ in[5] ^ in[6] ^ in[7]; endmodule",
"pass": "true",
"resource usage": {
"optimized": {
"LUT": 2,
"FF": 0,
"DSP": 0,
"BRAM": 0,
"IO": 9
},
"primitives": {
"LUT": 2,
"FF": 0,
"DSP": 0,
"BRAM": 0,
"IO": 9
}
}
},
{
"solution": "module parity_8bit (input [7:0] in, output out); reg parity; integer i; always @(*) begin parity = 1'b0; for(i=0; i<8; i=i+1) begin if(in[i] == 1'b1) parity = ~parity; end end assign out = parity; endmodule",
"pass": "true",
"resource usage": {
"optimized": {
"LUT": 2,
"FF": 0,
"DSP": 0,
"BRAM": 0,
"IO": 9
},
"primitives": {
"LUT": 2,
"FF": 0,
"DSP": 0,
"BRAM": 0,
"IO": 9
}
}
}
]
}
],
"Finite State Machines": [
{
"module": "fsm_3state",
"solutions": []
}
]
},
"gpt-4o":{
}
}

117752
solutions/solutions.json Normal file

File diff suppressed because one or more lines are too long