CodeV-SVA: Training Specialized LLMs for Hardware Assertion Generation via RTL-Grounded Bidirectional Data Synthesis
Note: CodeV-SVA-no-think-8B is trained without thinking trajectories. It should be used in Qwen3's non-thinking mode (set `enable_thinking=False`), as in the example below.
We introduce CodeV-SVA, a family of large language models designed to translate natural-language verification properties into SystemVerilog Assertions (SVAs).
Open-Source Plan:
| Model | Download |
|---|---|
| CodeV-SVA-8B | 🤗 HuggingFace |
| CodeV-SVA-no-think-8B | 🤗 HuggingFace |
| CodeV-SVA-14B | 🤗 HuggingFace |
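To run the example against a local copy of a checkpoint, you can first fetch it with `huggingface_hub` (a minimal sketch; the `wyt2000/CodeV-SVA-14B` repo id comes from the citation at the bottom, and the other variants are assumed to follow the same naming):

```python
from huggingface_hub import snapshot_download

# Download a checkpoint and get its local path (repo id taken from the
# citation below; the 8B variants are assumed to use the same namespace).
local_dir = snapshot_download(repo_id="wyt2000/CodeV-SVA-14B")
print(local_dir)  # use this path as MODEL_DIR in the inference example below
```

The end-to-end example below builds an NL-to-SVA prompt for a FIFO testbench, applies the chat template in non-thinking mode, and generates the assertion with vLLM.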
````python
from vllm import LLM, SamplingParams
from transformers import AutoTokenizer

MODEL_DIR = "/path/to/CodeV-SVA-no-think-8B/"


def get_nl2sva_prompt(testbench: str, problem: str, reset_signal=None) -> str:
    """Build the NL-to-SVA prompt from a testbench, a property description,
    and an optional disable-condition (reset) signal."""
    prompt = ""
    prompt += "Here is the testbench to perform your translation:\n"
    prompt += testbench
    prompt += "\nQuestion: Create a SVA assertion that checks: "
    prompt += problem + "\n"
    if reset_signal is not None:
        prompt += f"You should use `{reset_signal}` as the disable condition signal. "
        example = f'''asrt: assert property (@(posedge clk) disable iff ({reset_signal})
    (a && b) != 1'b1
);'''
    else:
        example = '''asrt: assert property (@(posedge clk)
    (a && b) != 1'b1
);'''
    prompt += f"""Do not add code to output an error message string.
Enclose your SVA code with ```systemverilog and ```. Only output the code snippet and do NOT output anything else.
For example,
```systemverilog
{example}
```
Answer:"""
    return prompt


if __name__ == "__main__":
    system_prompt = "You are an AI assistant tasked with formal verification of register transfer level (RTL) designs.\nYour job is to translate a description of an assertion to concrete SystemVerilog Assertion (SVA) implementation.\n"
    # FIFO testbench used as the RTL context for the assertion.
    testbench = "module fifo_1r1w_tb (\n clk,\n reset_,\n wr_vld,\n wr_data,\n wr_ready,\n rd_vld,\n rd_data,\n rd_ready\n );\n \n parameter FIFO_DEPTH = 4;\n parameter DATA_WIDTH = 1;\n\nlocalparam FIFO_DEPTH_log2 = $clog2(FIFO_DEPTH); \nlocalparam DATA_WIDTH_log2 = $clog2(DATA_WIDTH); \n\n input clk;\n input reset_;\n input wr_vld;\n input [DATA_WIDTH-1:0] wr_data;\n input wr_ready;\n input rd_vld;\n input [DATA_WIDTH-1:0] rd_data;\n input rd_ready;\n\nwire wr_push;\nwire rd_pop;\n\nwire tb_reset;\nassign tb_reset = (reset_ == 1'b0);\n\nwire fifo_full;\nassign wr_push = wr_vld && wr_ready;\nassign rd_pop = rd_vld && rd_ready; \n\nreg [DATA_WIDTH-1:0] fifo_array [FIFO_DEPTH-1:0]; //fifo array - shift register\nreg [FIFO_DEPTH_log2-1:0] fifo_rd_ptr; //fifo array - rd_ptr\nwire actual_fifo_pop; // actual pop == pop\nreg fifo_empty; // fifo empty\nwire [DATA_WIDTH-1:0] fifo_out_data; // dout\n\n// ---- shift register code start ----\nalways @(posedge clk) begin\n if (!reset_) fifo_array[0] <= 'd0;\n else if (wr_push) begin\n fifo_array[0] <= wr_data;\n end else fifo_array[0] <= fifo_array[0];\nend\nfor (genvar i = 1; i < FIFO_DEPTH; i++ ) begin : loop_id\n always @(posedge clk) begin\n if (!reset_) fifo_array[i] <= 'd0;\n else if (wr_push) begin\n fifo_array[i] <= fifo_array[i-1];\n end else fifo_array[i] <= fifo_array[i];\n end\nend\n\n// ---- read pointer/fifo empty code start ----\nalways @(posedge clk) begin\n if (!reset_) begin\n fifo_rd_ptr <= 'd0;\n end else if (wr_push && fifo_empty) begin\n fifo_rd_ptr <= 'd0;\n end else if (rd_pop && !fifo_empty && (fifo_rd_ptr == 'd0)) begin\n fifo_rd_ptr <= 'd0;\n end else begin\n fifo_rd_ptr <= fifo_rd_ptr + wr_push - rd_pop;\n end\n if (!reset_) begin\n fifo_empty <= 'd1;\n end else if (rd_pop && !fifo_empty && (fifo_rd_ptr == 'd0) && !wr_push) begin\n fifo_empty <= 'd1;\n end else if ((fifo_rd_ptr != 'd0) || wr_push && !rd_pop) begin\n fifo_empty <= 'd0;\n end\nend\n// ---- fifo full and dout code start ----\nassign fifo_full = (fifo_rd_ptr == (FIFO_DEPTH - 1)) && !fifo_empty;\nassign fifo_out_data = fifo_array[fifo_rd_ptr];\n\nendmodule"
    problem = "When there is a write push to the FIFO, data is eventually popped. Use the signals 'rd_pop' and 'wr_push'."
    reset_signal = "tb_reset"
    user_prompt = get_nl2sva_prompt(testbench, problem, reset_signal)
    dialog = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    # Render the chat template as a plain string for vLLM.
    tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
    prompt = tokenizer.apply_chat_template(
        dialog,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False  # Use non-thinking mode
    )
    print(f"### Prompt:\n{prompt}")

    # Load the model with vLLM and sample completions.
    model = LLM(
        MODEL_DIR,
        tensor_parallel_size=4
    )
    sampling_params = SamplingParams(
        temperature=0.8,
        top_p=0.95,
        max_tokens=32768,
        n=1
    )
    responses = model.generate(prompt, sampling_params)
    for output in responses[0].outputs:
        response_text = output.text
        print(f"### Response:\n{response_text}")
````
Citation:

```bibtex
@misc{CodeV-SVA,
  title={CodeV-SVA: Training Specialized LLMs for Hardware Assertion Generation via RTL-Grounded Bidirectional Data Synthesis},
  author={Yutong Wu and Chenrui Cao and Pengwei Jin and Di Huang and Rui Zhang and Xishan Zhang and Zidong Du and Qi Guo and Xing Hu},
  year={2025},
  howpublished={\url{https://huggingface.co/wyt2000/CodeV-SVA-14B}},
}
```