# Uploaded by ZhiyuanZeng with huggingface_hub ("Upload folder using huggingface_hub"); commit 3bf8430 (verified).
import random
from typing import Optional, List
from ...environment import VerifiableEnvironment
class BlockImage_Environment(VerifiableEnvironment):  # Source: https://www.luogu.com.cn/problem/P1058
    """Environment that asks for an ASCII isometric drawing of stacked cubes.

    An instance samples an M x N grid of stack heights, renders the reference
    drawing, builds the textual prompt from ``prompt_template`` and scores a
    candidate drawing cell by cell against the reference.

    The instance state used here (``self.parameter``) and the ``processor``
    entry point called by ``scorer`` are not defined in this class; they are
    presumably supplied by ``VerifiableEnvironment`` -- confirm against the
    base class.
    """

    # One cube as drawn on the canvas. '.' marks transparent cells that never
    # overwrite what is already drawn; every row must be exactly 7 characters
    # wide because the renderer places cubes on a 7-column by 6-row footprint.
    _CUBE_TEMPLATE = (
        "..+---+",
        "./   /|",
        "+---+ |",
        "|   | +",
        "|   |/.",
        "+---+..",
    )

    # NOTE: the placeholders {M}, {N} and {matrix} are filled in with
    # str.replace (not str.format), so the literal braces used by the LaTeX
    # snippets below are safe.
    prompt_template = \
r"""You are given a {M} × {N} rectangular grid, where each cell represents a stack of identical cube blocks. Each cube has size 1 × 1 × 1, and no rotation or flipping is allowed — all cubes are placed in the same orientation.
You are given a matrix representing the number of cubes stacked on each cell in the grid (the integer at row i and column j indicates how many cube blocks are stacked on the cell located at row i, column j):
{matrix}
The visual representation of a **single cube** follows this fixed format:
$$
\def\arraystretch{1e-10}
\begin{aligned}
&\verb!  +---+!\\
&\verb! /   /|!\\
&\verb!+---+ |!\quad\textsf{height}\\
&\verb!|   | +!\\
&\verb!|   |/ !\quad\textsf{width}\\
&\verb!+---+  !\\
& \quad\textsf{length}
\end{aligned}
$$
Each `+` represents a corner, `-` spans the cube’s length, `/` shows depth (width), and `|` shows height. Empty space in the final drawing should be represented using `.`.
The 3D isometric projection follows specific stacking rules:
- **Two cubes side by side (left/right):**
$$
\def\arraystretch{1e-10}
\begin{aligned}
\verb!..+---+---+!\\
\verb!./   /   /|!\\
\verb!+---+---+ |!\\
\verb!|   |   | +!\\
\verb!|   |   |/.!\\
\verb!+---+---+..!\\
\end{aligned}
$$
- **Two cubes stacked vertically (top/bottom):**
$$
\def\arraystretch{1e-10}
\begin{aligned}
\verb!..+---+!\\
\verb!./   /|!\\
\verb!+---+ |!\\
\verb!|   | +!\\
\verb!|   |/|!\\
\verb!+---+ |!\\
\verb!|   | +!\\
\verb!|   |/.!\\
\verb!+---+..!\\
\end{aligned}
$$
- **Two cubes front/back (depth):**
$$
\def\arraystretch{1e-10}
\begin{aligned}
\verb!....+---+!\\
\verb!.../   /|!\\
\verb!..+---+ |!\\
\verb!./   /| +!\\
\verb!+---+ |/.!\\
\verb!|   | +..!\\
\verb!|   |/...!\\
\verb!+---+....!\\
\end{aligned}
$$
The bottom-left corner of the lowest cube in cell ({M}, 1) (bottom row, first column) should align with the bottom-left of the entire drawing.
**Output Format:**
Your final output should be a string matrix of dimensions K × L (i.e., it has K lines separated by line breaks, with each line containing exactly L characters), where K is the number of rows and L is the number of columns **required to draw the 3D structure correctly** according to the rules above.
---
**Example 1**
When the rectangular grid is 1 × 2, and the number of cubes in each cell is as follows:
1 3
The output is (do **NOT** include the backticks or quotes — use the format below exactly):
```
......+---+
...../   /|
....+---+ |
....|   | +
....|   |/|
....+---+ |
..+-|   | +
./  |   |/|
+---+---+ |
|   |   | +
|   |   |/.
+---+---+..
```
---
**Example 2**
When the rectangular grid is 3 × 4, and the number of cubes in each cell is as follows:
2 2 1 2
2 2 1 1
3 2 1 2
The output is (do **NOT** include the backticks or quotes — use the format below exactly):
```
......+---+---+...+---+
..+---+  /   /|../   /|
./   /|-+---+ |.+---+ |
+---+ |/   /| +-|   | +
|   | +---+ |/+---+ |/|
|   |/   /| +/   /|-+ |
+---+---+ |/+---+ |/| +
|   |   | +-|   | + |/.
|   |   |/  |   |/| +..
+---+---+---+---+ |/...
|   |   |   |   | +....
|   |   |   |   |/.....
+---+---+---+---+......
```
"""

    def __init__(self,
                 max_height: int = 5,
                 wrong_format: float = -1.0, invalid_answer: float = -0.5, wrong_size: float = 0.0, rewarding_strategy: str = "mean([gold=answer])^beta", rewarding_weight: float = +1.0, rewarding_beta: float = 2.0,
                 **kwargs):
        """
        Initialize the BlockImage_Environment instance.

        Args:
            max_height: Largest stack height that ``_generate`` may sample
                for a cell (heights are drawn from ``1..max_height``).
            wrong_format: Reward when the answer is missing, empty or not
                rectangular.
            invalid_answer: Reward when a row contains a character outside
                ``.+-/|`` and space.
            wrong_size: Reward when the drawing's dimensions differ from the
                reference drawing's.
            rewarding_strategy: Either ``"mean([gold=answer])^beta"`` or
                ``"gold=answer"``; any other value makes ``scorer`` raise.
            rewarding_weight: Multiplier applied to the final score.
            rewarding_beta: Exponent used by the ``mean(...)^beta`` strategy.
            **kwargs: Forwarded unchanged to the base-class initializer.
        """
        super().__init__(**kwargs)

        self.max_height = max_height
        self.rewards = {
            "wrong_format": wrong_format,
            "invalid_answer": invalid_answer,
            "wrong_size": wrong_size,
            "rewarding_strategy": rewarding_strategy,
            "rewarding_weight": rewarding_weight,
            "rewarding_beta": rewarding_beta,
        }

    @staticmethod
    def _render_isometric(grid: List[List[int]]) -> str:
        """Render ``grid`` (rows of stack heights) as the reference drawing.

        Cubes are painted back row to front row, left to right and bottom to
        top, so nearer cubes overwrite the ones they hide. Rows of the result
        are joined with newlines, top row first.
        """
        template = BlockImage_Environment._CUBE_TEMPLATE
        cube_rows = len(template)
        cube_cols = len(template[0])
        num_rows = len(grid)
        num_cols = len(grid[0])

        # Each step towards the back shifts a cube 2 right and 2 up, each
        # column shifts it 4 right, and each stacked cube shifts it 3 up.
        width = 2 * (num_rows - 1) + 4 * (num_cols - 1) + cube_cols
        height = max(
            2 * (num_rows - 1 - i) + 3 * (stack - 1) + cube_rows
            for i, row in enumerate(grid)
            for stack in row
        )

        # canvas[0] is the bottom line of the picture; it is flipped on output.
        canvas = [["."] * width for _ in range(height)]
        for i, row in enumerate(grid):
            depth = num_rows - 1 - i
            for j, stack in enumerate(row):
                left = 2 * depth + 4 * j
                for level in range(stack):
                    bottom = 2 * depth + 3 * level
                    for r, template_row in enumerate(template):
                        canvas_row = canvas[bottom + (cube_rows - 1 - r)]
                        for c, ch in enumerate(template_row):
                            if ch != ".":  # '.' is transparent
                                canvas_row[left + c] = ch

        return "\n".join("".join(line) for line in reversed(canvas))

    def _generate(self) -> None:
        """Sample a grid and store it with its reference drawing.

        Reads ``self.parameter["MAX_M_N"]`` and writes ``"M"``, ``"N"``,
        ``"grid"`` and ``"reference_answer"`` into ``self.parameter``.
        """
        assert "MAX_M_N" in self.parameter, "MAX_M_N is required in parameter"

        MAX_M_N = self.parameter["MAX_M_N"]
        assert MAX_M_N >= 1, "MAX_M_N should be greater than or equal to 1"

        # Keep this sampling order (M, N, then the grid row by row) so that a
        # fixed random seed keeps producing the same instance.
        M = self.parameter["M"] = random.randint(1, MAX_M_N)
        N = self.parameter["N"] = random.randint(1, MAX_M_N)
        grid = self.parameter["grid"] = [[random.randint(1, self.max_height) for j in range(N)] for i in range(M)]

        self.parameter["reference_answer"] = self._render_isometric(grid)

    def _prompt_generate(self) -> str:
        """Return the prompt with {M}, {N} and {matrix} substituted."""
        matrix_text = "\n".join(" ".join(map(str, row)) for row in self.parameter["grid"])
        # str.replace rather than str.format: the template holds literal braces.
        prompt = self.prompt_template
        prompt = prompt.replace("{M}", str(self.parameter["M"]))
        prompt = prompt.replace("{N}", str(self.parameter["N"]))
        prompt = prompt.replace("{matrix}", matrix_text)
        return prompt

    def _process(self, answer: Optional[str]) -> Optional[List[str]]:
        """Split an answer into its non-empty, whitespace-stripped lines.

        Returns ``None`` when ``answer`` is ``None``; otherwise a (possibly
        empty) list of lines.
        """
        if answer is None:
            return None
        stripped_lines = (line.strip() for line in answer.strip().splitlines())
        return [line for line in stripped_lines if line]

    def scorer(self, output: str) -> float:
        """Score ``output`` against the stored reference drawing.

        Returns the configured penalty for a missing, empty or ragged drawing
        (``wrong_format``), for characters outside the drawing alphabet
        (``invalid_answer``) or for mismatched dimensions (``wrong_size``).
        Otherwise the score is derived from the number of matching cells
        according to ``rewarding_strategy``.

        Raises:
            NotImplementedError: If ``rewarding_strategy`` is not one of the
                two supported values.
        """
        # NOTE(review): ``processor`` is not defined in this class; presumably
        # the base class provides it and it ends up calling ``_process``.
        image = self.processor(output)
        if image is None or not image:
            return self.rewards["wrong_format"]

        allowed = ".+-/| "
        expected_width = len(image[0])
        # Rows are validated in order, so the first offending row decides
        # which penalty is returned.
        for row in image:
            if len(row) != expected_width:
                return self.rewards["wrong_format"]
            if not all(ch in allowed for ch in row):
                return self.rewards["invalid_answer"]

        gold_image = self.parameter["reference_answer"].split("\n")
        if len(image) != len(gold_image):
            return self.rewards["wrong_size"]
        if expected_width != len(gold_image[0]):
            return self.rewards["wrong_size"]

        # Every answer row has the reference width at this point, so pairing
        # the characters with zip compares each cell exactly once.
        total_correct = sum(
            gold_ch == ch
            for gold_row, row in zip(gold_image, image)
            for gold_ch, ch in zip(gold_row, row)
        )
        total_cells = len(gold_image) * len(gold_image[0])

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "mean([gold=answer])^beta":
            return self.rewards["rewarding_weight"] * (((total_correct / total_cells)) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return self.rewards["rewarding_weight"] * (total_correct == total_cells)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))