RLVE_Gym

Sleeping

App Files Files Community

RLVE_Gym / server /Gym /environments /block_image /environment.py

ZhiyuanZeng

Upload folder using huggingface_hub

3bf8430 verified about 1 month ago

raw

history blame contribute delete

8.52 kB

	import random
	from typing import Optional, List
	from ...environment import VerifiableEnvironment


	class BlockImage_Environment(VerifiableEnvironment) : # Source: https://www.luogu.com.cn/problem/P1058
	    """Environment asking for an ASCII isometric drawing of stacked cubes.

	    ``_generate`` samples an M x N grid of stack heights and renders the
	    reference drawing; ``_prompt_generate`` fills the prompt with that grid;
	    ``scorer`` compares a submitted drawing with the reference drawing
	    character by character.
	    """

	    prompt_template = \
	r"""You are given a {M} × {N} rectangular grid, where each cell represents a stack of identical cube blocks. Each cube has size 1 × 1 × 1, and no rotation or flipping is allowed — all cubes are placed in the same orientation.
	You are given a matrix representing the number of cubes stacked on each cell in the grid (the integer at row i and column j indicates how many cube blocks are stacked on the cell located at row i, column j):
	{matrix}

	The visual representation of a single cube follows this fixed format:

	$$
	\def\arraystretch{1e-10}
	\begin{aligned}
	&\verb!  +---+!\\
	&\verb! /   /|!\\
	&\verb!+---+ |!\quad\textsf{height}\\
	&\verb!|   | +!\\
	&\verb!|   |/ !\quad\textsf{width}\\
	&\verb!+---+  !\\
	& \quad\textsf{length}
	\end{aligned}
	$$

	Each `+` represents a corner, `-` spans the cube’s length, `/` shows depth (width), and `|` shows height. Empty space in the final drawing should be represented using `.`.

	The 3D isometric projection follows specific stacking rules:

	- Two cubes side by side (left/right):
	$$
	\def\arraystretch{1e-10}
	\begin{aligned}
	\verb!..+---+---+!\\
	\verb!./   /   /|!\\
	\verb!+---+---+ |!\\
	\verb!|   |   | +!\\
	\verb!|   |   |/.!\\
	\verb!+---+---+..!\\
	\end{aligned}
	$$

	- Two cubes stacked vertically (top/bottom):
	$$
	\def\arraystretch{1e-10}
	\begin{aligned}
	\verb!..+---+!\\
	\verb!./   /|!\\
	\verb!+---+ |!\\
	\verb!|   | +!\\
	\verb!|   |/|!\\
	\verb!+---+ |!\\
	\verb!|   | +!\\
	\verb!|   |/.!\\
	\verb!+---+..!\\
	\end{aligned}
	$$

	- Two cubes front/back (depth):
	$$
	\def\arraystretch{1e-10}
	\begin{aligned}
	\verb!....+---+!\\
	\verb!.../   /|!\\
	\verb!..+---+ |!\\
	\verb!./   /| +!\\
	\verb!+---+ |/.!\\
	\verb!|   | +..!\\
	\verb!|   |/...!\\
	\verb!+---+....!\\
	\end{aligned}
	$$

	The bottom-left corner of the lowest cube in cell ({M}, 1) (bottom row, first column) should align with the bottom-left of the entire drawing.

	Output Format:
	Your final output should be a string matrix of dimensions K × L (i.e., it has K lines separated by line breaks, with each line containing exactly L characters), where K is the number of rows and L is the number of columns required to draw the 3D structure correctly according to the rules above.

	---

	Example 1

	When the rectangular grid is 1 × 2, and the number of cubes in each cell is as follows:
	1 3

	The output is (do NOT include the backticks or quotes — use the format below exactly):
	```
	......+---+
	...../   /|
	....+---+ |
	....|   | +
	....|   |/|
	....+---+ |
	..+-|   | +
	./  |   |/|
	+---+---+ |
	|   |   | +
	|   |   |/.
	+---+---+..
	```

	---

	Example 2

	When the rectangular grid is 3 × 4, and the number of cubes in each cell is as follows:
	2 2 1 2
	2 2 1 1
	3 2 1 2

	The output is (do NOT include the backticks or quotes — use the format below exactly):
	```
	......+---+---+...+---+
	..+---+  /   /|../   /|
	./   /|-+---+ |.+---+ |
	+---+ |/   /| +-|   | +
	|   | +---+ |/+---+ |/|
	|   |/   /| +/   /|-+ |
	+---+---+ |/+---+ |/| +
	|   |   | +-|   | + |/.
	|   |   |/  |   |/| +..
	+---+---+---+---+ |/...
	|   |   |   |   | +....
	|   |   |   |   |/.....
	+---+---+---+---+......
	```
	"""

	    def __init__(self,
	                 max_height : int = 5,
	                 wrong_format : float = -1.0,
	                 invalid_answer : float = -0.5,
	                 wrong_size : float = 0.0,
	                 rewarding_strategy : str = "mean([gold=answer])^beta",
	                 rewarding_weight : float = +1.0,
	                 rewarding_beta : float = 2.0,
	                 **kwargs) :
	        """
	        Initialize the BlockImage_Environment instance.

	        Args:
	            max_height: upper bound (inclusive) for the number of cubes
	                sampled per cell in ``_generate``.
	            wrong_format: reward when no drawing is obtained, the drawing
	                is empty, or its rows differ in length.
	            invalid_answer: reward when a row contains a character outside
	                the drawing alphabet.
	            wrong_size: reward when the drawing's dimensions differ from
	                the reference drawing's.
	            rewarding_strategy: ``"mean([gold=answer])^beta"`` or
	                ``"gold=answer"``; selects the formula used by ``scorer``.
	            rewarding_weight: multiplier applied to the computed score.
	            rewarding_beta: exponent used by the ``mean(...)^beta`` strategy.
	            **kwargs: forwarded unchanged to the base-class initializer.
	        """
	        super().__init__(**kwargs)

	        self.max_height = max_height

	        self.rewards = {
	            "wrong_format" : wrong_format,
	            "invalid_answer" : invalid_answer,
	            "wrong_size" : wrong_size,
	            "rewarding_strategy" : rewarding_strategy,
	            "rewarding_weight" : rewarding_weight,
	            "rewarding_beta" : rewarding_beta,
	        }

	    @staticmethod
	    def _render_grid(grid : List[List[int]]) -> List[str] :
	        """Render ``grid`` (non-empty, every height >= 1) as drawing rows, top row first."""
	        # One cube, top row first. Every row is exactly 7 characters wide.
	        # '.' marks cells the cube does not cover and is never painted;
	        # ' ' belongs to a visible face and does overwrite what is behind it.
	        cube = (
	            "..+---+",
	            "./   /|",
	            "+---+ |",
	            "|   | +",
	            "|   |/.",
	            "+---+..",
	        )
	        cube_rows = len(cube)
	        cube_cols = len(cube[0])

	        num_rows = len(grid)
	        num_cols = len(grid[0])

	        # Each step towards the back shifts a cube 2 right and 2 up; each
	        # column shifts it 4 right; each stacked cube shifts it 3 up.
	        width = 2 * (num_rows - 1) + 4 * (num_cols - 1) + cube_cols
	        height = max(
	            2 * (num_rows - 1 - i) + 3 * (count - 1) + cube_rows
	            for i, row in enumerate(grid)
	            for count in row
	        )

	        # canvas[0] is the bottom line of the drawing.
	        canvas = [["."] * width for _ in range(height)]

	        # Paint back row first, left to right, bottom to top, so that nearer
	        # cubes overwrite the ones they hide.
	        for i, row in enumerate(grid) :
	            depth = num_rows - 1 - i
	            for j, count in enumerate(row) :
	                x_offset = 2 * depth + 4 * j
	                for k in range(count) :
	                    y_offset = 2 * depth + 3 * k
	                    for r, cube_line in enumerate(cube) :
	                        canvas_row = canvas[y_offset + (cube_rows - 1 - r)]
	                        for c, ch in enumerate(cube_line) :
	                            if ch != "." :
	                                canvas_row[x_offset + c] = ch

	        return ["".join(line) for line in reversed(canvas)]

	    def _generate(self) -> None :
	        """Sample a random grid and store it together with its reference drawing.

	        Reads ``self.parameter["MAX_M_N"]`` and writes ``"M"``, ``"N"``,
	        ``"grid"`` and ``"reference_answer"`` into ``self.parameter``.
	        """
	        assert "MAX_M_N" in self.parameter, "MAX_M_N is required in parameter"
	        MAX_M_N = self.parameter["MAX_M_N"]
	        assert MAX_M_N >= 1, "MAX_M_N should be greater than or equal to 1"

	        # Sampling order (M, then N, then heights row by row) is kept stable
	        # so a seeded generator yields the same instance as before.
	        M = self.parameter["M"] = random.randint(1, MAX_M_N)
	        N = self.parameter["N"] = random.randint(1, MAX_M_N)
	        grid = self.parameter["grid"] = [[random.randint(1, self.max_height) for _ in range(N)] for _ in range(M)]

	        self.parameter["reference_answer"] = "\n".join(self._render_grid(grid))

	    def _prompt_generate(self) -> str :
	        """Return the prompt with ``{M}``, ``{N}`` and ``{matrix}`` filled in."""
	        matrix_text = "\n".join(" ".join(map(str, row)) for row in self.parameter["grid"])

	        # str.replace rather than str.format: the template is full of LaTeX
	        # braces that format() would try to interpret as fields.
	        prompt = self.prompt_template
	        prompt = prompt.replace("{M}", str(self.parameter["M"]))
	        prompt = prompt.replace("{N}", str(self.parameter["N"]))
	        prompt = prompt.replace("{matrix}", matrix_text)
	        return prompt

	    def _process(self, answer : Optional[str]) -> Optional[List[str]] :
	        """Split ``answer`` into stripped, non-empty lines; ``None`` stays ``None``."""
	        if answer is None :
	            return None
	        stripped_lines = (line.strip() for line in answer.strip().splitlines())
	        return [line for line in stripped_lines if line]

	    def scorer(self, output : str) -> float :
	        """Score ``output`` against the stored reference drawing.

	        Returns ``rewards["wrong_format"]`` when no drawing is obtained, the
	        drawing is empty, or its rows differ in length;
	        ``rewards["invalid_answer"]`` when a row uses a character outside
	        ``.+-/| ``; ``rewards["wrong_size"]`` when the dimensions differ
	        from the reference. Otherwise the reward is derived from the number
	        of matching characters according to ``rewards["rewarding_strategy"]``.

	        Raises:
	            NotImplementedError: if the rewarding strategy is unknown.
	        """
	        # NOTE(review): ``processor`` is not defined in this class; presumably
	        # the base class extracts the answer and hands it to ``_process``.
	        image = self.processor(output)
	        if not image :
	            return self.rewards["wrong_format"]

	        # Rows are validated one at a time, so the first offending row
	        # decides which of the two penalties is returned.
	        first_width = len(image[0])
	        for row in image :
	            if len(row) != first_width :
	                return self.rewards["wrong_format"]
	            if not all(ch in ".+-/| " for ch in row) :
	                return self.rewards["invalid_answer"]

	        gold_image = self.parameter["reference_answer"].split("\n")
	        if len(image) != len(gold_image) :
	            return self.rewards["wrong_size"]
	        if first_width != len(gold_image[0]) :
	            return self.rewards["wrong_size"]

	        # Both drawings are now rectangles of identical shape.
	        total_correct = sum(
	            gold_ch == ch
	            for gold_row, row in zip(gold_image, image)
	            for gold_ch, ch in zip(gold_row, row)
	        )
	        total_cells = len(gold_image) * len(gold_image[0])

	        strategy = self.rewards["rewarding_strategy"]
	        if strategy == "mean([gold=answer])^beta" :
	            return self.rewards["rewarding_weight"] * (((total_correct / total_cells)) ** self.rewards["rewarding_beta"])
	        elif strategy == "gold=answer" :
	            return self.rewards["rewarding_weight"] * (total_correct == total_cells)
	        else :
	            raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))