RLVE_Gym

Sleeping

App Files Files Community

RLVE_Gym / server /Gym /environments /combination_odd_subsequence_counting /environment.py

ZhiyuanZeng

Upload folder using huggingface_hub

3bf8430 verified about 1 month ago

raw

history blame contribute delete

3.97 kB

	import random
	from typing import Optional
	from ...environment import VerifiableEnvironment


	class CombinationOddSubsequenceCounting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3773
	prompt_template = \
	r"""You are given a sequence of distinct integers: {array}

	Please count the number of subsequences (not necessarily contiguous, but the order must be preserved) a[1], ..., a[k] such that:
	1. k ≥ 2 (the subsequence must have at least two elements);
	2. C(a[1], a[2]) × C(a[2], a[3]) × ... × C(a[k−1], a[k]) is odd, where C(x, y) denotes the binomial coefficient "x choose y".

	Output Format: A single integer — the number of such valid subsequences."""

	def __init__(self,
	wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0,
	**kwargs) :
	"""
	Initialize the CombinationOddSubsequenceCounting_Environment instance.
	"""
	super().__init__(**kwargs)

	self.rewards = {
	"wrong_format": wrong_format,
	"rewarding_strategy": rewarding_strategy,
	"rewarding_weight": rewarding_weight,
	"rewarding_beta": rewarding_beta,
	}


	def _generate(self) -> None :
	assert "N" in self.parameter, "N is required in parameter"
	N = self.parameter["N"]
	assert N >= 2, "N should be greater than or equal to 2"

	A = self.parameter["A"] = random.sample(range(1, 2 * N), N)
	random.shuffle(A)


	max_val = max(A)
	T = [-1] * (max_val + 1)
	for i, v in enumerate(A):
	T[v] = i

	# f[i] = number of non-increasing subsequences starting at i (including length-1)
	f = [0] * N
	ans = 0

	# DP from right to left
	for i in range(N - 1, -1, -1):
	mask = A[i]
	cnt = 1 # the subsequence [A[i]] itself
	# enumerate all non-zero proper submasks j of mask
	j = mask & (mask - 1)
	while j:
	idx = T[j]
	# if that value appears later in the sequence, extend subsequences
	if idx > i:
	cnt += f[idx]
	j = mask & (j - 1)
	f[i] = cnt
	ans += cnt

	# subtract the length-1 subsequences to count only those of length >= 2
	ans -= N
	self.parameter["reference_answer"] = ans


	def _prompt_generate(self) -> str :
	return self.prompt_template.format(array = " ".join(map(str, self.parameter["A"])))


	def _process(self, answer : Optional[str]) -> Optional[int] :
	if answer is not None :
	answer = answer.strip()
	try :
	int_answer = int(answer)
	return int_answer
	except ValueError :
	return None
	else :
	return None


	def scorer(self, output : str) -> float :
	processed_result = self.processor(output)
	if processed_result is not None :
	if processed_result < 0 :
	return self.rewards["wrong_format"]

	if self.rewards["rewarding_strategy"] == "(min/max)^beta" :
	a, b = self.parameter["reference_answer"], processed_result
	if b == 0 :
	return self.rewards["rewarding_weight"] * (a == 0)
	return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"])
	elif self.rewards["rewarding_strategy"] == "gold=answer" :
	return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"])
	else :
	raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
	else :
	return self.rewards["wrong_format"]