ZhiyuanZeng's picture
Upload folder using huggingface_hub
3bf8430 verified
import random
from typing import Optional
from ...environment import VerifiableEnvironment
class CombinationOddSubsequenceCounting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3773
prompt_template = \
r"""You are given a sequence of **distinct** integers: {array}
Please count the number of subsequences (not necessarily contiguous, but the order must be preserved) a[1], ..., a[k] such that:
1. k ≥ 2 (the subsequence must have at least two elements);
2. C(a[1], a[2]) × C(a[2], a[3]) × ... × C(a[k−1], a[k]) is **odd**, where C(x, y) denotes the binomial coefficient "x choose y".
**Output Format:** A single integer — the number of such valid subsequences."""
def __init__(self,
wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0,
**kwargs) :
"""
Initialize the CombinationOddSubsequenceCounting_Environment instance.
"""
super().__init__(**kwargs)
self.rewards = {
"wrong_format": wrong_format,
"rewarding_strategy": rewarding_strategy,
"rewarding_weight": rewarding_weight,
"rewarding_beta": rewarding_beta,
}
def _generate(self) -> None :
assert "N" in self.parameter, "N is required in parameter"
N = self.parameter["N"]
assert N >= 2, "N should be greater than or equal to 2"
A = self.parameter["A"] = random.sample(range(1, 2 * N), N)
random.shuffle(A)
max_val = max(A)
T = [-1] * (max_val + 1)
for i, v in enumerate(A):
T[v] = i
# f[i] = number of non-increasing subsequences starting at i (including length-1)
f = [0] * N
ans = 0
# DP from right to left
for i in range(N - 1, -1, -1):
mask = A[i]
cnt = 1 # the subsequence [A[i]] itself
# enumerate all non-zero proper submasks j of mask
j = mask & (mask - 1)
while j:
idx = T[j]
# if that value appears later in the sequence, extend subsequences
if idx > i:
cnt += f[idx]
j = mask & (j - 1)
f[i] = cnt
ans += cnt
# subtract the length-1 subsequences to count only those of length >= 2
ans -= N
self.parameter["reference_answer"] = ans
def _prompt_generate(self) -> str :
return self.prompt_template.format(array = " ".join(map(str, self.parameter["A"])))
def _process(self, answer : Optional[str]) -> Optional[int] :
if answer is not None :
answer = answer.strip()
try :
int_answer = int(answer)
return int_answer
except ValueError :
return None
else :
return None
def scorer(self, output : str) -> float :
processed_result = self.processor(output)
if processed_result is not None :
if processed_result < 0 :
return self.rewards["wrong_format"]
if self.rewards["rewarding_strategy"] == "(min/max)^beta" :
a, b = self.parameter["reference_answer"], processed_result
if b == 0 :
return self.rewards["rewarding_weight"] * (a == 0)
return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"])
elif self.rewards["rewarding_strategy"] == "gold=answer" :
return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"])
else :
raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
else :
return self.rewards["wrong_format"]