Spaces:
Sleeping
Sleeping
| import random | |
| from typing import Optional | |
| from ...environment import VerifiableEnvironment | |
| class CombinationOddSubsequenceCounting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3773 | |
| prompt_template = \ | |
| r"""You are given a sequence of **distinct** integers: {array} | |
| Please count the number of subsequences (not necessarily contiguous, but the order must be preserved) a[1], ..., a[k] such that: | |
| 1. k ≥ 2 (the subsequence must have at least two elements); | |
| 2. C(a[1], a[2]) × C(a[2], a[3]) × ... × C(a[k−1], a[k]) is **odd**, where C(x, y) denotes the binomial coefficient "x choose y". | |
| **Output Format:** A single integer — the number of such valid subsequences.""" | |
| def __init__(self, | |
| wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0, | |
| **kwargs) : | |
| """ | |
| Initialize the CombinationOddSubsequenceCounting_Environment instance. | |
| """ | |
| super().__init__(**kwargs) | |
| self.rewards = { | |
| "wrong_format": wrong_format, | |
| "rewarding_strategy": rewarding_strategy, | |
| "rewarding_weight": rewarding_weight, | |
| "rewarding_beta": rewarding_beta, | |
| } | |
| def _generate(self) -> None : | |
| assert "N" in self.parameter, "N is required in parameter" | |
| N = self.parameter["N"] | |
| assert N >= 2, "N should be greater than or equal to 2" | |
| A = self.parameter["A"] = random.sample(range(1, 2 * N), N) | |
| random.shuffle(A) | |
| max_val = max(A) | |
| T = [-1] * (max_val + 1) | |
| for i, v in enumerate(A): | |
| T[v] = i | |
| # f[i] = number of non-increasing subsequences starting at i (including length-1) | |
| f = [0] * N | |
| ans = 0 | |
| # DP from right to left | |
| for i in range(N - 1, -1, -1): | |
| mask = A[i] | |
| cnt = 1 # the subsequence [A[i]] itself | |
| # enumerate all non-zero proper submasks j of mask | |
| j = mask & (mask - 1) | |
| while j: | |
| idx = T[j] | |
| # if that value appears later in the sequence, extend subsequences | |
| if idx > i: | |
| cnt += f[idx] | |
| j = mask & (j - 1) | |
| f[i] = cnt | |
| ans += cnt | |
| # subtract the length-1 subsequences to count only those of length >= 2 | |
| ans -= N | |
| self.parameter["reference_answer"] = ans | |
| def _prompt_generate(self) -> str : | |
| return self.prompt_template.format(array = " ".join(map(str, self.parameter["A"]))) | |
| def _process(self, answer : Optional[str]) -> Optional[int] : | |
| if answer is not None : | |
| answer = answer.strip() | |
| try : | |
| int_answer = int(answer) | |
| return int_answer | |
| except ValueError : | |
| return None | |
| else : | |
| return None | |
| def scorer(self, output : str) -> float : | |
| processed_result = self.processor(output) | |
| if processed_result is not None : | |
| if processed_result < 0 : | |
| return self.rewards["wrong_format"] | |
| if self.rewards["rewarding_strategy"] == "(min/max)^beta" : | |
| a, b = self.parameter["reference_answer"], processed_result | |
| if b == 0 : | |
| return self.rewards["rewarding_weight"] * (a == 0) | |
| return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"]) | |
| elif self.rewards["rewarding_strategy"] == "gold=answer" : | |
| return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"]) | |
| else : | |
| raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"])) | |
| else : | |
| return self.rewards["wrong_format"] |