File size: 5,748 Bytes
3bf8430
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
from math import gcd
from functools import lru_cache
import random
from typing import Optional
from ...environment import VerifiableEnvironment


class CirculatingDecimalCounting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P1587
    prompt_template = \
r"""Please count how many **distinct pure repeating decimals** (in terms of numeric value) exist in base ${K}$, that can be written as a reduced fraction $\frac{x}{y}$ where $1 \le x \le {N}$ and $1 \le y \le {M}$, with $x$ and $y$ being integers.
A number is called a **pure repeating decimal** if and only if it can be written in the form of $$a.\dot{c_1} c_2 c_3 \dots c_{p - 1} \dot{c_p}$$, where $a$ is an integer, $p \ge 1$, and each $c_i$ ($1 \le i \le p$) is a digit in base ${K}$.

Examples:
- In base 10, $0.454545\ldots = 0.\dot{4}\dot{5}$ is a pure repeating decimal; it can be written as $\frac{5}{11}$ or $\frac{10}{22}$.
- In contrast, $0.166666\ldots = 0.1\dot{6}$ is **not** pure repeating in base 10; it can be written as $\frac{1}{6}$.

Note:
- **Integers are considered pure repeating**, because their decimal part can be represented as a repeating sequence of 0s.
- **Finite decimals with non-zero fractional parts** are **not** considered pure repeating.

**Output Format:** Your final answer should be a single integer — the total number of such distinct pure repeating decimals."""

    def __init__(self,
                 wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0,
                 **kwargs) :
        """
        Initialize the CirculatingDecimalCounting_Environment instance.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "rewarding_strategy" : rewarding_strategy,
            "rewarding_weight" : rewarding_weight,
            "rewarding_beta" : rewarding_beta,
        }
    

    def _generate(self) -> None :
        assert "MAX_N" in self.parameter, "MAX_N is required in parameter"
        MAX_N = self.parameter["MAX_N"]
        assert MAX_N >= 1, "MAX_N should be greater than or equal to 1"
        N = self.parameter["N"] = random.randint(1, MAX_N)

        assert "MAX_M" in self.parameter, "MAX_M is required in parameter"
        MAX_M = self.parameter["MAX_M"]
        assert MAX_M >= 1, "MAX_M should be greater than or equal to 1"
        M = self.parameter["M"] = random.randint(1, MAX_M)

        assert "MAX_K" in self.parameter, "MAX_K is required in parameter"
        MAX_K = self.parameter["MAX_K"]
        assert MAX_K >= 2, "MAX_K should be greater than or equal to 2"
        K = self.parameter["K"] = random.randint(2, MAX_K)


        LIM = min(M, max(K, int(M ** 0.5) + 1))

        g = [0] * (K + 1)
        for i in range(1, K + 1):
            g[i] = g[i - 1] + (1 if gcd(i, K) == 1 else 0)

        mu = [0] * (LIM + 1)
        is_comp = [False] * (LIM + 1)
        f = [0] * (LIM + 1)
        primes = []

        mu[1] = 1
        f[1] = 1

        def G(x):
            return (x // K) * g[K] + g[x % K]

        for i in range(2, LIM + 1):
            if not is_comp[i]:
                primes.append(i)
                mu[i] = -1
            for p in primes:
                ip = i * p
                if ip > LIM:
                    break
                is_comp[ip] = True
                if i % p == 0:
                    mu[ip] = 0
                    break
                else:
                    mu[ip] = -mu[i]
            f[i] = f[i - 1] + mu[i] * (G(i) - G(i - 1))

        @lru_cache(None)
        def F(x):
            if x <= LIM:
                return f[x]
            res = 1
            l = 2
            while l <= x:
                t = x // l
                r = x // t
                res -= F(t) * (G(r) - G(l - 1))
                l = r + 1
            return res

        ans = 0
        l = 1
        up = min(N, M)
        while l <= up:
            n_div = N // l
            m_div = M // l
            r = min(N // n_div, M // m_div)
            ans += n_div * G(m_div) * (F(r) - F(l - 1))
            l = r + 1

        assert ans > 0
        self.parameter["reference_answer"] = ans
    

    def _prompt_generate(self) -> str :
        return self.prompt_template.replace(r"{K}", str(self.parameter["K"])) \
                                  .replace(r"{N}", str(self.parameter["N"])) \
                                  .replace(r"{M}", str(self.parameter["M"]))
    

    def _process(self, answer : Optional[str]) -> Optional[int] :
        if answer is not None :
            answer = answer.strip()
            try :
                int_answer = int(answer)
                return int_answer
            except ValueError :
                return None
        else :
            return None

    def scorer(self, output : str) -> float :
        processed_result = self.processor(output)
        if processed_result is not None :
            if processed_result <= 0 :
                return self.rewards["wrong_format"]

            if self.rewards["rewarding_strategy"] == "(min/max)^beta" :
                a, b = self.parameter["reference_answer"], processed_result
                return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"])
            elif self.rewards["rewarding_strategy"] == "gold=answer" :
                return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"])
            else :
                raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
        else :
            return self.rewards["wrong_format"]