File size: 5,342 Bytes
9a59bc2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
"""

Inference script for Gold Price Direction Predictor



This script demonstrates how to load the model and make predictions.

"""

import pandas as pd
import numpy as np
from joblib import load
from huggingface_hub import hf_hub_download
import warnings
warnings.filterwarnings("ignore")


def load_model():
    """
    Download the trained model artifact from the Hugging Face Hub and load it.

    Returns:
    - The object deserialized by joblib on success, or None if the download
      or the deserialization failed (the error is printed, not raised).
    """
    repo_id = "theonegareth/GoldPricePredictor"
    artifact_name = "gold_direction_model.joblib"

    loaded = None
    try:
        # NOTE: joblib deserialization is pickle-based; only load artifacts
        # from a repository you trust.
        loaded = load(hf_hub_download(repo_id, artifact_name))
        print("Model loaded successfully!")
    except Exception as err:
        # Best-effort loader: report the problem and signal failure with None.
        print(f"Error loading model: {err}")
        loaded = None
    return loaded


def add_features_adaptive(data: pd.DataFrame, price='close') -> pd.DataFrame:
    """
    Build the technical-indicator feature columns on a copy of ``data``.

    Window and lag sizes are derived from the number of rows so that short
    histories still produce features. Columns are appended in a fixed order:
    returns, lagged returns, rolling return statistics, RSI, MACD,
    Bollinger bands, then calendar fields.

    Parameters:
    - data: DataFrame holding the ``price`` column and a datetime 'date' column
    - price: Name of the price column to derive features from

    Returns:
    - A new DataFrame with the original columns plus the feature columns

    Raises:
    - ValueError: if ``data`` has fewer than 8 rows
    """
    out = data.copy()
    n = out.shape[0]

    if n < 8:
        raise ValueError(f"Dataset too small (n={n}). Need at least 8 rows.")

    px = out[price]
    ret = px.pct_change()
    out['ret'] = ret
    out['log_ret'] = np.log1p(ret)

    # Lagged returns: between 1 and 5 lags, scaled with the history length.
    lag_count = max(1, min(5, n // 6))
    for lag in range(1, lag_count + 1):
        out[f'ret_lag_{lag}'] = ret.shift(lag)

    # Rolling statistics of returns over every candidate window that fits.
    windows = [w for w in (3, 5, 10, 20) if w < n - 2] or [3]
    for w in windows:
        roller = ret.rolling(w, min_periods=1)
        for stat in ('mean', 'std', 'min', 'max'):
            out[f'roll_{stat}_{w}'] = getattr(roller, stat)()

    # RSI (column keeps the name 'rsi14' even when the window is shorter).
    rsi_span = max(3, min(14, n // 6))
    delta = px.diff()
    ups = delta.where(delta > 0, 0.0)
    downs = -delta.where(delta < 0, 0.0)
    avg_gain = ups.rolling(rsi_span, min_periods=1).mean()
    avg_loss = downs.rolling(rsi_span, min_periods=1).mean()
    # The small epsilon avoids division by zero when there were no losses.
    strength = avg_gain / (avg_loss + 1e-9)
    out['rsi14'] = 100 - (100 / (1 + strength))

    # MACD line, its signal line and their difference.
    fast_span = max(6, min(12, n // 5))
    slow_span = max(fast_span + 4, min(26, n // 3))
    signal_span = max(5, min(9, n // 6))
    macd_line = (
        px.ewm(span=fast_span, adjust=False).mean()
        - px.ewm(span=slow_span, adjust=False).mean()
    )
    signal_line = macd_line.ewm(span=signal_span, adjust=False).mean()
    out['macd'] = macd_line
    out['macd_signal'] = signal_line
    out['macd_hist'] = macd_line - signal_line

    # Bollinger bands: rolling mean +/- two rolling standard deviations.
    band_span = max(5, min(20, n // 4))
    band = px.rolling(band_span, min_periods=1)
    mid = band.mean()
    spread = band.std()
    upper = mid + 2 * spread
    lower = mid - 2 * spread
    out['bb_mid'] = mid
    out['bb_up'] = upper
    out['bb_low'] = lower
    out['bb_width'] = (upper - lower) / (mid + 1e-9)

    # Calendar fields taken from the 'date' column.
    when = out['date'].dt
    out['dow'] = when.weekday
    out['month'] = when.month

    return out


def predict_next_day_direction(model, historical_data: pd.DataFrame, threshold=0.52):
    """
    Predict next-day direction from historical price data.

    Parameters:
    - model: Loaded classifier exposing ``predict_proba``
    - historical_data: DataFrame with 'date' and 'close' columns
    - threshold: Probability at or above which the prediction is "up"

    Returns a 3-tuple:
    - prediction: 1 for up, 0 for down
    - probability: Probability of going up, as reported by the model
    - direction: Human-readable label, "UP 📈" or "DOWN 📉"

    Raises:
    - ValueError: if no row has all lagged-return features available
      (or if the feature builder rejects the input as too small)
    """
    # Features depend on chronological order, so sort before deriving them.
    historical_data = historical_data.sort_values('date').reset_index(drop=True)

    feat = add_features_adaptive(historical_data, price='close')

    # The first rows have no lagged returns yet; they cannot be scored.
    lag_cols = [c for c in feat.columns if c.startswith('ret_lag_')]
    feat = feat.dropna(subset=lag_cols)

    if feat.empty:
        raise ValueError("Not enough data to compute features")

    # Only the most recent row is scored (kept as a one-row DataFrame).
    latest_features = feat.iloc[[-1]]

    # Column selection: drop raw/target columns, and of the rolling
    # statistics keep only the 3- and 5-period windows. The grouping is
    # written out explicitly; the original relied on `and` binding tighter
    # than `or` to get the same result.
    excluded = {'date', 'close', 'ret', 'log_ret', 'next_close', 'target'}
    kept_rolling = {
        'roll_mean_3', 'roll_std_3', 'roll_min_3', 'roll_max_3',
        'roll_mean_5', 'roll_std_5', 'roll_min_5', 'roll_max_5',
    }
    feature_cols = [
        c for c in latest_features.columns
        if c in kept_rolling
        or (c not in excluded and not c.startswith('roll_'))
    ]

    # NOTE(review): the columns and their order must match what the model was
    # trained on; this is not verified here — confirm against training code.
    X = latest_features[feature_cols]

    # Column 1 of predict_proba is taken as the "up" class probability.
    proba_up = model.predict_proba(X)[:, 1][0]
    prediction = int(proba_up >= threshold)

    # Labels were previously mis-encoded (UTF-8 emoji decoded with the wrong
    # codec); restored to the intended characters.
    direction = "UP 📈" if prediction == 1 else "DOWN 📉"

    return prediction, proba_up, direction


if __name__ == "__main__":
    # Example usage: load the published model and score a synthetic series.
    model = load_model()

    if model is None:
        print("Failed to load model")
        # `exit()` is a helper injected by the `site` module for interactive
        # use; raising SystemExit works in every execution mode.
        raise SystemExit(1)

    # Single source of truth for the threshold that is both used and printed.
    decision_threshold = 0.52

    # Example historical data (replace with your data)
    example_data = pd.DataFrame({
        'date': pd.date_range('2023-01-01', periods=50, freq='D'),
        'close': np.random.uniform(1000000, 1200000, 50)  # Random prices
    })

    try:
        pred, proba, direction = predict_next_day_direction(
            model, example_data, threshold=decision_threshold
        )
        print(f"Next-day prediction: {direction}")
        # Previously printed the literal text ".3f" instead of the value.
        print(f"Probability of UP: {proba:.3f}")
        print(f"Decision threshold: {decision_threshold}")
    except Exception as e:
        # Top-level boundary of the demo script: report and finish.
        print(f"Error making prediction: {e}")