File size: 5,342 Bytes
9a59bc2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
"""
Inference script for Gold Price Direction Predictor
This script demonstrates how to load the model and make predictions.
"""
import pandas as pd
import numpy as np
from joblib import load
from huggingface_hub import hf_hub_download
import warnings
warnings.filterwarnings("ignore")
def load_model(repo_id="theonegareth/GoldPricePredictor",
               filename="gold_direction_model.joblib"):
    """
    Load the trained model from Hugging Face

    Parameters:
    - repo_id: Hub repository to download from (defaults to the repository
      this script was written for)
    - filename: Name of the joblib file inside that repository

    Returns:
    - The object stored in the joblib file, or None when the download or the
      deserialization fails (the error is printed instead of raised)
    """
    try:
        model_path = hf_hub_download(repo_id, filename)
        # SECURITY: joblib files are pickle-based, so loading one can execute
        # arbitrary code. Only point this at repositories you trust.
        model = load(model_path)
    except Exception as e:
        # Deliberate best-effort: callers test the result for None.
        print(f"Error loading model: {e}")
        return None
    print("Model loaded successfully!")
    return model
def add_features_adaptive(data: pd.DataFrame, price='close') -> pd.DataFrame:
    """
    Feature engineering function (same as used in training)

    Appends return, lag, rolling-window, RSI, MACD, Bollinger-band and
    calendar columns. Lag counts and window lengths are derived from the
    number of rows so that short histories still produce features.

    Parameters:
    - data: DataFrame with a 'date' column and the price column; rows are
      processed in the order given. 'date' may hold datetimes or values
      that pd.to_datetime can parse (e.g. ISO date strings).
    - price: Name of the price column

    Returns:
    - A copy of data with the feature columns appended (input is not modified)

    Raises:
    - ValueError: if data has fewer than 8 rows
    """
    n = len(data)
    if n < 8:
        raise ValueError(f"Dataset too small (n={n}). Need at least 8 rows.")

    out = data.copy()
    close = out[price]

    out['ret'] = close.pct_change()
    out['log_ret'] = np.log1p(out['ret'])

    # Adaptive lags and windows
    max_lag = max(1, min(5, n // 6))
    windows = [w for w in (3, 5, 10, 20) if w < n - 2]
    if not windows:
        windows = [3]

    for lag in range(1, max_lag + 1):
        out[f'ret_lag_{lag}'] = out['ret'].shift(lag)

    for w in windows:
        rolling_ret = out['ret'].rolling(w, min_periods=1)
        out[f'roll_mean_{w}'] = rolling_ret.mean()
        out[f'roll_std_{w}'] = rolling_ret.std()
        out[f'roll_min_{w}'] = rolling_ret.min()
        out[f'roll_max_{w}'] = rolling_ret.max()

    # RSI (the column keeps the name 'rsi14' even when the window is shorter)
    rsi_w = max(3, min(14, n // 6))
    delta = close.diff()
    gain = delta.where(delta > 0, 0.0).rolling(rsi_w, min_periods=1).mean()
    loss = (-delta.where(delta < 0, 0.0)).rolling(rsi_w, min_periods=1).mean()
    # The small epsilon avoids a division by zero when there were no losses.
    rs = gain / (loss + 1e-9)
    out['rsi14'] = 100 - (100 / (1 + rs))

    # MACD
    fast = max(6, min(12, n // 5))
    slow = max(fast + 4, min(26, n // 3))
    signal = max(5, min(9, n // 6))
    ema_fast = close.ewm(span=fast, adjust=False).mean()
    ema_slow = close.ewm(span=slow, adjust=False).mean()
    out['macd'] = ema_fast - ema_slow
    out['macd_signal'] = out['macd'].ewm(span=signal, adjust=False).mean()
    out['macd_hist'] = out['macd'] - out['macd_signal']

    # Bollinger
    bb_w = max(5, min(20, n // 4))
    rolling_close = close.rolling(bb_w, min_periods=1)
    ma = rolling_close.mean()
    sd = rolling_close.std()
    out['bb_mid'] = ma
    out['bb_up'] = ma + 2 * sd
    out['bb_low'] = ma - 2 * sd
    out['bb_width'] = (out['bb_up'] - out['bb_low']) / (out['bb_mid'] + 1e-9)

    # Calendar
    try:
        when = out['date'].dt
    except AttributeError:
        # 'date' is not datetime-like (e.g. strings from a CSV): parse it
        # for the calendar features only, leaving the stored column as is.
        when = pd.to_datetime(out['date']).dt
    out['dow'] = when.weekday
    out['month'] = when.month

    return out
def predict_next_day_direction(model, historical_data: pd.DataFrame, threshold=0.52):
    """
    Predict next-day direction from historical price data

    Parameters:
    - model: Loaded sklearn model
    - historical_data: DataFrame with 'date' and 'close' columns
    - threshold: Probability threshold for prediction (optimized from training)

    Returns:
    - prediction: 1 for up, 0 for down
    - probability: Probability of going up
    - direction: Human-readable label for the prediction

    Raises:
    - ValueError: if no row with complete lag features remains, or if the
      model declares input features that could not be computed
    """
    # Ensure data is sorted
    historical_data = historical_data.sort_values('date').reset_index(drop=True)

    # Add features
    feat = add_features_adaptive(historical_data, price='close')

    # Drop rows with NaN (lags, etc.)
    lag_cols = [c for c in feat.columns if c.startswith('ret_lag_')]
    feat = feat.dropna(subset=lag_cols)
    if feat.empty:
        raise ValueError("Not enough data to compute features")

    # Get latest features (double brackets keep a one-row DataFrame)
    latest_features = feat.iloc[[-1]]

    # Select feature columns: everything except raw/target columns, and of
    # the rolling statistics only the 3- and 5-period windows.
    non_features = {'date', 'close', 'ret', 'log_ret', 'next_close', 'target'}
    kept_rolling = {'roll_mean_3', 'roll_std_3', 'roll_min_3', 'roll_max_3',
                    'roll_mean_5', 'roll_std_5', 'roll_min_5', 'roll_max_5'}
    feature_cols = [
        c for c in latest_features.columns
        if c in kept_rolling
        or (c not in non_features and not c.startswith('roll_'))
    ]

    # When the model records the column names it was fitted on, use exactly
    # those (in that order): the adaptive feature set above varies with the
    # amount of history supplied.
    expected = getattr(model, 'feature_names_in_', None)
    if expected is not None:
        expected = list(expected)
        missing = [c for c in expected if c not in latest_features.columns]
        if missing:
            raise ValueError(
                f"Model expects features that could not be computed: {missing}"
            )
        feature_cols = expected

    X = latest_features[feature_cols]

    # Predict
    # NOTE(review): column 1 of predict_proba is taken as P(up); this assumes
    # the model's classes are ordered [0, 1] - confirm against training.
    proba_up = model.predict_proba(X)[:, 1][0]
    prediction = int(proba_up >= threshold)
    direction = "UP 📈" if prediction == 1 else "DOWN 📉"
    return prediction, proba_up, direction
if __name__ == "__main__":
    # Example usage: load the published model and predict from sample data.
    model = load_model()
    if model is None:
        print("Failed to load model")
        # SystemExit works even where the interactive `exit` helper is absent.
        raise SystemExit(1)

    # Example historical data (replace with your data)
    example_data = pd.DataFrame({
        'date': pd.date_range('2023-01-01', periods=50, freq='D'),
        'close': np.random.uniform(1000000, 1200000, 50),  # Random prices
    })

    threshold = 0.52
    try:
        pred, proba, direction = predict_next_day_direction(
            model, example_data, threshold=threshold
        )
        print(f"Next-day prediction: {direction}")
        print(f"Probability of going up: {proba:.3f}")
        print(f"Decision threshold: {threshold}")
    except Exception as e:
        # Top-level boundary of the demo: report the failure and finish.
        print(f"Error making prediction: {e}")