Train-Local / app.py
tejani's picture
Update app.py
d960f3d verified
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
import gradio as gr
import matplotlib.pyplot as plt
import seaborn as sns
import io
import zipfile
import joblib
from PIL import Image
import warnings
warnings.filterwarnings('ignore')
# Function to load and preprocess data
def load_and_preprocess_data(file):
    """Read the uploaded CSV and add numeric encodings of every suit and rank.

    For each of the three hands (Dragon, Tiger, Lion) the columns
    ``'<Hand> Suit'`` and ``'<Hand> Rank'`` are mapped to integers and stored
    in ``'<Hand> Suit Num'`` / ``'<Hand> Rank Num'``.

    Args:
        file: Either a filesystem path (``str`` / path-like) or an object
            exposing the path through a ``.name`` attribute (the wrapper some
            Gradio versions pass for uploads).

    Returns:
        ``(data, None)`` on success, where ``data`` is the DataFrame with the
        six added ``... Num`` columns, or ``(None, message)`` when the file
        cannot be read, a required column is missing, or a cell holds a value
        that is not a known suit/rank.
    """
    suit_order = {'spades': 0, 'hearts': 1, 'clubs': 2, 'diamonds': 3}
    rank_order = {'ace': 0, '2': 1, '3': 2, '4': 3, '5': 4, '6': 5, '7': 6, '8': 7, '9': 8, '10': 9,
                  'jack': 10, 'queen': 11, 'king': 12}
    try:
        if file is None:
            return None, "Error loading data: no file was provided"

        # Accept both a plain path and an upload wrapper carrying the path in `.name`.
        # Path-like objects are checked first because `Path.name` is only the basename.
        if isinstance(file, (str, bytes)) or hasattr(file, '__fspath__'):
            path = file
        else:
            path = getattr(file, 'name', file)
        data = pd.read_csv(path)

        for hand in ('Dragon', 'Tiger', 'Lion'):
            for attr, order in (('Suit', suit_order), ('Rank', rank_order)):
                source = f'{hand} {attr}'
                raw = data[source]
                # A rank column holding only digits is parsed as integers by
                # pandas, so cast to text before matching the string keys;
                # also tolerate stray whitespace and capitalisation.
                normalized = raw.astype(str).str.strip().str.lower()
                encoded = normalized.map(order)
                unmapped = encoded.isna()
                if unmapped.any():
                    # Fail here with a readable message instead of letting NaN
                    # labels blow up later inside resampling/training.
                    bad_values = sorted(set(raw[unmapped].astype(str)))
                    return None, (
                        f"Error loading data: unrecognized values in "
                        f"'{source}': {bad_values}"
                    )
                data[f'{source} Num'] = encoded.astype(int)
        return data, None
    except Exception as e:
        return None, f"Error loading data: {str(e)}"
# Feature engineering
def create_features(data, n_games=3):
    """Build lag features describing the ``n_games`` games before each game.

    One output row is produced for every game from position ``n_games``
    onwards. Each row contains, in this order:

    * for every lag ``t-1 .. t-n_games``: suit and rank codes of the Dragon,
      Tiger and Lion hands (6 values per lag);
    * for every lag: the suit * rank product of each hand (3 values per lag);
    * mean and standard deviation of all suit codes, then of all rank codes,
      across the previous ``n_games`` games.

    Args:
        data: DataFrame holding the six ``'<Hand> Suit Num'`` /
            ``'<Hand> Rank Num'`` columns.
        n_games: Number of preceding games summarised per row.

    Returns:
        DataFrame with ``len(data) - n_games`` rows (none when there are not
        enough games) and ``9 * n_games + 4`` named columns.
    """
    hand_names = ['Dragon', 'Tiger', 'Lion']
    lags = range(1, n_games + 1)

    column_names = []
    for lag in lags:
        for hand in hand_names:
            for attr in ['Suit', 'Rank']:
                column_names.append(f'{hand}_{attr}_t-{lag}')
    for lag in lags:
        for hand in hand_names:
            column_names.append(f'{hand}_suit_rank_inter_t-{lag}')
    column_names += ['suit_mean', 'suit_std', 'rank_mean', 'rank_std']

    positions = range(n_games, len(data))
    if not positions:
        # Not enough history for a single row.
        return pd.DataFrame([], columns=column_names)

    suit_columns = ['Dragon Suit Num', 'Tiger Suit Num', 'Lion Suit Num']
    rank_columns = ['Dragon Rank Num', 'Tiger Rank Num', 'Lion Rank Num']
    # Pull the raw arrays out once; the loop below only does positional reads.
    per_hand = [
        (data[suit_col].values, data[rank_col].values)
        for suit_col, rank_col in zip(suit_columns, rank_columns)
    ]
    suit_table = data[suit_columns].values
    rank_table = data[rank_columns].values

    rows = []
    for current in positions:
        row = []
        # Raw suit/rank codes, most recent game first.
        for lag in lags:
            previous = current - lag
            for suits, ranks in per_hand:
                row.append(suits[previous])
                row.append(ranks[previous])
        # Suit x rank interaction per hand, same lag order.
        for lag in lags:
            previous = current - lag
            for suits, ranks in per_hand:
                row.append(suits[previous] * ranks[previous])
        # Summary statistics over the whole look-back window.
        window_suits = suit_table[current - n_games:current].flatten()
        window_ranks = rank_table[current - n_games:current].flatten()
        row.append(np.mean(window_suits))
        row.append(np.std(window_suits))
        row.append(np.mean(window_ranks))
        row.append(np.std(window_ranks))
        rows.append(row)

    return pd.DataFrame(rows, columns=column_names)
# Function to plot confusion matrix
def plot_confusion_matrix(y_true, y_pred, title):
    """Render a confusion-matrix heatmap and return it as a PIL image.

    Args:
        y_true: Ground-truth class labels.
        y_pred: Predicted class labels, same length as ``y_true``.
        title: Text placed above the heatmap.

    Returns:
        ``PIL.Image`` opened from an in-memory PNG of the figure.
    """
    # Use the sorted union of labels that actually occur, and show those
    # values on the axes. Without explicit tick labels the heatmap numbers
    # rows/columns 0..k-1, which is wrong whenever a class is absent from
    # the evaluated split.
    labels = np.unique(
        np.concatenate([np.asarray(y_true).ravel(), np.asarray(y_pred).ravel()])
    )
    tick_labels = labels.tolist()
    cm = confusion_matrix(y_true, y_pred, labels=labels)

    fig = plt.figure(figsize=(6, 4))
    try:
        ax = fig.gca()
        sns.heatmap(
            cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=tick_labels, yticklabels=tick_labels, ax=ax,
        )
        ax.set_title(title)
        ax.set_xlabel('Predicted')
        ax.set_ylabel('Actual')
        buf = io.BytesIO()
        fig.savefig(buf, format='png')
    finally:
        # Always release the figure, even if drawing or saving fails.
        plt.close(fig)
    buf.seek(0)
    return Image.open(buf)
# Function to plot accuracy bar chart
def plot_accuracy_chart(accuracies):
    """Draw a bar chart of per-model accuracy and return it as a PIL image.

    Args:
        accuracies: Mapping of model name to accuracy; keys label the bars
            and values set their heights.

    Returns:
        ``PIL.Image`` opened from an in-memory PNG of the chart.
    """
    figure = plt.figure(figsize=(8, 5))
    axes = figure.gca()
    axes.bar(accuracies.keys(), accuracies.values(), color='skyblue')
    axes.set_title('Model Accuracy Comparison')
    axes.set_ylabel('Accuracy')
    plt.xticks(rotation=45)
    # The y-axis is pinned to 0..1 regardless of the values plotted.
    axes.set_ylim(0, 1)

    png_bytes = io.BytesIO()
    figure.savefig(png_bytes, format='png')
    png_bytes.seek(0)
    chart = Image.open(png_bytes)
    plt.close()
    return chart
# Function to create a ZIP file of models
def create_model_zip(models):
    """Serialize every model with joblib into one in-memory ZIP archive.

    Args:
        models: Mapping of model name to the object to serialize. Each entry
            is stored in the archive as ``"<name>_model.pkl"``.

    Returns:
        ``io.BytesIO`` containing the finished archive, positioned at the
        start so it can be read immediately.
    """
    archive = io.BytesIO()
    with zipfile.ZipFile(archive, mode='w', compression=zipfile.ZIP_DEFLATED) as bundle:
        for name in models:
            payload = io.BytesIO()
            joblib.dump(models[name], payload)
            # getvalue() returns the whole buffer irrespective of position.
            bundle.writestr(f"{name}_model.pkl", payload.getvalue())
    archive.seek(0)
    return archive
# Training function with progress tracking and model saving
def train_model(file, n_estimators, learning_rate, max_depth, subsample, progress=gr.Progress()):
    """Train one XGBoost classifier per card attribute and package the results.

    Six targets are modelled (suit and rank of the Dragon, Tiger and Lion
    hands), each predicted from features of the three preceding games.

    Args:
        file: Uploaded CSV as received from the Gradio ``File`` input.
        n_estimators: Maximum number of boosting rounds (cast to ``int``).
        learning_rate: Boosting learning rate (cast to ``float``).
        max_depth: Maximum tree depth (cast to ``int``).
        subsample: Row subsampling ratio (cast to ``float``).
        progress: Gradio progress tracker, called with a fraction and a
            description as training advances.

    Returns:
        A 4-tuple ``(report_text, accuracy_chart, confusion_matrix_images,
        zip_path)``. ``zip_path`` is the path of a temporary ZIP file holding
        the six trained models plus the fitted feature scaler. On any failure
        the tuple is ``(error_message, None, None, None)``.
    """
    import tempfile  # function-scope: only needed to hand the archive to Gradio

    progress(0, desc="Starting...")
    results = []
    try:
        # Load and preprocess data
        progress(0.1, desc="Loading and preprocessing data...")
        data, error = load_and_preprocess_data(file)
        if error:
            return error, None, None, None

        # Create features
        progress(0.2, desc="Engineering features...")
        n_games = 3
        features = create_features(data, n_games)
        if features.empty:
            # Say what is wrong instead of surfacing a scaler error about
            # an empty array.
            return (
                f"Error during training: at least {n_games + 1} games are "
                f"required, but the dataset has {len(data)}",
                None, None, None,
            )

        target_columns = {
            'dragon_suit': 'Dragon Suit Num',
            'dragon_rank': 'Dragon Rank Num',
            'tiger_suit': 'Tiger Suit Num',
            'tiger_rank': 'Tiger Rank Num',
            'lion_suit': 'Lion Suit Num',
            'lion_rank': 'Lion Rank Num',
        }
        # Drop the first n_games rows (they have no feature row) and realign
        # the index with the 0-based feature frame.
        targets = {
            name: data[column].iloc[n_games:].reset_index(drop=True)
            for name, column in target_columns.items()
        }

        # Scale features
        # NOTE(review): the scaler is fitted on all rows before the split, so
        # test-row statistics influence the transform.
        progress(0.3, desc="Scaling features...")
        scaler = StandardScaler()
        features_scaled = pd.DataFrame(
            scaler.fit_transform(features), columns=features.columns
        )

        accuracies = {}
        confusion_matrices = []
        trained_models = {}

        # Train models
        for i, (target_name, target) in enumerate(targets.items()):
            progress(0.4 + (i / len(targets)) * 0.4, desc=f"Training {target_name} model...")

            # Split data
            X_train, X_test, y_train, y_test = train_test_split(
                features_scaled, target, test_size=0.2, random_state=42
            )

            # Apply SMOTE. It needs k_neighbors + 1 samples in every class,
            # so shrink k for rare classes and skip resampling entirely when
            # a class has a single sample.
            smallest_class = int(y_train.value_counts().min())
            if smallest_class > 1:
                smote = SMOTE(random_state=42, k_neighbors=min(5, smallest_class - 1))
                X_train_res, y_train_res = smote.fit_resample(X_train, y_train)
            else:
                X_train_res, y_train_res = X_train, y_train

            # Train model. early_stopping_rounds belongs on the estimator:
            # passing it to fit() is deprecated and rejected by newer XGBoost
            # releases.
            model = XGBClassifier(
                random_state=42,
                eval_metric='mlogloss',
                n_estimators=int(n_estimators),
                learning_rate=float(learning_rate),
                max_depth=int(max_depth),
                subsample=float(subsample),
                early_stopping_rounds=10,
            )
            # NOTE(review): early stopping monitors the same split that is
            # scored below, so the reported accuracy is optimistic.
            model.fit(
                X_train_res,
                y_train_res,
                eval_set=[(X_test, y_test)],
                verbose=False,
            )

            # Save model
            trained_models[target_name] = model

            # Evaluate
            y_pred = model.predict(X_test)
            accuracy = accuracy_score(y_test, y_pred)
            report = classification_report(y_test, y_pred, zero_division=0)
            accuracies[target_name] = accuracy
            results.append(f"**{target_name} Results**\n")
            results.append(f"Accuracy: {accuracy:.2f}\n")
            results.append(f"Classification Report:\n{report}\n")

            # Generate confusion matrix plot
            cm_plot = plot_confusion_matrix(y_test, y_pred, f"Confusion Matrix - {target_name}")
            confusion_matrices.append(cm_plot)

        progress(0.9, desc="Generating visualizations and model archive...")

        # Generate accuracy bar chart
        accuracy_plot = plot_accuracy_chart(accuracies)

        # Create ZIP file of models. The fitted scaler is included because
        # the models were trained on scaled features and need the same
        # transform at prediction time.
        model_zip = create_model_zip({**trained_models, 'scaler': scaler})

        # The Gradio File output expects a path on disk, not an in-memory
        # buffer, so persist the archive to a temporary file.
        with tempfile.NamedTemporaryFile(
            prefix="trained_models_", suffix=".zip", delete=False
        ) as zip_file:
            zip_file.write(model_zip.getvalue())
            model_zip_path = zip_file.name

        progress(1.0, desc="Completed!")
        return "\n".join(results), accuracy_plot, confusion_matrices, model_zip_path
    except Exception as e:
        # UI boundary: report the failure in the results box instead of crashing.
        return f"Error during training: {str(e)}", None, None, None
# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Card Game Prediction Model Training")
    gr.Markdown("Upload the training dataset and configure hyperparameters to train the model. Track progress, view results, and download trained models.")

    # Inputs: the dataset plus the four XGBoost hyperparameters passed to train_model.
    file_input = gr.File(label="Upload TRAINING_CARD_DATA.csv")
    n_estimators = gr.Slider(50, 300, value=100, step=10, label="Number of Estimators")
    learning_rate = gr.Slider(0.01, 0.3, value=0.1, step=0.01, label="Learning Rate")
    max_depth = gr.Slider(3, 10, value=5, step=1, label="Max Depth")
    subsample = gr.Slider(0.5, 1.0, value=0.8, step=0.1, label="Subsample")
    train_button = gr.Button("Train Model")

    # Outputs, in the same order as the tuple returned by train_model.
    output_text = gr.Textbox(label="Training Results")
    accuracy_plot = gr.Image(label="Accuracy Comparison")
    confusion_plots = gr.Gallery(label="Confusion Matrices")
    model_download = gr.File(label="Download Trained Models (ZIP)")

    train_button.click(
        fn=train_model,
        inputs=[file_input, n_estimators, learning_rate, max_depth, subsample],
        outputs=[output_text, accuracy_plot, confusion_plots, model_download]
    )

# Only start the server when run as a script, so importing this module
# (e.g. to reuse the training helpers) does not launch the app.
if __name__ == "__main__":
    demo.launch()