# Source: app.py from a Hugging Face Space by ysharma (HF Staff).
# Commit 8a914d9 (verified): "Update app.py"
import gradio as gr
import pandas as pd
from datasets import load_dataset
from datetime import datetime, timedelta
import numpy as np
import os
# ============================================
# DATA LOADING FUNCTION
# ============================================
# Global variable for dataset
# Holds the prepared registrations DataFrame; stays None until load_data()
# succeeds, and every stats/plot function checks for None before using it.
df = None
# Access token read from the environment and passed to load_dataset();
# None when the HF_TOKEN variable is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")
def load_data():
    """Load and prepare the hackathon registration dataset.

    Fetches the ``train`` split of the registrations dataset, converts it to
    a pandas DataFrame, parses the ``timestamp`` column and derives the
    ``date`` and ``hour`` helper columns from it.

    The module-level ``df`` is replaced only after every preparation step has
    succeeded, so a failure part-way through never leaves a half-prepared
    frame (one without ``date``/``hour``) behind in the global.

    Returns:
        The prepared DataFrame, or None if loading or preparation failed
        (the error is printed).
    """
    global df
    try:
        dataset = load_dataset(
            "ysharma/gradio-hackathon-registrations-winter-2025",
            split='train',
            token=HF_TOKEN,
        )
        frame = dataset.to_pandas()
        # Convert timestamp to datetime
        frame['timestamp'] = pd.to_datetime(frame['timestamp'])
        frame['date'] = frame['timestamp'].dt.date
        frame['hour'] = frame['timestamp'].dt.hour
    except Exception as e:
        # Deliberately broad: any failure here must degrade to "no data"
        # instead of crashing the app.
        print(f"Error loading data: {e}")
        return None
    # Publish only a fully prepared frame.
    df = frame
    return df
def refresh_data():
    """Reload the dataset and return fresh values for every dashboard output.

    When the reload fails, the previously loaded data (if any) is kept so the
    dashboard keeps showing the last good numbers next to the error status,
    instead of every panel collapsing to "Error loading data".

    Returns:
        A 10-tuple, in the order the refresh button's outputs are wired:
        status message, overview stats, experience breakdown, cumulative
        registrations data, experience data, Gradio usage data, previous
        participation data, channels data, channel breakdown, hourly data.
    """
    global df
    previous = df
    fresh = load_data()
    if fresh is None:
        # Restore explicitly rather than relying on load_data() having left
        # the global untouched when it failed.
        df = previous
        status = "❌ Error loading data"
    else:
        df = fresh
        timestamp = datetime.now().strftime('%B %d, %Y at %H:%M:%S')
        status = f"✅ Data refreshed successfully at {timestamp}"
    # Both outcomes return the same components; only the status line differs.
    return (
        status,
        get_overview_stats(),
        get_experience_breakdown(),
        create_registrations_over_time(),
        create_experience_plot(),
        create_gradio_usage_plot(),
        create_previous_participation_plot(),
        create_channels_plot(),
        get_channel_breakdown(),
        create_hourly_plot(),
    )
# Load data once at startup
# Runs when the module is executed/imported; df stays None if the load fails,
# in which case the stats and plot functions return their error/empty values.
df = load_data()
# ============================================
# STATISTICS FUNCTIONS
# ============================================
def get_overview_stats():
    """Generate the overview statistics as a Markdown string.

    Reads the module-level ``df`` and reports the total number of
    registrations, the first and latest registration timestamps, and the
    calendar day with the most registrations.

    Returns:
        ``"Error loading data"`` when no dataset is loaded,
        ``"No registrations yet."`` when the dataset has no rows (the
        min/max/idxmax calls below would otherwise raise), and the Markdown
        overview otherwise.
    """
    if df is None:
        return "Error loading data"
    if df.empty:
        return "No registrations yet."
    first = df['timestamp'].min()
    latest = df['timestamp'].max()
    # Group once and reuse the counts for both the peak day and its size.
    daily_counts = df.groupby('date').size()
    peak_day = daily_counts.idxmax()
    peak_count = daily_counts.max()
    stats = f"""
### 📊 Registration Overview
<br>**Total Registrations:** {len(df):,}
**Registration Period:**
- First: {first.strftime('%B %d, %Y at %H:%M')}
- Latest: {latest.strftime('%B %d, %Y at %H:%M')}
<br>**Peak Registration Day:** {peak_day} ({peak_count} registrations)
"""
    return stats
def get_experience_breakdown():
    """Generate the experience level breakdown as a Markdown string.

    Counts the values of the ``experience_level`` column of the module-level
    ``df`` and lists each level with its count and its share of all rows
    (``len(df)``), most frequent first.

    Returns:
        ``"Error loading data"`` when no dataset is loaded, otherwise a
        Markdown heading followed by one bold line per experience level.
    """
    if df is None:
        return "Error loading data"
    exp_counts = df['experience_level'].value_counts()
    total = len(df)
    entries = [
        f"**{level}:** {count} ({(count / total) * 100:.1f}%)\n\n"
        for level, count in exp_counts.items()
    ]
    return "### 👨‍💻 Experience Level Breakdown\n\n" + "".join(entries)
def get_channel_breakdown():
    """Generate the acquisition channel breakdown as a Markdown string.

    Takes the ten most frequent values of the ``how_heard`` column of the
    module-level ``df`` and lists each with its count and its share of all
    rows (``len(df)``), most frequent first.

    Returns:
        ``"Error loading data"`` when no dataset is loaded, otherwise a
        Markdown heading followed by one bold line per channel.
    """
    if df is None:
        return "Error loading data"
    channel_counts = df['how_heard'].value_counts().head(10)
    total = len(df)
    entries = [
        f"**{channel}:** {count} ({(count / total) * 100:.1f}%)\n\n"
        for channel, count in channel_counts.items()
    ]
    return "### 📢 Top Acquisition Channels\n\n" + "".join(entries)
# ============================================
# PLOTTING FUNCTIONS
# ============================================
def create_registrations_over_time():
    """Build the data behind the cumulative registrations line plot.

    Returns:
        An empty DataFrame when no dataset is loaded; otherwise a frame with
        a datetime ``date`` column in ascending order and a ``registrations``
        column holding the running total up to and including each date.
    """
    if df is None:
        return pd.DataFrame()
    per_day = df.groupby('date').size()
    timeline = per_day.reset_index(name='registrations')
    timeline = timeline.assign(date=pd.to_datetime(timeline['date']))
    timeline = timeline.sort_values('date')
    # Turn the per-day counts into a running total.
    timeline['registrations'] = timeline['registrations'].cumsum()
    return timeline
def create_experience_plot():
    """Build the data behind the experience level bar plot.

    Returns:
        An empty DataFrame when no dataset is loaded; otherwise a frame with
        an ``experience_level`` column (labels passed through
        ``shorten_experience_label``) and a ``count`` column.
    """
    if df is None:
        return pd.DataFrame()
    level_counts = df['experience_level'].value_counts()
    exp_data = level_counts.rename_axis('experience_level').reset_index(name='count')
    # Shorten the labels
    short_labels = exp_data['experience_level'].apply(shorten_experience_label)
    exp_data['experience_level'] = short_labels
    return exp_data
def shorten_participation_label(label):
    """Return a compact display name for a previous-participation answer.

    The first known phrase (in the order listed below) found inside *label*
    selects the short name; a label matching none of them is returned as is,
    or cut to 25 characters plus ``'...'`` when it is longer than that.
    """
    label_map = {
        'Yes - Multiple hackathons before': 'Multiple Hackathons',
        'Yes - One hackathon before': 'One Hackathon',
        'No - This is my first': 'First Hackathon',
        'No - But attended similar events': 'Similar Events'
    }
    short = next((name for phrase, name in label_map.items() if phrase in label), None)
    if short is not None:
        return short
    # Fallback: truncate anything longer than 25 chars
    if len(label) > 25:
        return label[:25] + '...'
    return label
def shorten_experience_label(label):
    """Return a compact display name for an experience-level answer.

    The first known phrase (in the order listed below) found inside *label*
    selects the short name; a label matching none of them is returned as is,
    or cut to 20 characters plus ``'...'`` when it is longer than that.
    """
    label_map = {
        'Expert - Professional AI engineer': 'Expert/Professional',
        'Advanced - Built multiple ML apps': 'Advanced',
        'Intermediate - Built a few apps': 'Intermediate',
        'Beginner - Just getting started': 'Beginner',
        'No experience - Complete beginner': 'No Experience'
    }
    short = next((name for phrase, name in label_map.items() if phrase in label), None)
    if short is not None:
        return short
    # Fallback: truncate anything longer than 20 chars
    if len(label) > 20:
        return label[:20] + '...'
    return label
def shorten_usage_label(label):
    """Return a compact display name for a Gradio-usage answer.

    The first known phrase (in the order listed below) found inside *label*
    selects the short name; a label matching none of them is returned as is,
    or cut to 20 characters plus ``'...'`` when it is longer than that.
    """
    label_map = {
        'Personal projects - Building side projects': 'Personal',
        'Learning - New to Gradio, want to learn': 'Learning',
        'Not using yet - Interested to start': 'Interested',
        'Academic/Research - University or research': 'Academic',
        'Professional work - My company uses Gradio': 'Professional',
        'Open source - Contributing to projects': 'Open Source'
    }
    short = next((name for phrase, name in label_map.items() if phrase in label), None)
    if short is not None:
        return short
    # Fallback: truncate anything longer than 20 chars
    if len(label) > 20:
        return label[:20] + '...'
    return label
def shorten_channel_label(label):
    """Return a compact display name for an acquisition-channel answer.

    The first known phrase (in the order listed below) found inside *label*
    selects the short name; a label matching none of them is returned as is,
    or cut to 15 characters plus ``'...'`` when it is longer than that.
    """
    label_map = {
        'Hugging Face email/newsletter': 'HF Email',
        'LinkedIn': 'LinkedIn',
        'Discord': 'Discord',
        'Twitter/X': 'X',
        'From a colleague/friend': 'Friend',
        'Other': 'Other',
        'I participated in June 2025': 'June Hack',
        'YouTube': 'YouTube',
        'Reddit': 'Reddit',
        'Sponsor announcement': 'Sponsor Comms'
    }
    short = next((name for phrase, name in label_map.items() if phrase in label), None)
    if short is not None:
        return short
    # Fallback: truncate anything longer than 15 chars
    if len(label) > 15:
        return label[:15] + '...'
    return label
def create_gradio_usage_plot():
    """Build the data behind the Gradio usage distribution plot.

    Returns:
        An empty DataFrame when no dataset is loaded; otherwise a frame with
        a ``usage_type`` column (labels passed through
        ``shorten_usage_label``) and a ``count`` column.
    """
    if df is None:
        return pd.DataFrame()
    usage_counts = df['gradio_usage'].value_counts()
    usage_data = usage_counts.rename_axis('usage_type').reset_index(name='count')
    # Shorten the labels
    short_labels = usage_data['usage_type'].apply(shorten_usage_label)
    usage_data['usage_type'] = short_labels
    return usage_data
def create_hourly_plot():
    """Build the data behind the registrations-by-hour plot.

    Returns:
        An empty DataFrame when no dataset is loaded; otherwise a frame with
        an ``hour`` column in ascending order and a ``registrations`` column
        giving the number of rows recorded for that hour.
    """
    if df is None:
        return pd.DataFrame()
    by_hour = df['hour'].value_counts().sort_index()
    return by_hour.rename_axis('hour').reset_index(name='registrations')
def create_channels_plot():
    """Build the data behind the top acquisition channels plot.

    Returns:
        An empty DataFrame when no dataset is loaded; otherwise a frame for
        the ten most frequent ``how_heard`` values, with a ``channel`` column
        (labels passed through ``shorten_channel_label``) and a ``count``
        column.
    """
    if df is None:
        return pd.DataFrame()
    top_channels = df['how_heard'].value_counts().head(10)
    channels_data = top_channels.rename_axis('channel').reset_index(name='count')
    # Shorten the labels
    short_labels = channels_data['channel'].apply(shorten_channel_label)
    channels_data['channel'] = short_labels
    return channels_data
def create_previous_participation_plot():
    """Build the data behind the previous participation plot.

    Returns:
        An empty DataFrame when no dataset is loaded; otherwise a frame with
        a ``participation_type`` column (labels passed through
        ``shorten_participation_label``) and a ``count`` column.
    """
    if df is None:
        return pd.DataFrame()
    participation_counts = df['previous_participation'].value_counts()
    prev_data = participation_counts.rename_axis('participation_type').reset_index(name='count')
    # Shorten the labels
    short_labels = prev_data['participation_type'].apply(shorten_participation_label)
    prev_data['participation_type'] = short_labels
    return prev_data
# ============================================
# FILTER FUNCTIONS
# ============================================
def filter_data(experience_filter, days_filter):
    """Filter the loaded registrations by experience level and recency.

    Args:
        experience_filter: ``"All"`` to keep every row, otherwise the exact
            ``experience_level`` value to keep.
        days_filter: ``"All Time"`` to keep every row, otherwise a label whose
            first whitespace-separated token is an integer number of days
            (e.g. ``"7 days"``); only rows from that many days back are kept.

    Returns:
        An empty DataFrame when no dataset is loaded; otherwise a new
        DataFrame (independent of the module-level ``df``) with the matching
        rows.

    Raises:
        ValueError: if *days_filter* does not start with an integer.
    """
    if df is None:
        return pd.DataFrame()
    filtered_df = df
    # Filter by experience level
    if experience_filter != "All":
        filtered_df = filtered_df[filtered_df['experience_level'] == experience_filter]
    # Filter by date range
    if days_filter != "All Time":
        days = int(days_filter.split()[0])
        # Build the cutoff in the column's own timezone: comparing a
        # timezone-aware column against a naive "now" raises TypeError.
        tz = getattr(filtered_df['timestamp'].dtype, 'tz', None)
        cutoff_date = pd.Timestamp.now(tz=tz) - timedelta(days=days)
        filtered_df = filtered_df[filtered_df['timestamp'] >= cutoff_date]
    # Copy once at the end so callers never get a view of the global frame.
    return filtered_df.copy()
def update_filtered_stats(experience_filter, days_filter):
    """Summarize the registrations matching the selected filters as Markdown.

    Args:
        experience_filter: passed through to ``filter_data``.
        days_filter: passed through to ``filter_data``.

    Returns:
        ``"No data matches the selected filters."`` when nothing matches;
        otherwise Markdown with the number of matching rows, the average
        number of registrations per day, and the three most common
        ``gradio_usage`` values.
    """
    filtered_df = filter_data(experience_filter, days_filter)
    if len(filtered_df) == 0:
        return "No data matches the selected filters."
    # Whole days between the first and last matching registration, with a
    # floor of one day so a short window never divides by zero.
    span = filtered_df['timestamp'].max() - filtered_df['timestamp'].min()
    span_days = max(1, span.days)
    top_usage = filtered_df['gradio_usage'].value_counts().head(3).to_string()
    stats = f"""
### 📊 Filtered Results
**Matching Registrations:** {len(filtered_df):,}
**Average Daily Registrations:** {len(filtered_df) / span_days:.1f}
**Most Common Gradio Usage:**
{top_usage}
"""
    return stats
# ============================================
# UI CONSTRUCTION
# ============================================
# Dashboard layout: banner image, refresh button with a status line, four
# tabs (stats, participants, channels, patterns) and a footer. Every
# component is seeded from the data loaded at startup; the refresh button
# re-runs refresh_data() and pushes its ten return values into the
# components listed in `outputs`, in the same order.
with gr.Blocks(theme="ocean") as demo:
    # Header with banner image
    with gr.Row():
        gr.Image(
            value="https://cdn-uploads.huggingface.co/production/uploads/60d2dc1007da9c17c72708f8/s4q7RzD3S-8xQ8ecXrSwb.png",
            show_label=False,
            container=False,
            show_download_button=False,
            show_share_button=False,
            interactive=False,
        )
    # Refresh button
    with gr.Row():
        refresh_btn = gr.Button("🔄 Refresh Data", variant="primary")
        # Hidden until the first refresh produces a status message.
        refresh_status = gr.Markdown("", visible=False)
    # Main tabs
    with gr.Tabs():
        # ============================================
        # TAB 1: OVERVIEW
        # ============================================
        with gr.Tab("📊 Stats"):
            # Stats in single column on mobile, two columns on desktop
            with gr.Row():
                overview_stats = gr.Markdown(get_overview_stats())
            with gr.Row():
                experience_breakdown = gr.Markdown(get_experience_breakdown())
            # Registrations over time - NO FIXED WIDTH
            with gr.Row():
                reg_over_time = create_registrations_over_time()
                reg_plot = gr.LinePlot(
                    value=reg_over_time,
                    x="date",
                    y="registrations",
                    title="Cumulative Registrations Over Time",
                    height=400,
                    overlay_point=True,
                    tooltip=["date", "registrations"],
                    x_title="Date",
                    y_title="Total Registrations",
                    container=True
                )
        # ============================================
        # TAB 2: PARTICIPANT INSIGHTS
        # ============================================
        with gr.Tab("👥 Participants"):
            # Experience plot
            with gr.Row():
                exp_plot_data = create_experience_plot()
                exp_plot = gr.BarPlot(
                    value=exp_plot_data,
                    x="experience_level",
                    y="count",
                    title="Experience Level Distribution",
                    y_title="Number of Participants",
                    x_title="Experience Level",
                    height=400,
                    container=True
                )
            # Usage plot
            with gr.Row():
                usage_plot_data = create_gradio_usage_plot()
                usage_plot = gr.BarPlot(
                    value=usage_plot_data,
                    x="usage_type",
                    y="count",
                    title="How Participants Use Gradio",
                    y_title="Number of Participants",
                    x_title="Usage Type",
                    height=400,
                    container=True
                )
            # Previous participation
            with gr.Row():
                prev_plot_data = create_previous_participation_plot()
                prev_plot = gr.BarPlot(
                    value=prev_plot_data,
                    x="participation_type",
                    y="count",
                    title="Previous Participation",
                    y_title="Number of Participants",
                    x_title="Experience Type",
                    height=350,
                    container=True
                )
        # ============================================
        # TAB 3: ACQUISITION CHANNELS
        # ============================================
        with gr.Tab("📢 Channels"):
            # Channels plot
            with gr.Row():
                channels_plot_data = create_channels_plot()
                channels_plot = gr.BarPlot(
                    value=channels_plot_data,
                    x="channel",
                    y="count",
                    title="Top 10 Acquisition Channels",
                    y_title="Number of Registrations",
                    x_title="Channel",
                    height=400,
                    container=True
                )
            # Channel breakdown
            with gr.Row():
                channel_breakdown = gr.Markdown(get_channel_breakdown())
        # ============================================
        # TAB 4: REGISTRATION PATTERNS
        # ============================================
        with gr.Tab("⏰ Patterns"):
            with gr.Row():
                hourly_plot_data = create_hourly_plot()
                hourly_plot = gr.BarPlot(
                    value=hourly_plot_data,
                    x="hour",
                    y="registrations",
                    title="Registrations by Hour of Day (UTC)",
                    y_title="Number of Registrations",
                    x_title="Hour (24-hour format)",
                    height=400,
                    container=True
                )
    # Connect refresh button to update all components
    refresh_btn.click(
        fn=refresh_data,
        inputs=[],
        outputs=[
            refresh_status,
            overview_stats,
            experience_breakdown,
            reg_plot,
            exp_plot,
            usage_plot,
            prev_plot,
            channels_plot,
            channel_breakdown,
            hourly_plot
        ]
    ).then(
        # Reveal the status line and leave it on screen. Hiding it again in
        # an immediately following step (no delay in between) made the
        # message disappear before anyone could read it.
        fn=lambda: gr.Markdown(visible=True),
        outputs=refresh_status
    )
    # Footer
    gr.Markdown("""
<div style="text-align: center; padding: 10px; font-size: 14px;">
<a href="https://huggingface.co/MCP-1st-Birthday" style="margin: 0 10px;">MCP's 1st Birthday Party</a> |
<a href="https://huggingface.co/spaces/MCP-1st-Birthday/gradio-hackathon-registration-winter25" style="margin: 0 10px;">Registration</a>
</div>
""",)
# ============================================
# LAUNCH
# ============================================
# Start the Gradio app only when this file is run as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    demo.launch()