# Source: Hugging Face Space (status when captured: Sleeping)
import os
from datetime import datetime, timedelta

import gradio as gr
import numpy as np
import pandas as pd
from datasets import load_dataset
# ============================================
# DATA LOADING FUNCTION
# ============================================
# Module-level holder for the registrations DataFrame; None until a load succeeds.
df = None
# Hugging Face access token taken from the environment (None when unset).
HF_TOKEN = os.getenv("HF_TOKEN")
def load_data():
    """Fetch the hackathon registration dataset and prepare it for analysis.

    Loads the ``train`` split of the registrations dataset (passing
    ``HF_TOKEN`` as the access token), converts it to a pandas DataFrame,
    parses the ``timestamp`` column into datetimes and derives ``date`` and
    ``hour`` columns from it. The frame is also stored in the module-level
    ``df``.

    Returns:
        The prepared DataFrame, or ``None`` when any step fails (the error is
        printed rather than raised).
    """
    global df
    try:
        df = load_dataset(
            "ysharma/gradio-hackathon-registrations-winter-2025",
            split='train',
            token=HF_TOKEN,
        ).to_pandas()
        # Parse once, then derive the calendar date and hour-of-day columns
        parsed = pd.to_datetime(df['timestamp'])
        df['timestamp'] = parsed
        df['date'] = parsed.dt.date
        df['hour'] = parsed.dt.hour
    except Exception as e:
        print(f"Error loading data: {e}")
        return None
    return df
def refresh_data():
    """Reload the dataset and rebuild every refreshable dashboard value.

    Returns:
        A 10-tuple: the status message followed by the overview stats,
        experience breakdown, registrations-over-time data, experience plot
        data, Gradio usage plot data, previous-participation plot data,
        channels plot data, channel breakdown and hourly plot data. The same
        builders are called whether or not the reload succeeded; only the
        status message differs.
    """
    global df
    df = load_data()
    if df is None:
        status = "β Error loading data"
    else:
        timestamp = datetime.now().strftime('%B %d, %Y at %H:%M:%S')
        status = f"β Data refreshed successfully at {timestamp}"
    return (
        status,
        get_overview_stats(),
        get_experience_breakdown(),
        create_registrations_over_time(),
        create_experience_plot(),
        create_gradio_usage_plot(),
        create_previous_participation_plot(),
        create_channels_plot(),
        get_channel_breakdown(),
        create_hourly_plot(),
    )
# Load data once at startup (import time); the UI construction below reads
# this module-level frame to compute its initial values.
df = load_data()
# ============================================
# STATISTICS FUNCTIONS
# ============================================
def get_overview_stats():
    """Build the Markdown overview of the registration data.

    Reads the module-level ``df`` and reports the total number of rows, the
    earliest and latest ``timestamp`` values, and the ``date`` with the most
    registrations together with its count.

    Returns:
        A Markdown string. ``"Error loading data"`` when no data is loaded,
        and a short placeholder when the dataset has no rows (``min``/``max``
        would be ``NaT`` and ``idxmax`` would raise on an empty frame).
    """
    if df is None:
        return "Error loading data"
    if df.empty:
        return "No registrations yet."

    when_fmt = '%B %d, %Y at %H:%M'
    first_seen = df['timestamp'].min()
    last_seen = df['timestamp'].max()
    # Group once and reuse the result for both the peak day and its count
    per_day = df.groupby('date').size()
    peak_day = per_day.idxmax()
    peak_count = per_day.max()

    return (
        "\n"
        "### π Registration Overview\n"
        f"<br>**Total Registrations:** {len(df):,}\n"
        "**Registration Period:**\n"
        f"- First: {first_seen.strftime(when_fmt)}\n"
        f"- Latest: {last_seen.strftime(when_fmt)}\n"
        f"<br>**Peak Registration Day:** {peak_day} ({peak_count} registrations)\n"
    )
def get_experience_breakdown():
    """Build a Markdown list of experience levels with counts and shares.

    Reads the module-level ``df``. Each distinct ``experience_level`` value
    gets one entry, in ``value_counts`` order, showing its count and its
    percentage of all rows.

    Returns:
        A Markdown string, or ``"Error loading data"`` when no data is loaded.
    """
    if df is None:
        return "Error loading data"
    n_rows = len(df)
    entries = [
        f"**{level}:** {count} ({(count / n_rows) * 100:.1f}%)\n\n"
        for level, count in df['experience_level'].value_counts().items()
    ]
    return "### π¨βπ» Experience Level Breakdown\n\n" + "".join(entries)
def get_channel_breakdown():
    """Build a Markdown list of the ten most common acquisition channels.

    Reads the module-level ``df``. The ten most frequent ``how_heard`` values
    are listed with their count and their percentage of all rows (not just of
    the ten shown).

    Returns:
        A Markdown string, or ``"Error loading data"`` when no data is loaded.
    """
    if df is None:
        return "Error loading data"
    n_rows = len(df)
    top_channels = df['how_heard'].value_counts().head(10)
    entries = [
        f"**{channel}:** {count} ({(count / n_rows) * 100:.1f}%)\n\n"
        for channel, count in top_channels.items()
    ]
    return "### π’ Top Acquisition Channels\n\n" + "".join(entries)
# ============================================
# PLOTTING FUNCTIONS
# ============================================
def create_registrations_over_time():
    """Build the data for the cumulative registrations line plot.

    Reads the module-level ``df``, counts rows per ``date``, converts the
    dates to datetimes, sorts them chronologically and replaces the daily
    counts with their running total.

    Returns:
        A DataFrame with ``date`` and ``registrations`` columns, or an empty
        DataFrame when no data is loaded.
    """
    if df is None:
        return pd.DataFrame()
    per_day = df.groupby('date').size().reset_index(name='registrations')
    per_day = per_day.assign(date=pd.to_datetime(per_day['date'])).sort_values('date')
    # Running total so the line shows cumulative sign-ups
    per_day['registrations'] = per_day['registrations'].cumsum()
    return per_day
def create_experience_plot():
    """Build the data for the experience level bar plot.

    Reads the module-level ``df`` and counts each ``experience_level`` value,
    then shortens the labels with ``shorten_experience_label``.

    Returns:
        A DataFrame with ``experience_level`` and ``count`` columns, or an
        empty DataFrame when no data is loaded.
    """
    if df is None:
        return pd.DataFrame()
    level_counts = df['experience_level'].value_counts()
    exp_data = level_counts.rename_axis('experience_level').reset_index(name='count')
    # Compact labels keep the x axis readable
    exp_data['experience_level'] = exp_data['experience_level'].map(shorten_experience_label)
    return exp_data
def shorten_participation_label(label):
    """Return a compact display label for a previous-participation answer.

    The known answers are checked in the order listed; the first one that
    occurs as a substring of ``label`` decides the result. A label matching
    none of them is cut to its first 25 characters followed by ``...``;
    labels of 25 characters or fewer are returned unchanged.
    """
    known_answers = (
        ('Yes - Multiple hackathons before', 'Multiple Hackathons'),
        ('Yes - One hackathon before', 'One Hackathon'),
        ('No - This is my first', 'First Hackathon'),
        ('No - But attended similar events', 'Similar Events'),
    )
    for needle, short in known_answers:
        if needle in label:
            return short
    # Fallback: truncate anything longer than 25 chars
    if len(label) <= 25:
        return label
    return label[:25] + '...'
def shorten_experience_label(label):
    """Return a compact display label for an experience-level answer.

    The known answers are checked in the order listed; the first one that
    occurs as a substring of ``label`` decides the result. A label matching
    none of them is cut to its first 20 characters followed by ``...``;
    labels of 20 characters or fewer are returned unchanged.
    """
    known_levels = (
        ('Expert - Professional AI engineer', 'Expert/Professional'),
        ('Advanced - Built multiple ML apps', 'Advanced'),
        ('Intermediate - Built a few apps', 'Intermediate'),
        ('Beginner - Just getting started', 'Beginner'),
        ('No experience - Complete beginner', 'No Experience'),
    )
    for needle, short in known_levels:
        if needle in label:
            return short
    # Fallback: truncate anything longer than 20 chars
    if len(label) <= 20:
        return label
    return label[:20] + '...'
def shorten_usage_label(label):
    """Return a compact display label for a Gradio-usage answer.

    The known answers are checked in the order listed; the first one that
    occurs as a substring of ``label`` decides the result. A label matching
    none of them is cut to its first 20 characters followed by ``...``;
    labels of 20 characters or fewer are returned unchanged.
    """
    known_usages = (
        ('Personal projects - Building side projects', 'Personal'),
        ('Learning - New to Gradio, want to learn', 'Learning'),
        ('Not using yet - Interested to start', 'Interested'),
        ('Academic/Research - University or research', 'Academic'),
        ('Professional work - My company uses Gradio', 'Professional'),
        ('Open source - Contributing to projects', 'Open Source'),
    )
    for needle, short in known_usages:
        if needle in label:
            return short
    # Fallback: truncate anything longer than 20 chars
    if len(label) <= 20:
        return label
    return label[:20] + '...'
def shorten_channel_label(label):
    """Return a compact display label for an acquisition-channel answer.

    The known channels are checked in the order listed; the first one that
    occurs as a substring of ``label`` decides the result. A label matching
    none of them is cut to its first 15 characters followed by ``...``;
    labels of 15 characters or fewer are returned unchanged.
    """
    known_channels = (
        ('Hugging Face email/newsletter', 'HF Email'),
        ('LinkedIn', 'LinkedIn'),
        ('Discord', 'Discord'),
        ('Twitter/X', 'X'),
        ('From a colleague/friend', 'Friend'),
        ('Other', 'Other'),
        ('I participated in June 2025', 'June Hack'),
        ('YouTube', 'YouTube'),
        ('Reddit', 'Reddit'),
        ('Sponsor announcement', 'Sponsor Comms'),
    )
    for needle, short in known_channels:
        if needle in label:
            return short
    # Fallback: truncate anything longer than 15 chars
    if len(label) <= 15:
        return label
    return label[:15] + '...'
def create_gradio_usage_plot():
    """Build the data for the Gradio usage bar plot.

    Reads the module-level ``df`` and counts each ``gradio_usage`` value,
    then shortens the labels with ``shorten_usage_label``.

    Returns:
        A DataFrame with ``usage_type`` and ``count`` columns, or an empty
        DataFrame when no data is loaded.
    """
    if df is None:
        return pd.DataFrame()
    usage_counts = df['gradio_usage'].value_counts()
    usage_data = usage_counts.rename_axis('usage_type').reset_index(name='count')
    # Compact labels keep the x axis readable
    usage_data['usage_type'] = usage_data['usage_type'].map(shorten_usage_label)
    return usage_data
def create_hourly_plot():
    """Build the data for the registrations-by-hour bar plot.

    Reads the module-level ``df`` and counts rows per ``hour`` value, ordered
    by hour.

    Returns:
        A DataFrame with ``hour`` and ``registrations`` columns, or an empty
        DataFrame when no data is loaded.
    """
    if df is None:
        return pd.DataFrame()
    per_hour = df['hour'].value_counts().sort_index()
    return per_hour.rename_axis('hour').reset_index(name='registrations')
def create_channels_plot():
    """Build the data for the top acquisition channels bar plot.

    Reads the module-level ``df``, keeps the ten most frequent ``how_heard``
    values and shortens the labels with ``shorten_channel_label``.

    Returns:
        A DataFrame with ``channel`` and ``count`` columns, or an empty
        DataFrame when no data is loaded.
    """
    if df is None:
        return pd.DataFrame()
    top_channels = df['how_heard'].value_counts().head(10)
    channels_data = top_channels.rename_axis('channel').reset_index(name='count')
    # Compact labels keep the x axis readable
    channels_data['channel'] = channels_data['channel'].map(shorten_channel_label)
    return channels_data
def create_previous_participation_plot():
    """Build the data for the previous participation bar plot.

    Reads the module-level ``df`` and counts each ``previous_participation``
    value, then shortens the labels with ``shorten_participation_label``.

    Returns:
        A DataFrame with ``participation_type`` and ``count`` columns, or an
        empty DataFrame when no data is loaded.
    """
    if df is None:
        return pd.DataFrame()
    prev_counts = df['previous_participation'].value_counts()
    prev_data = prev_counts.rename_axis('participation_type').reset_index(name='count')
    # Compact labels keep the x axis readable
    prev_data['participation_type'] = prev_data['participation_type'].map(shorten_participation_label)
    return prev_data
# ============================================
# FILTER FUNCTIONS
# ============================================
def filter_data(experience_filter, days_filter):
    """Return the rows of the module-level ``df`` matching the given filters.

    Args:
        experience_filter: ``"All"`` to keep every row, otherwise the exact
            ``experience_level`` value to keep.
        days_filter: ``"All Time"`` to keep every row, otherwise a string
            whose first whitespace-separated token is an integer number of
            days (e.g. ``"7 Days"``); only rows whose ``timestamp`` falls
            within that many days before now are kept.

    Returns:
        A new DataFrame with the matching rows, or an empty DataFrame when no
        data is loaded.

    Raises:
        ValueError: if ``days_filter`` is not ``"All Time"`` and does not
            start with an integer.
    """
    if df is None:
        return pd.DataFrame()

    filtered_df = df.copy()

    # Filter by experience level
    if experience_filter != "All":
        filtered_df = filtered_df[filtered_df['experience_level'] == experience_filter]

    # Filter by date range
    if days_filter != "All Time":
        days = int(days_filter.split()[0])
        stamps = filtered_df['timestamp']
        # Build "now" in the column's own timezone: comparing a tz-aware
        # column against a naive datetime raises TypeError. For a naive
        # column tz is None and this is the same local "now" as before.
        tz = stamps.dt.tz if pd.api.types.is_datetime64_any_dtype(stamps) else None
        cutoff_date = pd.Timestamp.now(tz=tz) - pd.Timedelta(days=days)
        filtered_df = filtered_df[stamps >= cutoff_date]

    return filtered_df
def update_filtered_stats(experience_filter, days_filter):
    """Build the Markdown summary for the currently selected filters.

    Both arguments are forwarded unchanged to ``filter_data``.

    Returns:
        A Markdown string with the number of matching rows, the average
        registrations per day over the span between the earliest and latest
        matching ``timestamp`` (span floored at one day), and the three most
        common ``gradio_usage`` values. A fixed message is returned when
        nothing matches.
    """
    subset = filter_data(experience_filter, days_filter)
    match_count = len(subset)
    if not match_count:
        return "No data matches the selected filters."

    span_days = (subset['timestamp'].max() - subset['timestamp'].min()).days
    # Floor the span at one day to avoid dividing by zero
    daily_avg = match_count / max(1, span_days)
    top_usage = subset['gradio_usage'].value_counts().head(3).to_string()

    return (
        "\n"
        "### π Filtered Results\n"
        f"**Matching Registrations:** {match_count:,}\n"
        f"**Average Daily Registrations:** {daily_avg:.1f}\n"
        "**Most Common Gradio Usage:**\n"
        f"{top_usage}\n"
    )
# ============================================
# UI CONSTRUCTION
# ============================================
# Build the dashboard layout. Every component's initial value is computed
# here, at construction time, from the module-level ``df``.
with gr.Blocks(theme="ocean") as demo:
    # Header with banner image
    with gr.Row():
        gr.Image(
            value="https://cdn-uploads.huggingface.co/production/uploads/60d2dc1007da9c17c72708f8/s4q7RzD3S-8xQ8ecXrSwb.png",
            show_label=False,
            container=False,
            show_download_button=False,
            show_share_button=False,
            interactive=False,
        )
    # Refresh button and its (initially hidden) status message
    with gr.Row():
        refresh_btn = gr.Button("π Refresh Data", variant="primary")
        refresh_status = gr.Markdown("", visible=False)
    # Main tabs
    with gr.Tabs():
        # ============================================
        # TAB 1: OVERVIEW
        # ============================================
        with gr.Tab("π Stats"):
            # Overview stats and experience breakdown, each in its own row
            with gr.Row():
                overview_stats = gr.Markdown(get_overview_stats())
            with gr.Row():
                experience_breakdown = gr.Markdown(get_experience_breakdown())
            # Registrations over time - no fixed width is set, only a height
            with gr.Row():
                reg_over_time = create_registrations_over_time()
                reg_plot = gr.LinePlot(
                    value=reg_over_time,
                    x="date",
                    y="registrations",
                    title="Cumulative Registrations Over Time",
                    height=400,
                    overlay_point=True,
                    tooltip=["date", "registrations"],
                    x_title="Date",
                    y_title="Total Registrations",
                    container=True
                )
        # ============================================
        # TAB 2: PARTICIPANT INSIGHTS
        # ============================================
        with gr.Tab("π₯ Participants"):
            # Experience plot
            with gr.Row():
                exp_plot_data = create_experience_plot()
                exp_plot = gr.BarPlot(
                    value=exp_plot_data,
                    x="experience_level",
                    y="count",
                    title="Experience Level Distribution",
                    y_title="Number of Participants",
                    x_title="Experience Level",
                    height=400,
                    container=True
                )
            # Usage plot
            with gr.Row():
                usage_plot_data = create_gradio_usage_plot()
                usage_plot = gr.BarPlot(
                    value=usage_plot_data,
                    x="usage_type",
                    y="count",
                    title="How Participants Use Gradio",
                    y_title="Number of Participants",
                    x_title="Usage Type",
                    height=400,
                    container=True
                )
            # Previous participation
            with gr.Row():
                prev_plot_data = create_previous_participation_plot()
                prev_plot = gr.BarPlot(
                    value=prev_plot_data,
                    x="participation_type",
                    y="count",
                    title="Previous Participation",
                    y_title="Number of Participants",
                    x_title="Experience Type",
                    height=350,
                    container=True
                )
        # ============================================
        # TAB 3: ACQUISITION CHANNELS
        # ============================================
        with gr.Tab("π’ Channels"):
            # Channels plot
            with gr.Row():
                channels_plot_data = create_channels_plot()
                channels_plot = gr.BarPlot(
                    value=channels_plot_data,
                    x="channel",
                    y="count",
                    title="Top 10 Acquisition Channels",
                    y_title="Number of Registrations",
                    x_title="Channel",
                    height=400,
                    container=True
                )
            # Channel breakdown
            with gr.Row():
                channel_breakdown = gr.Markdown(get_channel_breakdown())
        # ============================================
        # TAB 4: REGISTRATION PATTERNS
        # ============================================
        with gr.Tab("β° Patterns"):
            with gr.Row():
                hourly_plot_data = create_hourly_plot()
                hourly_plot = gr.BarPlot(
                    value=hourly_plot_data,
                    x="hour",
                    y="registrations",
                    title="Registrations by Hour of Day (UTC)",
                    y_title="Number of Registrations",
                    x_title="Hour (24-hour format)",
                    height=400,
                    container=True
                )
    # Connect refresh button to update all components. The order of
    # ``outputs`` must match the order of the tuple returned by refresh_data().
    refresh_btn.click(
        fn=refresh_data,
        inputs=[],
        outputs=[
            refresh_status,
            overview_stats,
            experience_breakdown,
            reg_plot,
            exp_plot,
            usage_plot,
            prev_plot,
            channels_plot,
            channel_breakdown,
            hourly_plot
        ]
    ).then(
        # Reveal the status message once the refresh has finished
        fn=lambda: gr.Markdown(visible=True),
        outputs=refresh_status
    ).then(
        # NOTE(review): this hides the status again right after the previous
        # step shows it, with no delay in between - confirm this is intended.
        fn=lambda: gr.Markdown(visible=False),
        outputs=refresh_status,
    )
    # Footer with links to the hackathon organisation and registration Space
    gr.Markdown("""
    <div style="text-align: center; padding: 10px; font-size: 14px;">
    <a href="https://huggingface.co/MCP-1st-Birthday" style="margin: 0 10px;">MCP's 1st Birthday Party</a> |
    <a href="https://huggingface.co/spaces/MCP-1st-Birthday/gradio-hackathon-registration-winter25" style="margin: 0 10px;">Registration</a>
    </div>
    """,)
# ============================================
# LAUNCH
# ============================================
# Launch the app only when this file is executed as a script, not on import
if __name__ == "__main__":
    demo.launch()