import streamlit as st
import matplotlib.pyplot as plt
import pandas as pd
import torch
import plotly.express as px
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from transformers import AutoConfig, AutoTokenizer

# Page configuration
st.set_page_config(
    page_title="Transformer Visualizer",
    page_icon="🧠",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Custom CSS styling
st.markdown("""
<style>
.reportview-container {
    background: linear-gradient(45deg, #1a1a1a, #4a4a4a);
}
.sidebar .sidebar-content {
    background: #2c2c2c !important;
}
h1, h2, h3, h4, h5, h6 {
    color: #00ff00 !important;
}
.stMetric {
    background-color: #333333;
    border-radius: 10px;
    padding: 15px;
}
.architecture {
    font-family: monospace;
    color: #00ff00;
    white-space: pre-wrap;
    background-color: #1a1a1a;
    padding: 20px;
    border-radius: 10px;
    border: 1px solid #00ff00;
}
.token-table {
    margin-top: 20px;
    border: 1px solid #00ff00;
    border-radius: 5px;
}
</style>
""", unsafe_allow_html=True)

# Enhanced Model database
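# Each entry holds display metadata used across the app: "params" is in
# millions, "downloads" is an approximate Hugging Face Hub figure, and the
# gpu/cpu/ram fields are rough minimums for comfortable inference rather
# than measured requirements.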
MODELS = {
    "BERT": {"model_name": "bert-base-uncased", "type": "Encoder", "layers": 12, "heads": 12,
             "params": 109.48, "downloads": "10M+", "release_year": 2018, "gpu_req": "4GB+",
             "cpu_req": "4 cores+", "ram_req": "8GB+"},
    "GPT-2": {"model_name": "gpt2", "type": "Decoder", "layers": 12, "heads": 12,
              "params": 117, "downloads": "8M+", "release_year": 2019, "gpu_req": "6GB+",
              "cpu_req": "4 cores+", "ram_req": "12GB+"},
    "T5-Small": {"model_name": "t5-small", "type": "Seq2Seq", "layers": 6, "heads": 8,
                 "params": 60, "downloads": "5M+", "release_year": 2019, "gpu_req": "3GB+",
                 "cpu_req": "2 cores+", "ram_req": "6GB+"},
    "RoBERTa": {"model_name": "roberta-base", "type": "Encoder", "layers": 12, "heads": 12,
                "params": 125, "downloads": "7M+", "release_year": 2019, "gpu_req": "5GB+",
                "cpu_req": "4 cores+", "ram_req": "10GB+"},
    "DistilBERT": {"model_name": "distilbert-base-uncased", "type": "Encoder", "layers": 6,
                   "heads": 12, "params": 66, "downloads": "9M+", "release_year": 2019,
                   "gpu_req": "2GB+", "cpu_req": "2 cores+", "ram_req": "4GB+"},
    "ALBERT": {"model_name": "albert-base-v2", "type": "Encoder", "layers": 12, "heads": 12,
               "params": 11.8, "downloads": "3M+", "release_year": 2019, "gpu_req": "1GB+",
               "cpu_req": "1 core+", "ram_req": "2GB+"},
    "ELECTRA": {"model_name": "google/electra-small-discriminator", "type": "Encoder",
                "layers": 12, "heads": 12, "params": 13.5, "downloads": "2M+",
                "release_year": 2020, "gpu_req": "2GB+", "cpu_req": "2 cores+", "ram_req": "4GB+"},
    "XLNet": {"model_name": "xlnet-base-cased", "type": "AutoRegressive", "layers": 12,
              "heads": 12, "params": 110, "downloads": "4M+", "release_year": 2019,
              "gpu_req": "5GB+", "cpu_req": "4 cores+", "ram_req": "8GB+"},
    "BART": {"model_name": "facebook/bart-base", "type": "Seq2Seq", "layers": 6, "heads": 16,
             "params": 139, "downloads": "6M+", "release_year": 2020, "gpu_req": "6GB+",
             "cpu_req": "4 cores+", "ram_req": "12GB+"},
    "DeBERTa": {"model_name": "microsoft/deberta-base", "type": "Encoder", "layers": 12,
                "heads": 12, "params": 139, "downloads": "3M+", "release_year": 2021,
                "gpu_req": "8GB+", "cpu_req": "6 cores+", "ram_req": "16GB+"}
}

def get_model_config(model_name):
    config = AutoConfig.from_pretrained(MODELS[model_name]["model_name"])
    return config

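# Usage sketch (the attribute names assume a BERT-style config):
#   cfg = get_model_config("BERT")
#   cfg.num_hidden_layers      # 12, matches MODELS["BERT"]["layers"]
#   cfg.num_attention_heads    # 12
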
def plot_model_comparison(selected_model):
    model_names = list(MODELS.keys())
    params = [m["params"] for m in MODELS.values()]
    fig, ax = plt.subplots(figsize=(10, 6))
    bars = ax.bar(model_names, params)
    index = model_names.index(selected_model)
    bars[index].set_color('#00ff00')
    ax.set_ylabel('Parameters (Millions)', color='white')
    ax.set_title('Model Size Comparison', color='white')
    ax.tick_params(axis='x', rotation=45, colors='white')
    ax.tick_params(axis='y', colors='white')
    ax.set_facecolor('#2c2c2c')
    fig.patch.set_facecolor('#2c2c2c')
    st.pyplot(fig)

def visualize_architecture(model_info):
    architecture = []
    model_type = model_info["type"]
    layers = model_info["layers"]
    heads = model_info["heads"]
    architecture.append("Input")
    architecture.append("│")
    architecture.append("▼")
    if model_type == "Encoder":
        architecture.append("[Embedding Layer]")
        for i in range(layers):
            architecture.extend([
                f"Encoder Layer {i+1}",
                "├─ Multi-Head Attention",
                f"│   └─ {heads} Heads",
                "├─ Layer Normalization",
                "└─ Feed Forward Network",
                "│",
                "▼"
            ])
        architecture.append("[Output]")
    # XLNet is tagged "AutoRegressive" in MODELS; without this branch it
    # would render an empty diagram, so it shares the decoder layout.
    elif model_type in ("Decoder", "AutoRegressive"):
        architecture.append("[Embedding Layer]")
        for i in range(layers):
            architecture.extend([
                f"Decoder Layer {i+1}",
                "├─ Masked Multi-Head Attention",
                f"│   └─ {heads} Heads",
                "├─ Layer Normalization",
                "└─ Feed Forward Network",
                "│",
                "▼"
            ])
        architecture.append("[Output]")
    elif model_type == "Seq2Seq":
        architecture.append("Encoder Stack")
        for i in range(layers):
            architecture.extend([
                f"Encoder Layer {i+1}",
                "├─ Self-Attention",
                "└─ Feed Forward Network",
                "│",
                "▼"
            ])
        architecture.append("→→→ [Context] →→→")
        architecture.append("Decoder Stack")
        for i in range(layers):
            architecture.extend([
                f"Decoder Layer {i+1}",
                "├─ Masked Self-Attention",
                "├─ Encoder-Decoder Attention",
                "└─ Feed Forward Network",
                "│",
                "▼"
            ])
        architecture.append("[Output]")
    return "\n".join(architecture)

def visualize_attention_patterns():
    fig, ax = plt.subplots(figsize=(8, 6))
    # Random scores passed through a softmax so each row sums to 1,
    # mimicking the shape of a real attention weight matrix.
    data = torch.softmax(torch.randn(5, 5), dim=-1)
    ax.imshow(data, cmap='viridis')
    ax.set_title('Attention Patterns Example (random weights)', color='white')
    ax.set_facecolor('#2c2c2c')
    fig.patch.set_facecolor('#2c2c2c')
    st.pyplot(fig)

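# The heatmap above is a stand-in. A minimal sketch for plotting real
# attention instead (assumes a matching tokenizer is in scope and adds an
# AutoModel import):
#   from transformers import AutoModel
#   model = AutoModel.from_pretrained("bert-base-uncased", output_attentions=True)
#   inputs = tokenizer("Hello world", return_tensors="pt")
#   attn = model(**inputs).attentions  # tuple of (batch, heads, seq, seq), one per layer
#   ax.imshow(attn[0][0, 0].detach(), cmap='viridis')  # layer 0, head 0
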
def embedding_projector():
    st.subheader("🔍 Embedding Projector")
    # Sample words for visualization
    words = ["king", "queen", "man", "woman", "computer", "algorithm",
             "neural", "network", "language", "processing"]
    # Dummy 256-dimensional embeddings; reduced to 3-D below
    embeddings = torch.randn(len(words), 256).numpy()
    # Dimensionality reduction
    method = st.selectbox("Reduction Method", ["PCA", "t-SNE"])
    if method == "PCA":
        reduced = PCA(n_components=3).fit_transform(embeddings)
    else:
        # t-SNE requires perplexity < n_samples; with only 10 words the
        # default of 30 would raise an error.
        reduced = TSNE(n_components=3, perplexity=5).fit_transform(embeddings)
    # Create interactive 3D plot
    fig = px.scatter_3d(
        x=reduced[:, 0], y=reduced[:, 1], z=reduced[:, 2],
        text=words,
        title=f"Word Embeddings ({method})"
    )
    fig.update_traces(marker=dict(size=5), textposition='top center')
    st.plotly_chart(fig, use_container_width=True)

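# The vectors above are random. To project a model's actual input embeddings
# instead, one option (a sketch; words missing from the vocab map to the UNK id):
#   from transformers import AutoModel
#   model = AutoModel.from_pretrained("bert-base-uncased")
#   tok = AutoTokenizer.from_pretrained("bert-base-uncased")
#   ids = tok.convert_tokens_to_ids(words)
#   with torch.no_grad():
#       embeddings = model.get_input_embeddings()(torch.tensor(ids)).numpy()
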
def hardware_recommendations(model_info):
    st.subheader("💻 Hardware Recommendations")
    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("Minimum GPU", model_info.get("gpu_req", "4GB+"))
    with col2:
        st.metric("CPU Recommendation", model_info.get("cpu_req", "4 cores+"))
    with col3:
        st.metric("RAM Requirement", model_info.get("ram_req", "8GB+"))
    st.markdown("""
**Cloud Recommendations:**
- AWS: g4dn.xlarge instance
- GCP: n1-standard-4 with T4 GPU
- Azure: Standard_NC4as_T4_v3
""")

def model_zoo_statistics():
    st.subheader("📊 Model Zoo Statistics")
    df = pd.DataFrame.from_dict(MODELS, orient='index')
    st.dataframe(
        df[["release_year", "downloads", "params"]],
        column_config={
            "release_year": "Release Year",
            "downloads": "Downloads",
            "params": "Params (M)"
        },
        use_container_width=True,
        height=400
    )
    fig = px.bar(df, x=df.index, y="params", title="Model Parameters Comparison")
    st.plotly_chart(fig, use_container_width=True)

def memory_usage_estimator(model_info):
    st.subheader("🧮 Memory Usage Estimator")
    precision = st.selectbox("Precision", ["FP32", "FP16", "INT8"])
    batch_size = st.slider("Batch size", 1, 128, 8)
    # Crude heuristic: weight memory (params x bytes per param) scaled
    # linearly with batch size as a stand-in for activation growth. Real
    # usage also depends on sequence length and architecture.
    bytes_map = {"FP32": 4, "FP16": 2, "INT8": 1}
    estimated_memory = (model_info["params"] * 1e6 * bytes_map[precision] * batch_size) / (1024**3)
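    # Worked example of the formula: GPT-2 (117M params) at FP16 with batch
    # size 8 -> 117e6 * 2 * 8 / 1024**3 ≈ 1.7 GB.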
    col1, col2 = st.columns(2)
    with col1:
        st.metric("Estimated VRAM", f"{estimated_memory:.1f} GB")
    with col2:
        st.metric("Recommended GPU", "RTX 3090" if estimated_memory > 24 else "RTX 3060")
    st.progress(min(estimated_memory / 40, 1.0), text="GPU Memory Utilization (of 40GB GPU)")

def main():
    st.title("🧠 Transformer Model Visualizer")
    selected_model = st.sidebar.selectbox("Select Model", list(MODELS.keys()))
    model_info = MODELS[selected_model]
    config = get_model_config(selected_model)
    tokenizer = AutoTokenizer.from_pretrained(model_info["model_name"])
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric("Model Type", model_info["type"])
    with col2:
        st.metric("Layers", model_info["layers"])
    with col3:
        st.metric("Attention Heads", model_info["heads"])
    with col4:
        st.metric("Parameters", f"{model_info['params']}M")
    # Tabs for all 7 sections
    tab1, tab2, tab3, tab4, tab5, tab6, tab7 = st.tabs([
        "Model Structure", "Comparison", "Model Attention",
        "Tokenization", "Embeddings", "Hardware", "Stats & Memory"
    ])
    with tab1:
        st.subheader("Architecture Diagram")
        architecture = visualize_architecture(model_info)
        st.markdown(f"<div class='architecture'>{architecture}</div>", unsafe_allow_html=True)
        st.markdown("""
**Legend:**
- **Multi-Head Attention**: Self-attention mechanism with multiple parallel heads
- **Layer Normalization**: Normalization operation between layers
- **Feed Forward Network**: Position-wise fully connected network
- **Masked Attention**: Attention with future token masking
""")
    with tab2:
        st.subheader("Model Size Comparison")
        plot_model_comparison(selected_model)
    with tab3:
        st.subheader("Model-specific Visualizations")
        visualize_attention_patterns()
        if selected_model == "BERT":
            st.write("BERT-specific visualization example")
        elif selected_model == "GPT-2":
            st.write("GPT-2 attention mask visualization")
    with tab4:
        st.subheader("📝 Tokenization Visualization")
        input_text = st.text_input("Enter Text:", "Hello, how are you?")
        col1, col2 = st.columns(2)
        with col1:
            st.markdown("**Tokenized Output**")
            tokens = tokenizer.tokenize(input_text)
            st.write(tokens)
        with col2:
            st.markdown("**Token IDs**")
            encoded_ids = tokenizer.encode(input_text)
            st.write(encoded_ids)
        st.markdown("**Token-ID Mapping**")
        # Map tokens to IDs directly so lengths always match, regardless of
        # which special tokens (e.g. [CLS]/[SEP] or </s>) encode() adds.
        token_data = pd.DataFrame({
            "Token": tokens,
            "ID": tokenizer.convert_tokens_to_ids(tokens)
        })
        st.dataframe(
            token_data,
            height=150,
            use_container_width=True,
            column_config={
                "Token": "Token",
                "ID": st.column_config.NumberColumn("ID", help="Numerical representation of the token")
            }
        )
| st.markdown(f""" | |
| **Tokenizer Info:** | |
| - Vocabulary size: `{tokenizer.vocab_size}` | |
| - Special tokens: `{tokenizer.all_special_tokens}` | |
| - Padding token: `{tokenizer.pad_token}` | |
| - Max length: `{tokenizer.model_max_length}` | |
| """) | |
    with tab5:
        embedding_projector()
    with tab6:
        hardware_recommendations(model_info)
    with tab7:
        model_zoo_statistics()
        memory_usage_estimator(model_info)

if __name__ == "__main__":
    main()