Upload InternVL2 implementation
- Dockerfile +8 -4
- Dockerfile.gpu_test +40 -0
- Dockerfile.simple +24 -0
- app_internvl2.py +62 -4
- gpu_test.py +142 -0
- requirements.txt +2 -1
- simple_gpu_app.py +96 -0
- upload_gpu_test.py +118 -0
- upload_gpu_test_direct.py +116 -0
- upload_simple_app.py +113 -0
Dockerfile
CHANGED
@@ -1,4 +1,4 @@
-FROM nvidia/cuda:11.8.0-cudnn8-
+FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04
 
 # Set environment variables
 ENV DEBIAN_FRONTEND=noninteractive

@@ -25,7 +25,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     python3-pip \
     python3-dev \
     python3-setuptools \
-    nvidia-cuda-toolkit \
     && rm -rf /var/lib/apt/lists/*
 
 # Create a working directory

@@ -40,6 +39,9 @@ else \n\
     echo "NVIDIA driver found. Running nvidia-smi:" \n\
     nvidia-smi \n\
     fi \n\
+    echo "Environment variables for GPU:" \n\
+    echo "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}" \n\
+    echo "NVIDIA_VISIBLE_DEVICES=${NVIDIA_VISIBLE_DEVICES}" \n\
     exec "$@"' > /entrypoint.sh && \
     chmod +x /entrypoint.sh
 

@@ -48,14 +50,16 @@ COPY requirements.txt .
 
 # Upgrade pip and install dependencies in specific order to avoid conflicts
 RUN pip3 install --no-cache-dir --upgrade pip && \
-    # Install torch and torchvision first
-    pip3 install --no-cache-dir torch==2.0.1 torchvision==0.15.2 && \
+    # Install torch and torchvision first with CUDA support
+    pip3 install --no-cache-dir torch==2.0.1+cu118 torchvision==0.15.2+cu118 --extra-index-url https://download.pytorch.org/whl/cu118 && \
     # Install core dependencies
    pip3 install --no-cache-dir numpy==1.24.3 scipy==1.11.3 requests==2.31.0 && \
     # Install typing-extensions first to ensure proper version for other packages
     pip3 install --no-cache-dir typing-extensions==4.10.0 && \
     # Install huggingface dependencies
     pip3 install --no-cache-dir transformers==4.37.2 safetensors==0.4.1 huggingface_hub==0.19.4 && \
+    # Install timm for vision models
+    pip3 install --no-cache-dir timm==0.9.11 && \
     # Install lmdeploy and its dependencies first
     pip3 install --no-cache-dir "accelerate==0.30.0" && \
     pip3 install --no-cache-dir "lmdeploy==0.5.3" && \
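Note: the switch to the +cu118 wheels above only helps if the container can actually reach a GPU at runtime. A minimal sketch for verifying the installed wheel inside the built image (assuming it is run on a GPU-enabled instance):

```python
# Quick sanity check for the CUDA-enabled torch wheel pinned above.
# Run inside the container; the comments show what a healthy GPU setup reports.
import torch

print(torch.__version__)          # expected to end in "+cu118" with the pinned wheel
print(torch.version.cuda)         # "11.8" when the CUDA build is installed
print(torch.cuda.is_available())  # True only if the NVIDIA driver is exposed to the container
```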
Dockerfile.gpu_test
ADDED
@@ -0,0 +1,40 @@
+FROM huggingface/transformers-pytorch-gpu:latest
+
+# Set basic environment variables
+ENV PYTHONUNBUFFERED=1
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    curl \
+    git \
+    && rm -rf /var/lib/apt/lists/*
+
+# Set working directory
+WORKDIR /app
+
+# Install Python requirements
+RUN pip install --no-cache-dir gradio==3.38.0 pillow numpy
+
+# Copy diagnostic script
+COPY gpu_test.py /app/
+
+# Add a script to check GPU status at startup
+RUN echo '#!/bin/bash \n\
+echo "==== GPU DIAGNOSTICS STARTUP CHECKS ====" \n\
+echo "Checking NVIDIA driver and CUDA:" \n\
+nvidia-smi || echo "nvidia-smi failed - GPU may not be properly configured" \n\
+echo "Current GPU-related environment variables:" \n\
+echo "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}" \n\
+echo "NVIDIA_VISIBLE_DEVICES=${NVIDIA_VISIBLE_DEVICES}" \n\
+echo "==== STARTING APPLICATION ====" \n\
+exec "$@"' > /entrypoint.sh && \
+    chmod +x /entrypoint.sh
+
+# Make port 7860 available for the app
+EXPOSE 7860
+
+# Use our entrypoint script to check GPU status before starting the app
+ENTRYPOINT ["/entrypoint.sh"]
+
+# Start the application
+CMD ["python", "gpu_test.py"]
Dockerfile.simple
ADDED
@@ -0,0 +1,24 @@
+FROM pytorch/pytorch:2.1.0-cuda12.1-cudnn8-runtime
+
+# Set environment variables
+ENV PYTHONUNBUFFERED=1
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    git \
+    && rm -rf /var/lib/apt/lists/*
+
+# Set working directory
+WORKDIR /app
+
+# Install Python requirements
+RUN pip install --no-cache-dir transformers==4.37.2 gradio==3.38.0 pillow
+
+# Copy application files
+COPY simple_gpu_app.py /app/app.py
+
+# Make port 7860 available for the app
+EXPOSE 7860
+
+# Start the application
+CMD ["python", "app.py"]
app_internvl2.py
CHANGED
@@ -6,10 +6,45 @@ import numpy as np
 import torch
 import warnings
 import stat
+import subprocess
+import sys
 
 # Set environment variables
 os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
 
+# Print system information
+print(f"Python version: {sys.version}")
+print(f"PyTorch version: {torch.__version__}")
+print(f"CUDA available via PyTorch: {torch.cuda.is_available()}")
+print(f"CUDA version via PyTorch: {torch.version.cuda if torch.cuda.is_available() else 'Not available'}")
+
+# Try to run nvidia-smi
+def run_nvidia_smi():
+    try:
+        result = subprocess.run(['nvidia-smi'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+        if result.returncode == 0:
+            print("nvidia-smi output:")
+            print(result.stdout)
+            return True
+        else:
+            print("nvidia-smi error:")
+            print(result.stderr)
+            return False
+    except Exception as e:
+        print(f"Error running nvidia-smi: {str(e)}")
+        return False
+
+# Run nvidia-smi
+nvidia_smi_available = run_nvidia_smi()
+print(f"nvidia-smi available: {nvidia_smi_available}")
+
+# Show CUDA devices
+if torch.cuda.is_available():
+    print(f"CUDA device count: {torch.cuda.device_count()}")
+    for i in range(torch.cuda.device_count()):
+        print(f"CUDA Device {i}: {torch.cuda.get_device_name(i)}")
+    print(f"Current CUDA device: {torch.cuda.current_device()}")
+
 # Ensure all cache directories exist with proper permissions
 def setup_cache_directories():
     # Gradio cache directory

@@ -44,14 +79,18 @@ warnings.filterwarnings("ignore", category=UserWarning)
 # Check for actual GPU availability
 def check_gpu_availability():
     """Check if GPU is actually available and working"""
+    print("Checking GPU availability...")
+
     if not torch.cuda.is_available():
         print("CUDA is not available in PyTorch")
         return False
 
     try:
         # Try to initialize CUDA and run a simple operation
+        print("Attempting to create a tensor on CUDA...")
         x = torch.rand(10, device="cuda")
         y = x + x
+        print("Successfully created and operated on CUDA tensor")
         return True
     except Exception as e:
         print(f"GPU initialization failed: {str(e)}")

@@ -72,9 +111,9 @@ try:
     from lmdeploy import pipeline, TurbomindEngineConfig
     LMDEPLOY_AVAILABLE = True
     print("Successfully imported lmdeploy")
-except ImportError:
+except ImportError as e:
     LMDEPLOY_AVAILABLE = False
-    print("lmdeploy import failed. Will use a placeholder for demos.")
+    print(f"lmdeploy import failed: {str(e)}. Will use a placeholder for demos.")
 
 # Model configuration
 MODEL_ID = "OpenGVLab/InternVL2-40B-AWQ"  # 4-bit quantized model

@@ -137,7 +176,7 @@ def analyze_image(image, prompt):
     if not USE_GPU:
         return ("ERROR: This application requires a GPU to run InternVL2. "
                 "The NVIDIA driver was not detected on this system. "
-                "Please make sure this Space is using a GPU-enabled instance.")
+                "Please make sure this Space is using a GPU-enabled instance and that the GPU is correctly initialized.")
 
     # Make sure the model is loaded
     if not load_internvl2_model():

@@ -197,6 +236,17 @@ def process_image(image, analysis_type="general"):
 def create_interface():
     with gr.Blocks(title="Image Analysis with InternVL2") as demo:
         gr.Markdown("# Image Analysis with InternVL2-40B")
+
+        # System diagnostics
+        system_info = f"""
+        ## System Diagnostics:
+        - PyTorch Version: {torch.__version__}
+        - CUDA Available: {torch.cuda.is_available()}
+        - GPU Working: {USE_GPU}
+        - nvidia-smi Available: {nvidia_smi_available}
+        """
+
+        gr.Markdown(system_info)
         gr.Markdown("Upload an image to analyze it using the InternVL2-40B model.")
 
         # Show warnings based on system status

@@ -223,7 +273,15 @@ def create_interface():
             with gr.Column(scale=2):
                 output_text = gr.Textbox(label="Analysis Result", lines=20)
                 if not USE_GPU:
-                    output_text.value = "ERROR: NVIDIA GPU driver not detected. This application requires GPU acceleration to run the InternVL2 model.
+                    output_text.value = f"""ERROR: NVIDIA GPU driver not detected. This application requires GPU acceleration to run the InternVL2 model.
+
+Diagnostics:
+- PyTorch Version: {torch.__version__}
+- CUDA Available via PyTorch: {torch.cuda.is_available()}
+- nvidia-smi Available: {nvidia_smi_available}
+- GPU Working: {USE_GPU}
+
+Please ensure this Space is using a GPU-enabled instance and that the GPU is correctly initialized."""
 
         submit_btn.click(
             fn=process_image,
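For reference, the lmdeploy symbols imported above (pipeline, TurbomindEngineConfig) are typically driven as in the sketch below. This is an illustration only, not the exact code inside load_internvl2_model(); the engine parameters and the prompt are assumptions.

```python
# Hedged sketch of a typical lmdeploy VLM pipeline for the AWQ model referenced above.
# session_len and the prompt are illustrative assumptions, not values from app_internvl2.py.
from lmdeploy import pipeline, TurbomindEngineConfig
from PIL import Image

MODEL_ID = "OpenGVLab/InternVL2-40B-AWQ"
engine_config = TurbomindEngineConfig(model_format="awq", session_len=4096)

pipe = pipeline(MODEL_ID, backend_config=engine_config)
image = Image.open("example.jpg").convert("RGB")   # any local test image
response = pipe(("Describe this image in detail.", image))
print(response.text)
```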
gpu_test.py
ADDED
@@ -0,0 +1,142 @@
+#!/usr/bin/env python3
+"""
+GPU Diagnostics Tool for Hugging Face Spaces
+This script performs a comprehensive check of GPU availability and functionality.
+"""
+
+import os
+import sys
+import subprocess
+import time
+import json
+
+print("=" * 80)
+print("GPU DIAGNOSTICS TOOL")
+print("=" * 80)
+
+# Check Python version
+print(f"Python version: {sys.version}")
+print("-" * 80)
+
+# Check environment variables
+print("ENVIRONMENT VARIABLES:")
+gpu_related_vars = [
+    "CUDA_VISIBLE_DEVICES",
+    "NVIDIA_VISIBLE_DEVICES",
+    "PYTORCH_CUDA_ALLOC_CONF",
+    "HF_HOME"
+]
+
+for var in gpu_related_vars:
+    print(f"{var}: {os.environ.get(var, 'Not set')}")
+print("-" * 80)
+
+# Check for nvidia-smi
+print("CHECKING FOR NVIDIA-SMI:")
+try:
+    result = subprocess.run(['nvidia-smi'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+    if result.returncode == 0:
+        print("nvidia-smi is available and working!")
+        print(result.stdout)
+    else:
+        print("nvidia-smi error:")
+        print(result.stderr)
+except Exception as e:
+    print(f"Error running nvidia-smi: {str(e)}")
+print("-" * 80)
+
+# Check PyTorch and CUDA
+print("CHECKING PYTORCH AND CUDA:")
+try:
+    import torch
+
+    print(f"PyTorch version: {torch.__version__}")
+    print(f"CUDA available: {torch.cuda.is_available()}")
+    print(f"CUDA version: {torch.version.cuda if torch.cuda.is_available() else 'Not available'}")
+
+    if torch.cuda.is_available():
+        print(f"CUDA device count: {torch.cuda.device_count()}")
+        for i in range(torch.cuda.device_count()):
+            print(f"CUDA Device {i}: {torch.cuda.get_device_name(i)}")
+        print(f"Current CUDA device: {torch.cuda.current_device()}")
+
+    # Try to create and operate on a CUDA tensor
+    print("\nTesting CUDA tensor creation:")
+    try:
+        start_time = time.time()
+        x = torch.rand(1000, 1000, device="cuda" if torch.cuda.is_available() else "cpu")
+        y = x @ x  # Matrix multiplication to test computation
+        torch.cuda.synchronize()  # Wait for the operation to complete
+        end_time = time.time()
+
+        if torch.cuda.is_available():
+            print(f"Successfully created and operated on a CUDA tensor in {end_time - start_time:.4f} seconds")
+        else:
+            print(f"Created and operated on a CPU tensor in {end_time - start_time:.4f} seconds (CUDA not available)")
+    except Exception as e:
+        print(f"Error in tensor creation/operation: {str(e)}")
+
+    # Try to get more detailed CUDA info
+    if torch.cuda.is_available():
+        print("\nDetailed CUDA information:")
+        print(f"CUDA capability: {torch.cuda.get_device_capability(0)}")
+        print(f"Total GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")
+        print(f"CUDA arch list: {torch.cuda.get_arch_list() if hasattr(torch.cuda, 'get_arch_list') else 'Not available'}")
+except ImportError:
+    print("PyTorch is not installed")
+print("-" * 80)
+
+# Create a simple GPU test with a web interface
+print("CREATING SIMPLE GPU TEST WEB INTERFACE...")
+try:
+    import gradio as gr
+
+    def check_gpu():
+        results = {
+            "python_version": sys.version,
+            "environment_vars": {var: os.environ.get(var, "Not set") for var in gpu_related_vars},
+            "torch_available": False,
+            "cuda_available": False
+        }
+
+        try:
+            import torch
+            results["torch_available"] = True
+            results["torch_version"] = torch.__version__
+            results["cuda_available"] = torch.cuda.is_available()
+
+            if torch.cuda.is_available():
+                results["cuda_version"] = torch.version.cuda
+                results["cuda_device_count"] = torch.cuda.device_count()
+                results["cuda_device_name"] = torch.cuda.get_device_name(0)
+
+                # Test tensor creation
+                start_time = time.time()
+                x = torch.rand(1000, 1000, device="cuda")
+                y = x @ x
+                torch.cuda.synchronize()
+                end_time = time.time()
+                results["tensor_test_time"] = f"{end_time - start_time:.4f} seconds"
+                results["gpu_test_passed"] = True
+            else:
+                results["gpu_test_passed"] = False
+        except Exception as e:
+            results["error"] = str(e)
+            results["gpu_test_passed"] = False
+
+        return json.dumps(results, indent=2)
+
+    demo = gr.Interface(
+        fn=check_gpu,
+        inputs=[],
+        outputs="text",
+        title="GPU Diagnostics",
+        description="Click the button to run GPU diagnostics"
+    )
+
+    print("Starting Gradio web interface on port 7860...")
+    demo.launch(server_name="0.0.0.0")
+except ImportError:
+    print("Gradio not installed, skipping web interface")
+print("Raw GPU diagnostics complete.")
+print("-" * 80)
requirements.txt
CHANGED
@@ -15,4 +15,5 @@ huggingface_hub==0.19.4
 packaging==23.2
 pyyaml==6.0.1
 tqdm==4.66.1
-typing-extensions==4.10.0
+typing-extensions==4.10.0
+timm==0.9.11
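The new timm pin backs the vision-model side of the stack, as the Dockerfile comment above notes. As a rough illustration of what the library provides (the backbone name below is an arbitrary example, not the vision tower InternVL2 actually uses):

```python
# Illustrative timm usage only; "vit_base_patch16_224" is an arbitrary example backbone.
import timm
import torch

backbone = timm.create_model("vit_base_patch16_224", pretrained=False, num_classes=0)
backbone.eval()

dummy = torch.randn(1, 3, 224, 224)  # one fake RGB image at the model's input size
with torch.no_grad():
    features = backbone(dummy)
print(features.shape)  # pooled feature vector, e.g. torch.Size([1, 768])
```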
simple_gpu_app.py
ADDED
@@ -0,0 +1,96 @@
+import gradio as gr
+import torch
+from transformers import AutoProcessor, AutoModelForVision2Seq
+from PIL import Image
+
+# Print system information
+print(f"PyTorch version: {torch.__version__}")
+print(f"CUDA available: {torch.cuda.is_available()}")
+if torch.cuda.is_available():
+    print(f"CUDA device: {torch.cuda.get_device_name(0)}")
+
+# Load a smaller model that should work even with limited resources
+model_id = "Salesforce/blip-image-captioning-base"  # ~1 GB model, very reliable
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(f"Using device: {device}")
+
+# Create global variables for model and processor
+processor = None
+model = None
+
+def load_model():
+    global processor, model
+    try:
+        print("Loading model and processor...")
+        processor = AutoProcessor.from_pretrained(model_id)
+        model = AutoModelForVision2Seq.from_pretrained(model_id).to(device)
+        print("Model loaded successfully")
+        return True
+    except Exception as e:
+        print(f"Error loading model: {e}")
+        return False
+
+def analyze_image(image):
+    # If model not loaded yet, try to load it
+    global processor, model
+    if model is None:
+        success = load_model()
+        if not success:
+            return "Failed to load model. Check logs for details."
+
+    try:
+        if isinstance(image, str):
+            # If image is a filepath
+            image = Image.open(image).convert('RGB')
+        elif not isinstance(image, Image.Image):
+            # If image is numpy array (from gradio)
+            image = Image.fromarray(image).convert('RGB')
+
+        # Process image
+        inputs = processor(images=image, return_tensors="pt").to(device)
+
+        # Generate caption
+        with torch.no_grad():
+            output = model.generate(**inputs, max_length=100)
+
+        # Decode caption
+        caption = processor.decode(output[0], skip_special_tokens=True)
+
+        # Get device information
+        if device == "cuda":
+            memory_info = torch.cuda.memory_allocated() / 1024**2
+            return f"Caption: {caption}\n\nUsing device: {device} ({torch.cuda.get_device_name(0)})\nGPU memory used: {memory_info:.2f} MB"
+        else:
+            return f"Caption: {caption}\n\nUsing device: {device}"
+
+    except Exception as e:
+        print(f"Error during inference: {e}")
+        return f"Error during inference: {str(e)}"
+
+# Create Gradio interface
+with gr.Blocks(title="Simple GPU Test") as demo:
+    gr.Markdown("# Simple GPU Test with BLIP Image Captioning")
+
+    with gr.Row():
+        with gr.Column():
+            image_input = gr.Image(type="pil", label="Upload an image")
+            submit_btn = gr.Button("Generate Caption")
+
+            # Show if GPU is available
+            if torch.cuda.is_available():
+                gr.Markdown(f"✅ **GPU detected**: {torch.cuda.get_device_name(0)}")
+            else:
+                gr.Markdown("❌ **No GPU detected**. Running on CPU.")
+
+        with gr.Column():
+            output_text = gr.Textbox(label="Result", lines=5)
+
+    submit_btn.click(
+        fn=analyze_image,
+        inputs=[image_input],
+        outputs=[output_text]
+    )
+
+# Launch the app
+if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0")
upload_gpu_test.py
ADDED
@@ -0,0 +1,118 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+Script to upload GPU diagnostics to Hugging Face Spaces
+"""
+
+import os
+import sys
+import getpass
+from huggingface_hub import HfApi, create_repo, upload_file
+
+# Default repository name
+DEFAULT_REPO = "mknolan/gpu-diagnostic-test"
+
+def main():
+    """Main function to upload files to Hugging Face Spaces"""
+    # Get Hugging Face token with WRITE access
+    token = getpass.getpass("Enter your Hugging Face token (with WRITE access): ")
+
+    # Get repository name
+    repo_name = input("Enter repository name (default: {}): ".format(DEFAULT_REPO)) or DEFAULT_REPO
+
+    print("Uploading to Space: {}".format(repo_name))
+
+    # Initialize Hugging Face API
+    api = HfApi(token=token)
+
+    try:
+        # Try to get the repository, create if it doesn't exist
+        try:
+            repo = api.repo_info(repo_id=repo_name, repo_type="space")
+            print("Repo {} ready".format(repo_name))
+        except Exception:
+            print("Creating new Space: {}".format(repo_name))
+            create_repo(
+                repo_id=repo_name,
+                token=token,
+                repo_type="space",
+                space_sdk="gradio",
+                private=False
+            )
+
+        # Upload each file separately for more control
+        files_to_upload = [
+            "gpu_test.py",
+            "Dockerfile.gpu_test"
+        ]
+
+        print("Uploading diagnostic files to Hugging Face Space...")
+
+        # Upload Dockerfile as "Dockerfile" (HF requires this name)
+        api.upload_file(
+            path_or_fileobj="Dockerfile.gpu_test",
+            path_in_repo="Dockerfile",
+            repo_id=repo_name,
+            repo_type="space",
+            token=token,
+            commit_message="Add Docker configuration for GPU diagnostics"
+        )
+        print("Uploaded Dockerfile")
+
+        # Upload the Python script
+        api.upload_file(
+            path_or_fileobj="gpu_test.py",
+            path_in_repo="app.py",  # HF Spaces often looks for app.py as the main file
+            repo_id=repo_name,
+            repo_type="space",
+            token=token,
+            commit_message="Add GPU diagnostic script"
+        )
+        print("Uploaded gpu_test.py as app.py")
+
+        # Create a README.md file
+        readme_content = """# GPU Diagnostics Tool
+
+This Space provides diagnostics for GPU detection and functionality in Hugging Face Spaces.
+
+## Purpose
+- Tests if CUDA is available through PyTorch
+- Tests if nvidia-smi can detect the GPU
+- Tests tensor operations on GPU
+- Provides detailed diagnostic information about the GPU environment
+
+## How to Use
+1. Click the "Submit" button
+2. View the JSON results to diagnose GPU issues
+"""
+
+        # Write README to a temporary file
+        with open("temp_readme.md", "w") as f:
+            f.write(readme_content)
+
+        # Upload README
+        api.upload_file(
+            path_or_fileobj="temp_readme.md",
+            path_in_repo="README.md",
+            repo_id=repo_name,
+            repo_type="space",
+            token=token,
+            commit_message="Add README"
+        )
+        print("Uploaded README.md")
+
+        # Clean up temp file
+        os.remove("temp_readme.md")
+
+        print("Upload completed!")
+        print("Check your Space at: https://huggingface.co/spaces/{}".format(repo_name))
+
+    except Exception as e:
+        print("Error: {}".format(e))
+        return 1
+
+    return 0
+
+if __name__ == "__main__":
+    sys.exit(main())
upload_gpu_test_direct.py
ADDED
@@ -0,0 +1,116 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+Script to upload GPU diagnostics to Hugging Face Spaces with direct token input
+"""
+
+import os
+import sys
+from huggingface_hub import HfApi, create_repo, upload_file
+
+# Default repository name
+DEFAULT_REPO = "mknolan/gpu-diagnostic-test"
+# Token should be entered at runtime, not hardcoded
+DEFAULT_TOKEN = ""
+
+def main():
+    """Main function to upload files to Hugging Face Spaces"""
+    # Get Hugging Face token with WRITE access - direct input instead of getpass
+    if DEFAULT_TOKEN:
+        token = DEFAULT_TOKEN
+    else:
+        token = input("Enter your Hugging Face token (with WRITE access): ")
+
+    # Get repository name
+    repo_name = input("Enter repository name (default: {}): ".format(DEFAULT_REPO)) or DEFAULT_REPO
+
+    print("Uploading to Space: {}".format(repo_name))
+
+    # Initialize Hugging Face API
+    api = HfApi(token=token)
+
+    try:
+        # Try to get the repository, create if it doesn't exist
+        try:
+            repo = api.repo_info(repo_id=repo_name, repo_type="space")
+            print("Repo {} ready".format(repo_name))
+        except Exception:
+            print("Creating new Space: {}".format(repo_name))
+            create_repo(
+                repo_id=repo_name,
+                token=token,
+                repo_type="space",
+                space_sdk="gradio",
+                private=False
+            )
+
+        print("Uploading diagnostic files to Hugging Face Space...")
+
+        # Upload Dockerfile as "Dockerfile" (HF requires this name)
+        api.upload_file(
+            path_or_fileobj="Dockerfile.gpu_test",
+            path_in_repo="Dockerfile",
+            repo_id=repo_name,
+            repo_type="space",
+            token=token,
+            commit_message="Add Docker configuration for GPU diagnostics"
+        )
+        print("Uploaded Dockerfile")
+
+        # Upload the Python script
+        api.upload_file(
+            path_or_fileobj="gpu_test.py",
+            path_in_repo="app.py",  # HF Spaces often looks for app.py as the main file
+            repo_id=repo_name,
+            repo_type="space",
+            token=token,
+            commit_message="Add GPU diagnostic script"
+        )
+        print("Uploaded gpu_test.py as app.py")
+
+        # Create a README.md file
+        readme_content = """# GPU Diagnostics Tool
+
+This Space provides diagnostics for GPU detection and functionality in Hugging Face Spaces.
+
+## Purpose
+- Tests if CUDA is available through PyTorch
+- Tests if nvidia-smi can detect the GPU
+- Tests tensor operations on GPU
+- Provides detailed diagnostic information about the GPU environment
+
+## How to Use
+1. Click the "Submit" button
+2. View the JSON results to diagnose GPU issues
+"""
+
+        # Write README to a temporary file
+        with open("temp_readme.md", "w") as f:
+            f.write(readme_content)
+
+        # Upload README
+        api.upload_file(
+            path_or_fileobj="temp_readme.md",
+            path_in_repo="README.md",
+            repo_id=repo_name,
+            repo_type="space",
+            token=token,
+            commit_message="Add README"
+        )
+        print("Uploaded README.md")
+
+        # Clean up temp file
+        os.remove("temp_readme.md")
+
+        print("Upload completed!")
+        print("Check your Space at: https://huggingface.co/spaces/{}".format(repo_name))
+
+    except Exception as e:
+        print("Error: {}".format(e))
+        return 1
+
+    return 0
+
+if __name__ == "__main__":
+    sys.exit(main())
upload_simple_app.py
ADDED
@@ -0,0 +1,113 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+Script to upload a simple BLIP model to test GPU functionality on Hugging Face Spaces
+"""
+
+import os
+import sys
+from huggingface_hub import HfApi, create_repo, upload_file
+
+# Default repository name
+DEFAULT_REPO = "mknolan/simple-gpu-test"
+# Token should be provided at runtime
+DEFAULT_TOKEN = ""
+
+def main():
+    """Main function to upload files to Hugging Face Spaces"""
+    # Get Hugging Face token
+    token = DEFAULT_TOKEN if DEFAULT_TOKEN else input("Enter your Hugging Face token (with WRITE access): ")
+
+    # Get repository name
+    repo_name = input("Enter repository name (default: {}): ".format(DEFAULT_REPO)) or DEFAULT_REPO
+
+    print("Uploading to Space: {}".format(repo_name))
+
+    # Initialize Hugging Face API
+    api = HfApi(token=token)
+
+    try:
+        # Try to get the repository, create if it doesn't exist
+        try:
+            repo = api.repo_info(repo_id=repo_name, repo_type="space")
+            print("Repo {} ready".format(repo_name))
+        except Exception:
+            print("Creating new Space: {}".format(repo_name))
+            create_repo(
+                repo_id=repo_name,
+                token=token,
+                repo_type="space",
+                space_sdk="docker",
+                private=False
+            )
+
+        print("Uploading files to Hugging Face Space...")
+
+        # Upload Dockerfile
+        api.upload_file(
+            path_or_fileobj="Dockerfile.simple",
+            path_in_repo="Dockerfile",
+            repo_id=repo_name,
+            repo_type="space",
+            token=token,
+            commit_message="Add Docker configuration for simple GPU test"
+        )
+        print("Uploaded Dockerfile")
+
+        # Upload the Python script
+        api.upload_file(
+            path_or_fileobj="simple_gpu_app.py",
+            path_in_repo="app.py",
+            repo_id=repo_name,
+            repo_type="space",
+            token=token,
+            commit_message="Add simple GPU test application"
+        )
+        print("Uploaded simple_gpu_app.py as app.py")
+
+        # Create a README.md file
+        readme_content = """# Simple GPU Test with BLIP
+
+This Space provides a simple test of GPU functionality using the BLIP image captioning model.
+
+## Features
+- Uses a lightweight model (~1GB) that runs efficiently even on limited GPU resources
+- Provides GPU detection and memory usage information
+- Simple interface for testing if GPU is working
+
+## How to Use
+1. Upload an image
+2. Click "Generate Caption"
+3. View the results, including GPU usage information
+"""
+
+        # Write README to a temporary file
+        with open("temp_readme.md", "w") as f:
+            f.write(readme_content)
+
+        # Upload README
+        api.upload_file(
+            path_or_fileobj="temp_readme.md",
+            path_in_repo="README.md",
+            repo_id=repo_name,
+            repo_type="space",
+            token=token,
+            commit_message="Add README"
+        )
+        print("Uploaded README.md")
+
+        # Clean up temp file
+        os.remove("temp_readme.md")
+
+        print("Upload completed!")
+        print("Check your Space at: https://huggingface.co/spaces/{}".format(repo_name))
+
+    except Exception as e:
+        print("Error: {}".format(e))
+        return 1
+
+    return 0
+
+if __name__ == "__main__":
+    sys.exit(main())