diff --git a/.gitattributes b/.gitattributes deleted file mode 100644 index a6344aac8c09253b3b630fb776ae94478aa0275b..0000000000000000000000000000000000000000 --- a/.gitattributes +++ /dev/null @@ -1,35 +0,0 @@ -*.7z filter=lfs diff=lfs merge=lfs -text -*.arrow filter=lfs diff=lfs merge=lfs -text -*.bin filter=lfs diff=lfs merge=lfs -text -*.bz2 filter=lfs diff=lfs merge=lfs -text -*.ckpt filter=lfs diff=lfs merge=lfs -text -*.ftz filter=lfs diff=lfs merge=lfs -text -*.gz filter=lfs diff=lfs merge=lfs -text -*.h5 filter=lfs diff=lfs merge=lfs -text -*.joblib filter=lfs diff=lfs merge=lfs -text -*.lfs.* filter=lfs diff=lfs merge=lfs -text -*.mlmodel filter=lfs diff=lfs merge=lfs -text -*.model filter=lfs diff=lfs merge=lfs -text -*.msgpack filter=lfs diff=lfs merge=lfs -text -*.npy filter=lfs diff=lfs merge=lfs -text -*.npz filter=lfs diff=lfs merge=lfs -text -*.onnx filter=lfs diff=lfs merge=lfs -text -*.ot filter=lfs diff=lfs merge=lfs -text -*.parquet filter=lfs diff=lfs merge=lfs -text -*.pb filter=lfs diff=lfs merge=lfs -text -*.pickle filter=lfs diff=lfs merge=lfs -text -*.pkl filter=lfs diff=lfs merge=lfs -text -*.pt filter=lfs diff=lfs merge=lfs -text -*.pth filter=lfs diff=lfs merge=lfs -text -*.rar filter=lfs diff=lfs merge=lfs -text -*.safetensors filter=lfs diff=lfs merge=lfs -text -saved_model/**/* filter=lfs diff=lfs merge=lfs -text -*.tar.* filter=lfs diff=lfs merge=lfs -text -*.tar filter=lfs diff=lfs merge=lfs -text -*.tflite filter=lfs diff=lfs merge=lfs -text -*.tgz filter=lfs diff=lfs merge=lfs -text -*.wasm filter=lfs diff=lfs merge=lfs -text -*.xz filter=lfs diff=lfs merge=lfs -text -*.zip filter=lfs diff=lfs merge=lfs -text -*.zst filter=lfs diff=lfs merge=lfs -text -*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore index e4d9b211af4b4c08c2dce016474d567ad0dabac2..3538bb8f93e6308eee0977b39a7824a12e941b98 100644 --- a/.gitignore +++ b/.gitignore @@ -1,28 +1,38 @@ -# Dependencies -node_modules/ -frontend/node_modules/ +# Python __pycache__/ *.py[cod] *$py.class +*.so +.Python +env/ +venv/ +.venv/ +ENV/ +env.bak/ +venv.bak/ +.pytest_cache/ +*.egg-info/ +dist/ +build/ + +# Node.js +node_modules/ +npm-debug.log* +yarn-debug.log* +yarn-error.log* +.npm +.eslintcache # Build outputs frontend/dist/ frontend/build/ -# Environment -.env -.env.local -.env.development.local -.env.test.local -.env.production.local -.venv/ -venv/ - # IDE .vscode/ .idea/ *.swp *.swo +*~ # OS .DS_Store @@ -32,15 +42,20 @@ Thumbs.db *.log logs/ -# Cache -.cache/ -.pytest_cache/ -.mypy_cache/ - -# Model cache (uncomment to ignore downloaded models) -# models/ -# .cache/huggingface/ +# Environment variables +.env +.env.local +.env.development.local +.env.test.local +.env.production.local # Temporary files *.tmp -*.temp \ No newline at end of file +*.temp +test_*.py +debug_*.py +quick_*.py + +# Model cache (optional - uncomment if you don't want to track downloaded models) +# .cache/ +# models/ \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 100644 index 1e129e2a8ff0da825127000133d3add92e0827ee..0000000000000000000000000000000000000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,230 +0,0 @@ -# Contributing to Edge LLM ๐Ÿค - -Thank you for your interest in contributing to Edge LLM! This guide will help you get started with development and contributions. - -## ๐Ÿš€ Quick Setup for Contributors - -### 1. 
Fork and Clone -```bash -# Fork the repository on Hugging Face Spaces -# Then clone your fork -git clone https://huggingface.co/spaces/[your-username]/EdgeLLM -cd EdgeLLM -``` - -### 2. Install Dependencies -```bash -# Install Python dependencies -pip install -r requirements.txt - -# Install Node.js dependencies -cd frontend && npm install && cd .. - -# Optional: Install root package for scripts -npm install -``` - -### 3. Start Development -```bash -# Option 1: Use npm scripts -npm run dev - -# Option 2: Use Python script -python scripts/start_platform.py - -# Option 3: Start manually -npm run backend # Terminal 1 -npm run frontend # Terminal 2 -``` - -## ๐Ÿ“ Project Structure - -``` -EdgeLLM/ # Main project directory -โ”œโ”€โ”€ ๐Ÿ”ง Backend -โ”‚ โ”œโ”€โ”€ backend/ -โ”‚ โ”‚ โ”œโ”€โ”€ api/ # API routes -โ”‚ โ”‚ โ”œโ”€โ”€ services/ # Business logic -โ”‚ โ”‚ โ”œโ”€โ”€ models.py # Data models -โ”‚ โ”‚ โ”œโ”€โ”€ config.py # Configuration -โ”‚ โ”‚ โ””โ”€โ”€ main.py # FastAPI app -โ”‚ โ”œโ”€โ”€ app.py # Entry point -โ”‚ โ””โ”€โ”€ requirements.txt # Python dependencies -โ”œโ”€โ”€ ๐Ÿ’ป Frontend -โ”‚ โ”œโ”€โ”€ frontend/ -โ”‚ โ”‚ โ”œโ”€โ”€ src/ -โ”‚ โ”‚ โ”‚ โ”œโ”€โ”€ components/ # React components -โ”‚ โ”‚ โ”‚ โ”œโ”€โ”€ pages/ # Page components -โ”‚ โ”‚ โ”‚ โ”œโ”€โ”€ hooks/ # Custom hooks -โ”‚ โ”‚ โ”‚ โ””โ”€โ”€ types/ # TypeScript types -โ”‚ โ”‚ โ”œโ”€โ”€ package.json # Frontend dependencies -โ”‚ โ”‚ โ””โ”€โ”€ vite.config.ts # Build configuration -โ”‚ โ””โ”€โ”€ static/ # Built assets (auto-generated) -โ”œโ”€โ”€ ๐Ÿ”จ Development -โ”‚ โ”œโ”€โ”€ scripts/ # Development scripts -โ”‚ โ”œโ”€โ”€ package.json # Root scripts -โ”‚ โ””โ”€โ”€ .gitignore # Git ignore rules -โ””โ”€โ”€ ๐Ÿ“š Documentation - โ”œโ”€โ”€ README.md # Main documentation - โ””โ”€โ”€ CONTRIBUTING.md # This file -``` - -## ๐Ÿ› ๏ธ Development Workflow - -### Frontend Development -```bash -cd frontend -npm run dev # Start dev server (hot reload) -npm run build # Build for production -npm run preview # Preview production build -``` - -### Backend Development -```bash -# Start with auto-reload -uvicorn app:app --host 0.0.0.0 --port 8000 --reload - -# Or use npm script -npm run backend -``` - -### Full Stack Development -```bash -# Start both frontend and backend -npm run dev - -# Build everything -npm run build -``` - -## ๐Ÿงช Testing Your Changes - -### 1. Frontend Testing -```bash -cd frontend -npm run test # Run tests -npm run build # Ensure build works -``` - -### 2. Backend Testing -```bash -# Start backend and test API endpoints -curl http://localhost:8000/health -curl http://localhost:8000/models -``` - -### 3. Integration Testing -```bash -# Build and test full application -npm run build -python app.py # Test production build -``` - -## ๐Ÿ“ Code Style Guidelines - -### Frontend (TypeScript/React) -- Use TypeScript for type safety -- Follow React best practices -- Use ShadCN UI components when possible -- Keep components small and focused -- Use custom hooks for reusable logic - -### Backend (Python/FastAPI) -- Use type hints everywhere -- Follow PEP 8 style guide -- Keep services modular -- Add docstrings to functions -- Use Pydantic models for data validation - -### General -- Write descriptive commit messages -- Keep functions small and focused -- Add comments for complex logic -- Update documentation for new features - -## ๐Ÿ”„ Contribution Process - -### 1. Create a Feature Branch -```bash -git checkout -b feature/your-feature-name -``` - -### 2. 
Make Your Changes -- Follow the code style guidelines -- Add tests if applicable -- Update documentation - -### 3. Test Your Changes -```bash -npm run build # Ensure everything builds -npm run dev # Test in development -``` - -### 4. Commit and Push -```bash -git add . -git commit -m "feat: add your feature description" -git push origin feature/your-feature-name -``` - -### 5. Create a Pull Request -- Describe your changes clearly -- Include screenshots if UI changes -- Reference any related issues - -## ๐ŸŽฏ Areas for Contribution - -### ๐Ÿ”ง Backend Improvements -- Add new model support -- Improve error handling -- Add model caching optimizations -- Create API tests - -### ๐Ÿ’ป Frontend Enhancements -- Add new UI components -- Improve chat interface -- Add dark mode support -- Enhance accessibility - -### ๐Ÿ“š Documentation -- Improve README -- Add code comments -- Create tutorials -- Update API documentation - -### ๐Ÿš€ DevOps & Deployment -- Improve Docker configuration -- Add CI/CD workflows -- Optimize build process -- Add monitoring - -## ๐Ÿ› Bug Reports - -When reporting bugs, please include: -- Steps to reproduce -- Expected behavior -- Actual behavior -- Browser/OS information -- Console error messages - -## ๐Ÿ’ก Feature Requests - -When requesting features, please include: -- Clear description of the feature -- Use case and motivation -- Proposed implementation approach -- Any relevant examples - -## ๐Ÿ“ž Getting Help - -- **Issues**: Create a GitHub issue for bugs or questions -- **Discussions**: Use GitHub discussions for general questions -- **Documentation**: Check the README and API docs first - -## ๐Ÿ™ Thank You! - -Every contribution, no matter how small, helps make Edge LLM better for everyone. We appreciate your time and effort! - ---- - -**Happy coding!** ๐Ÿš€ diff --git a/Dockerfile b/Dockerfile index 3919f1af443ecb3898c7e65e5ee12670d1820fb8..321e4d35a71e19bca4e764ab1a4b74edfa8b76a9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -13,4 +13,4 @@ COPY --chown=user ./requirements.txt requirements.txt RUN pip install --no-cache-dir --upgrade -r requirements.txt COPY --chown=user . /app -CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"] +CMD ["python", "app.py"] diff --git a/LICENSE b/LICENSE deleted file mode 100644 index b84d80f33e718b7e1b235b8025c0a7ba2cb041c4..0000000000000000000000000000000000000000 --- a/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2025 ZEKUN WU - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
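The contributing guide above verifies the backend with `curl` against the `/health` and `/models` endpoints. A minimal smoke-test sketch of the same checks in Python follows; it assumes the server is running locally, that the `requests` package is available, and that the port is 8000 (the new `app.py` entry point may auto-assign a different free port, printed at startup).

```python
# Hedged sketch: smoke-test the health and model-listing endpoints described above.
# Assumes a locally running backend and the `requests` package; adjust BASE_URL
# to whatever port app.py reports when it starts.
import requests

BASE_URL = "http://localhost:8000"

# /health should confirm the API is up
health = requests.get(f"{BASE_URL}/health", timeout=10)
print(health.json())

# /models lists the available models and whether each one is currently loaded
models = requests.get(f"{BASE_URL}/models", timeout=10)
for model in models.json()["models"]:
    print(model["model_name"], "loaded:", model["is_loaded"])
```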
diff --git a/README.md b/README.md index c674c7aac17f7ec177539fec8f328be60931a24e..d4b7a724a4003f831a08a12e115249286a764e9f 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,137 @@ ---- -title: EdgeLLM -emoji: ๐Ÿ† -colorFrom: blue -colorTo: yellow -sdk: docker -pinned: false ---- - -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference +# ๐Ÿš€ Edge LLM Platform + +A lightweight, local LLM inference platform with a modern web interface. + +> **Note**: All development now happens directly in this repository (EdgeLLM_HF). This is both the development environment and the production Hugging Face Space. + +## โœจ Features + +### ๐Ÿค– **Hybrid Model Support** +- **Local Models**: Run Qwen models locally for privacy +- **API Models**: Access powerful cloud models via [AiHubMix API](https://docs.aihubmix.com/en/api/Qwen) +- **Seamless Switching**: Switch between local and API models effortlessly +- **Thinking Models**: Support for models with visible reasoning process + +### ๐ŸŒ **Available Models** + +#### Local Models (Privacy-First) +- `Qwen/Qwen3-4B-Thinking-2507` - Local model with thinking process (~8GB) +- `Qwen/Qwen3-4B-Instruct-2507` - Local direct instruction model (~8GB) + +#### API Models (Cloud-Powered) +- `Qwen/Qwen3-30B-A3B` - Advanced Qwen3 with dynamic thinking modes +- `qwen2.5-vl-72b-instruct` - Multimodal model with vision capabilities +- `Qwen/QVQ-72B-Preview` - Visual reasoning with thinking process + +### ๐ŸŽจ **Modern UI/UX** +- **Responsive Design**: Works on desktop and mobile +- **Chat Interface**: Beautiful conversation bubbles with session management +- **Model Management**: Easy switching between local and API models +- **Parameter Controls**: Temperature, max tokens, and system prompts +- **Session History**: Persistent conversations with localStorage + +## ๐Ÿ“ Project Structure + +``` +EdgeLLM/ +โ”œโ”€โ”€ frontend/ # ๐ŸŽจ React frontend with ShadCN UI +โ”œโ”€โ”€ backend/ # ๐Ÿ”ง FastAPI backend +โ”œโ”€โ”€ static/ # ๐Ÿ“ฑ Built frontend assets +โ”œโ”€โ”€ app.py # ๐ŸŒ Production entry point +โ”œโ”€โ”€ requirements.txt # ๐Ÿ Python dependencies +โ””โ”€โ”€ README.md # ๐Ÿ“– Documentation +``` + +## ๐ŸŽฏ Quick Start + +1. **Clone the repository** + ```bash + git clone https://huggingface.co/spaces/wu981526092/EdgeLLM + cd EdgeLLM + ``` + +2. **Set up environment variables** + ```bash + # Create .env file with your API credentials + echo 'api_key="your-aihubmix-api-key"' > .env + echo 'base_url="https://aihubmix.com/v1"' >> .env + ``` + +3. **Install dependencies** + ```bash + pip install -r requirements.txt + cd frontend && npm install && cd .. + ``` + +4. **Run locally** + ```bash + python app.py + ``` + +5. **Deploy changes** + ```bash + # Build frontend if needed + cd frontend && npm run build && cd .. + + # Push to Hugging Face + git add . + git commit -m "Update: your changes" + git push + ``` + +## ๐ŸŒ Live Demo + +Visit the live demo at: [https://huggingface.co/spaces/wu981526092/EdgeLLM](https://huggingface.co/spaces/wu981526092/EdgeLLM) + +## ๐Ÿ”ง Configuration + +### Environment Variables + +For local development, create a `.env` file: +```bash +api_key="your-aihubmix-api-key" +base_url="https://aihubmix.com/v1" +``` + +For production (Hugging Face Spaces), set these as secrets: +- `api_key`: Your AiHubMix API key +- `base_url`: API endpoint (https://aihubmix.com/v1) + +### API Integration + +This platform integrates with [AiHubMix API](https://docs.aihubmix.com/en/api/Qwen) for cloud-based model access. 
Features include: + +- OpenAI-compatible API interface +- Support for Qwen 3 series models +- Multimodal capabilities (text + vision) +- Streaming and non-streaming responses + +## ๐Ÿ› ๏ธ Development Workflow + +1. **Frontend development**: Work in `frontend/` +2. **Backend development**: Work in `backend/` +3. **Build frontend**: `cd frontend && npm run build` +4. **Deploy**: Standard git workflow + ```bash + git add . + git commit -m "Your changes" + git push + ``` + +## ๐Ÿ—๏ธ Architecture + +### Backend (FastAPI) +- **Models Service**: Handles both local model loading and API client management +- **Chat Service**: Routes requests to appropriate generation method (local/API) +- **API Routes**: RESTful endpoints for model management and text generation +- **Configuration**: Environment-based settings for API credentials + +### Frontend (React + TypeScript) +- **Modern UI**: Built with ShadCN components and Tailwind CSS +- **Chat Interface**: Real-time conversation with message bubbles +- **Model Management**: Easy switching between available models +- **Session Management**: Persistent chat history and settings + +## ๐Ÿ“„ License + +MIT License - see `LICENSE` for details. diff --git a/app.py b/app.py index e5a7cdee7b2a6eee7769a1213eb9aaa1b1e95ac2..f6f0188ac5b6c9c68569eb43dad0dab132d9c4ea 100644 --- a/app.py +++ b/app.py @@ -1,292 +1,195 @@ -from fastapi import FastAPI, HTTPException -from fastapi.middleware.cors import CORSMiddleware -from fastapi.staticfiles import StaticFiles -from fastapi.responses import FileResponse -from pydantic import BaseModel -from transformers import AutoModelForCausalLM, AutoTokenizer -import torch -from typing import Optional, Dict, Any +""" +Edge LLM API - Main application entry point with integrated frontend + +This entry point handles both backend API and frontend serving, +with automatic port detection and process management. 
+""" +import uvicorn +import socket +import subprocess +import sys import os - -app = FastAPI(title="Edge LLM API") - -# Enable CORS for Hugging Face Space -app.add_middleware( - CORSMiddleware, - allow_origins=["*"], # Allow all origins for HF Space - allow_credentials=True, - allow_methods=["*"], - allow_headers=["*"], -) - -# Mount static files -app.mount("/assets", StaticFiles(directory="static/assets"), name="assets") - -# Available models -AVAILABLE_MODELS = { - "Qwen/Qwen3-4B-Thinking-2507": { - "name": "Qwen3-4B-Thinking-2507", - "supports_thinking": True, - "description": "Shows thinking process", - "size_gb": "~8GB" - }, - "Qwen/Qwen3-4B-Instruct-2507": { - "name": "Qwen3-4B-Instruct-2507", - "supports_thinking": False, - "description": "Direct instruction following", - "size_gb": "~8GB" - } -} - -# Global model cache -models_cache: Dict[str, Dict[str, Any]] = {} -current_model_name = None # No model loaded by default - -class PromptRequest(BaseModel): - prompt: str - system_prompt: Optional[str] = None - model_name: Optional[str] = None - temperature: Optional[float] = 0.7 - max_new_tokens: Optional[int] = 1024 - -class PromptResponse(BaseModel): - thinking_content: str - content: str - model_used: str - supports_thinking: bool - -class ModelInfo(BaseModel): - model_name: str - name: str - supports_thinking: bool - description: str - size_gb: str - is_loaded: bool - -class ModelsResponse(BaseModel): - models: list[ModelInfo] - current_model: str - -class ModelLoadRequest(BaseModel): - model_name: str - -class ModelUnloadRequest(BaseModel): - model_name: str - -def load_model_by_name(model_name: str): - """Load a model into the cache""" - global models_cache +import time +import signal +import webbrowser +from backend.main import app + +def find_free_port(start_port=8000, max_attempts=50): + """Find a free port starting from start_port""" + for port in range(start_port, start_port + max_attempts): + try: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.bind(('localhost', port)) + return port + except OSError: + continue + raise RuntimeError(f"Could not find a free port in range {start_port}-{start_port + max_attempts}") + +def kill_processes_on_port(port): + """Kill processes using the specified port""" + try: + if os.name == 'nt': # Windows + result = subprocess.run(['netstat', '-ano'], capture_output=True, text=True) + lines = result.stdout.split('\n') + for line in lines: + if f':{port}' in line and 'LISTENING' in line: + parts = line.split() + if len(parts) >= 5: + pid = parts[-1] + try: + subprocess.run(['taskkill', '/pid', pid, '/f'], + capture_output=True, check=True) + print(f"โœ… Killed process {pid} on port {port}") + except subprocess.CalledProcessError: + pass + else: # Unix/Linux/macOS + try: + result = subprocess.run(['lsof', '-ti', f':{port}'], + capture_output=True, text=True) + pids = result.stdout.strip().split('\n') + for pid in pids: + if pid: + subprocess.run(['kill', '-9', pid], capture_output=True) + print(f"โœ… Killed process {pid} on port {port}") + except subprocess.CalledProcessError: + pass + except Exception as e: + print(f"โš ๏ธ Warning: Could not kill processes on port {port}: {e}") + +def update_frontend_config(port): + """Update frontend configuration to use the correct backend port""" + frontend_files = [ + 'frontend/src/pages/Models.tsx', + 'frontend/src/pages/Playground.tsx' + ] - if model_name in models_cache: + for file_path in frontend_files: + if os.path.exists(file_path): + try: + with open(file_path, 'r', 
encoding='utf-8') as f: + content = f.read() + + # Update the baseUrl to use the current port (no longer needed with dynamic ports) + old_pattern = "window.location.hostname === 'localhost' ? `${window.location.protocol}//${window.location.host}` : ''" + new_pattern = old_pattern # No change needed since it's already dynamic + + # No need to update frontend files since they use dynamic origins now + print(f"โœ… Frontend uses dynamic origins - no port updates needed") + except Exception as e: + print(f"โš ๏ธ Warning: Could not update {file_path}: {e}") + +def build_frontend(): + """Build the frontend if needed""" + if not os.path.exists('frontend/dist') or not os.listdir('frontend/dist'): + print("๐Ÿ”จ Building frontend...") + try: + os.chdir('frontend') + subprocess.run(['npm', 'install'], check=True, capture_output=True) + subprocess.run(['npm', 'run', 'build'], check=True, capture_output=True) + os.chdir('..') + print("โœ… Frontend built successfully") + except subprocess.CalledProcessError as e: + print(f"โŒ Frontend build failed: {e}") + os.chdir('..') + return False + except FileNotFoundError: + print("โŒ npm not found. Please install Node.js") + return False + return True + +def should_rebuild_frontend(): + """Check if frontend needs to be rebuilt""" + # Check if build exists + if not (os.path.exists('frontend/dist/index.html') and os.path.exists('frontend/dist/assets')): + print("โš ๏ธ Frontend build not found - will build it") return True - if model_name not in AVAILABLE_MODELS: - return False - + # Check if source is newer than build try: - print(f"Loading model: {model_name}") - tokenizer = AutoTokenizer.from_pretrained(model_name) - model = AutoModelForCausalLM.from_pretrained( - model_name, - torch_dtype=torch.float16, - device_map="auto" - ) + dist_time = os.path.getmtime('frontend/dist/index.html') - models_cache[model_name] = { - "model": model, - "tokenizer": tokenizer - } - print(f"Model {model_name} loaded successfully") - return True - except Exception as e: - print(f"Error loading model {model_name}: {e}") + # Check key source files + source_files = [ + 'frontend/src', + 'frontend/package.json', + 'frontend/vite.config.ts', + 'frontend/tsconfig.json' + ] + + for src_path in source_files: + if os.path.exists(src_path): + if os.path.isdir(src_path): + # Check all files in directory + for root, dirs, files in os.walk(src_path): + for file in files: + file_path = os.path.join(root, file) + if os.path.getmtime(file_path) > dist_time: + print(f"๐Ÿ”„ Source files changed - will rebuild frontend") + return True + else: + if os.path.getmtime(src_path) > dist_time: + print(f"๐Ÿ”„ {src_path} changed - will rebuild frontend") + return True + + print("โœ… Frontend build is up to date") return False - -def unload_model_by_name(model_name: str): - """Unload a model from the cache""" - global models_cache, current_model_name - - if model_name in models_cache: - del models_cache[model_name] - if current_model_name == model_name: - current_model_name = None - print(f"Model {model_name} unloaded") + + except Exception as e: + print(f"โš ๏ธ Error checking build status: {e} - will rebuild") return True - return False - -@app.on_event("startup") -async def startup_event(): - """Startup event - don't load models by default""" - print("๐Ÿš€ Edge LLM API is starting up...") - print("๐Ÿ’ก Models will be loaded on demand") -@app.get("/") -async def read_index(): - """Serve the React app""" - return FileResponse('static/index.html') +def cleanup_handler(signum, frame): + """Handle cleanup on 
exit""" + print("\n๐Ÿ›‘ Shutting down Edge LLM...") + sys.exit(0) -@app.get("/health") -async def health_check(): - return {"status": "healthy", "message": "Edge LLM API is running"} - -@app.get("/models", response_model=ModelsResponse) -async def get_models(): - """Get available models and their status""" - global current_model_name - - models = [] - for model_name, info in AVAILABLE_MODELS.items(): - models.append(ModelInfo( - model_name=model_name, - name=info["name"], - supports_thinking=info["supports_thinking"], - description=info["description"], - size_gb=info["size_gb"], - is_loaded=model_name in models_cache - )) - - return ModelsResponse( - models=models, - current_model=current_model_name or "" - ) - -@app.post("/load-model") -async def load_model(request: ModelLoadRequest): - """Load a specific model""" - global current_model_name - - if request.model_name not in AVAILABLE_MODELS: - raise HTTPException( - status_code=400, - detail=f"Model {request.model_name} not available" - ) +if __name__ == "__main__": + # Set up signal handlers + signal.signal(signal.SIGINT, cleanup_handler) + signal.signal(signal.SIGTERM, cleanup_handler) - success = load_model_by_name(request.model_name) - if success: - current_model_name = request.model_name - return { - "message": f"Model {request.model_name} loaded successfully", - "current_model": current_model_name - } - else: - raise HTTPException( - status_code=500, - detail=f"Failed to load model {request.model_name}" - ) - -@app.post("/unload-model") -async def unload_model(request: ModelUnloadRequest): - """Unload a specific model""" - global current_model_name + print("๐Ÿš€ Starting Edge LLM with auto-build frontend...") - success = unload_model_by_name(request.model_name) - if success: - return { - "message": f"Model {request.model_name} unloaded successfully", - "current_model": current_model_name or "" - } + # Find available port + import os + original_port = int(os.getenv("PORT", "0")) # Use env var or auto-assign + if original_port == 0: + # Auto-assign a free port starting from 8000 + original_port = find_free_port(8000) + print(f"๐Ÿ” Auto-assigned port: {original_port}") else: - raise HTTPException( - status_code=404, - detail=f"Model {request.model_name} not found in cache" - ) - -@app.post("/set-current-model") -async def set_current_model(request: ModelLoadRequest): - """Set the current active model""" - global current_model_name - - if request.model_name not in models_cache: - raise HTTPException( - status_code=400, - detail=f"Model {request.model_name} is not loaded. Please load it first." - ) - - current_model_name = request.model_name - return { - "message": f"Current model set to {current_model_name}", - "current_model": current_model_name - } - -@app.post("/generate", response_model=PromptResponse) -async def generate_text(request: PromptRequest): - """Generate text using the loaded model""" - global current_model_name - - # Use the model specified in request, or fall back to current model - model_to_use = request.model_name if request.model_name else current_model_name - - if not model_to_use: - raise HTTPException( - status_code=400, - detail="No model specified. Please load a model first." - ) - - if model_to_use not in models_cache: - raise HTTPException( - status_code=400, - detail=f"Model {model_to_use} is not loaded. Please load it first." 
- ) + kill_processes_on_port(original_port) try: - model = models_cache[model_to_use]["model"] - tokenizer = models_cache[model_to_use]["tokenizer"] - model_info = AVAILABLE_MODELS[model_to_use] - - # Build the prompt - messages = [] - if request.system_prompt: - messages.append({"role": "system", "content": request.system_prompt}) - messages.append({"role": "user", "content": request.prompt}) - - # Apply chat template - formatted_prompt = tokenizer.apply_chat_template( - messages, - tokenize=False, - add_generation_prompt=True - ) + port = find_free_port(original_port) + print(f"๐Ÿ“ก Using port: {port}") - # Tokenize - inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device) + if port != original_port: + print(f"โš ๏ธ Port {original_port} was busy, switched to {port}") + update_frontend_config(port) - # Generate - with torch.no_grad(): - outputs = model.generate( - **inputs, - max_new_tokens=request.max_new_tokens, - temperature=request.temperature, - do_sample=True, - pad_token_id=tokenizer.eos_token_id - ) + # Auto-build frontend if needed + if should_rebuild_frontend(): + print("๐Ÿ”จ Building frontend...") + build_frontend() - # Decode - generated_tokens = outputs[0][inputs['input_ids'].shape[1]:] - generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=True) + # Start the backend server + print(f"๐ŸŒ Starting server on http://localhost:{port}") + print("๐ŸŽฏ Frontend and Backend integrated - ready to use!") - # Parse thinking vs final content for thinking models - thinking_content = "" - final_content = generated_text + # Auto-open browser after a short delay + def open_browser(): + time.sleep(2) + webbrowser.open(f'http://localhost:{port}') - if model_info["supports_thinking"] and "" in generated_text: - parts = generated_text.split("") - if len(parts) > 1: - thinking_part = parts[1] - if "" in thinking_part: - thinking_content = thinking_part.split("")[0].strip() - remaining = thinking_part.split("", 1)[1] if "" in thinking_part else "" - final_content = remaining.strip() + import threading + browser_thread = threading.Thread(target=open_browser) + browser_thread.daemon = True + browser_thread.start() - return PromptResponse( - thinking_content=thinking_content, - content=final_content, - model_used=model_to_use, - supports_thinking=model_info["supports_thinking"] - ) + # Start the server + uvicorn.run(app, host="0.0.0.0", port=port) except Exception as e: - print(f"Generation error: {e}") - raise HTTPException(status_code=500, detail=f"Generation failed: {str(e)}") - -if __name__ == "__main__": - import uvicorn - uvicorn.run(app, host="0.0.0.0", port=7860) + print(f"โŒ Error starting server: {e}") + sys.exit(1) diff --git a/backend/__init__.py b/backend/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/backend/api/__init__.py b/backend/api/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/backend/api/endpoints/__init__.py b/backend/api/endpoints/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/backend/api/routes.py b/backend/api/routes.py index bdbf8c58f281ec56f0960472c8de1ceb7e9b7ee9..ca7bdf8a5aae46acece21f5246a58e378c549beb 100644 --- a/backend/api/routes.py +++ b/backend/api/routes.py @@ -1,7 +1,7 @@ """ API routes for Edge LLM """ -from fastapi import APIRouter, HTTPException 
+from fastapi import APIRouter, HTTPException, Request from fastapi.responses import FileResponse from ..models import ( PromptRequest, PromptResponse, ModelInfo, ModelsResponse, @@ -18,7 +18,8 @@ router = APIRouter() @router.get("/") async def read_index(): """Serve the React app""" - return FileResponse('static/index.html') + from ..config import FRONTEND_DIST_DIR + return FileResponse(f'{FRONTEND_DIST_DIR}/index.html') @router.get("/health") @@ -38,7 +39,8 @@ async def get_models(): supports_thinking=info["supports_thinking"], description=info["description"], size_gb=info["size_gb"], - is_loaded=model_service.is_model_loaded(model_name) + is_loaded=model_service.is_model_loaded(model_name), + type=info["type"] )) return ModelsResponse( @@ -124,6 +126,7 @@ async def generate_text(request: PromptRequest): thinking_content, final_content, model_used, supports_thinking = chat_service.generate_response( prompt=request.prompt, model_name=model_to_use, + messages=[msg.dict() for msg in request.messages] if request.messages else [], system_prompt=request.system_prompt, temperature=request.temperature, max_new_tokens=request.max_new_tokens @@ -139,3 +142,14 @@ async def generate_text(request: PromptRequest): except Exception as e: print(f"Generation error: {e}") raise HTTPException(status_code=500, detail=f"Generation failed: {str(e)}") + + +# Catch-all route for SPA - must be last +@router.get("/{full_path:path}") +async def catch_all(request: Request, full_path: str): + """ + Catch-all route to serve index.html for any unmatched paths. + This enables client-side routing for the React SPA. + """ + from ..config import FRONTEND_DIST_DIR + return FileResponse(f'{FRONTEND_DIST_DIR}/index.html') diff --git a/backend/app.py b/backend/app.py deleted file mode 100644 index 2dda420e9d79d8f7c6642e01bd6467ed71fc33c9..0000000000000000000000000000000000000000 --- a/backend/app.py +++ /dev/null @@ -1,243 +0,0 @@ -from fastapi import FastAPI, HTTPException -from fastapi.middleware.cors import CORSMiddleware -from pydantic import BaseModel -from transformers import AutoModelForCausalLM, AutoTokenizer -import torch -from typing import Optional, Dict, Any - -app = FastAPI(title="Edge LLM API") - -# Enable CORS for frontend -app.add_middleware( - CORSMiddleware, - allow_origins=["http://localhost:5173", "http://localhost:5174"], # Vite ports - allow_credentials=True, - allow_methods=["*"], - allow_headers=["*"], -) - -# Available models -AVAILABLE_MODELS = { - "Qwen/Qwen3-4B-Thinking-2507": { - "name": "Qwen3-4B-Thinking-2507", - "supports_thinking": True, - "description": "Shows thinking process", - "size_gb": "~8GB" - }, - "Qwen/Qwen3-4B-Instruct-2507": { - "name": "Qwen3-4B-Instruct-2507", - "supports_thinking": False, - "description": "Direct instruction following", - "size_gb": "~8GB" - } -} - -# Global model cache -models_cache: Dict[str, Dict[str, Any]] = {} -current_model_name = None # No model loaded by default - -class PromptRequest(BaseModel): - prompt: str - system_prompt: Optional[str] = None - model_name: Optional[str] = None - temperature: Optional[float] = 0.7 - max_new_tokens: Optional[int] = 1024 - -class PromptResponse(BaseModel): - thinking_content: str - content: str - model_used: str - supports_thinking: bool - -class ModelInfo(BaseModel): - model_name: str - name: str - supports_thinking: bool - description: str - size_gb: str - is_loaded: bool - -class ModelLoadRequest(BaseModel): - model_name: str - -class ModelUnloadRequest(BaseModel): - model_name: str - -async def 
load_model_by_name(model_name: str): - """Load a specific model and cache it (without setting as current)""" - global models_cache - - if model_name not in AVAILABLE_MODELS: - raise HTTPException(status_code=400, detail=f"Model {model_name} not available") - - if model_name not in models_cache: - print(f"Loading model: {model_name}...") - tokenizer = AutoTokenizer.from_pretrained(model_name) - model = AutoModelForCausalLM.from_pretrained( - model_name, - torch_dtype="auto", - device_map="auto" - ) - models_cache[model_name] = { - "model": model, - "tokenizer": tokenizer - } - print(f"Model {model_name} loaded successfully!") - - return models_cache[model_name] - -def unload_model_by_name(model_name: str): - """Unload a specific model from cache""" - global models_cache, current_model_name - - if model_name in models_cache: - del models_cache[model_name] - print(f"Model {model_name} unloaded from cache") - - # If current model was unloaded, reset current model - if current_model_name == model_name: - current_model_name = None - -@app.on_event("startup") -async def startup_event(): - """Startup without loading any models""" - print("Backend started. Models will be loaded on demand.") - -@app.get("/") -async def root(): - return {"message": "Edge LLM API is running"} - -@app.get("/models") -async def get_available_models(): - """Get list of available models with their status""" - models_info = [] - for model_name, info in AVAILABLE_MODELS.items(): - models_info.append(ModelInfo( - model_name=model_name, - name=info["name"], - supports_thinking=info["supports_thinking"], - description=info["description"], - size_gb=info["size_gb"], - is_loaded=model_name in models_cache - )) - return { - "models": models_info, - "current_model": current_model_name - } - -@app.post("/load-model") -async def load_model(request: ModelLoadRequest): - """Load a model into memory""" - try: - model_data = await load_model_by_name(request.model_name) - return { - "message": f"Model loaded: {request.model_name}", - "model_name": request.model_name, - "supports_thinking": AVAILABLE_MODELS[request.model_name]["supports_thinking"] - } - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - -@app.post("/unload-model") -async def unload_model(request: ModelUnloadRequest): - """Unload a model from memory""" - try: - unload_model_by_name(request.model_name) - return { - "message": f"Model unloaded: {request.model_name}", - "model_name": request.model_name - } - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - -@app.post("/set-current-model") -async def set_current_model(request: ModelLoadRequest): - """Set the current active model (must be loaded first)""" - global current_model_name - - if request.model_name not in models_cache: - raise HTTPException(status_code=400, detail=f"Model {request.model_name} is not loaded. 
Please load it first.") - - current_model_name = request.model_name - return { - "message": f"Current model set to: {request.model_name}", - "model_name": request.model_name, - "supports_thinking": AVAILABLE_MODELS[request.model_name]["supports_thinking"] - } - -@app.post("/generate", response_model=PromptResponse) -async def generate_response(request: PromptRequest): - global current_model_name - - # Determine which model to use - target_model = request.model_name if request.model_name else current_model_name - - if not target_model: - raise HTTPException(status_code=400, detail="No model specified and no current model set") - - # Check if the target model is loaded - if target_model not in models_cache: - raise HTTPException( - status_code=400, - detail=f"Model {target_model} is not loaded. Please load the model first using the load button." - ) - - # Set as current model if it's different - if target_model != current_model_name: - current_model_name = target_model - - # Get model and tokenizer - model_data = models_cache[current_model_name] - model = model_data["model"] - tokenizer = model_data["tokenizer"] - supports_thinking = AVAILABLE_MODELS[current_model_name]["supports_thinking"] - - # Prepare the model input with optional system prompt - messages = [] - if request.system_prompt: - messages.append({"role": "system", "content": request.system_prompt}) - messages.append({"role": "user", "content": request.prompt}) - - text = tokenizer.apply_chat_template( - messages, - tokenize=False, - add_generation_prompt=True, - ) - model_inputs = tokenizer([text], return_tensors="pt").to(model.device) - - # Generate response with parameters - generated_ids = model.generate( - **model_inputs, - max_new_tokens=request.max_new_tokens, - temperature=request.temperature, - do_sample=True if request.temperature > 0 else False, - pad_token_id=tokenizer.eos_token_id - ) - output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist() - - thinking_content = "" - content = "" - - if supports_thinking: - # Parse thinking content for thinking models - try: - index = len(output_ids) - output_ids[::-1].index(151668) - except ValueError: - index = 0 - - thinking_content = tokenizer.decode(output_ids[:index], skip_special_tokens=True).strip("\n") - content = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n") - else: - # For non-thinking models, everything is content - content = tokenizer.decode(output_ids, skip_special_tokens=True).strip("\n") - - return PromptResponse( - thinking_content=thinking_content, - content=content, - model_used=current_model_name, - supports_thinking=supports_thinking - ) - -if __name__ == "__main__": - import uvicorn - uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=False) diff --git a/backend/config.py b/backend/config.py index 96ca313095497d82d1692abd3ef61ae078a482d9..3d139f9164a46278d29c76fae7debb5164240ac4 100644 --- a/backend/config.py +++ b/backend/config.py @@ -1,30 +1,64 @@ """ Configuration settings for the Edge LLM API """ +import os +from dotenv import load_dotenv + +# Load environment variables +load_dotenv() + +# API Configuration +API_KEY = os.getenv("api_key", "") +BASE_URL = os.getenv("base_url", "https://aihubmix.com/v1") # Available models configuration AVAILABLE_MODELS = { + # API models (AiHubMix) - Prioritized first + "Qwen/Qwen3-30B-A3B": { + "name": "Qwen3-30B-A3B", + "supports_thinking": True, + "description": "API: Qwen3 with dynamic thinking modes", + "size_gb": "API", + "type": "api" + }, + # Local models (for 
local development) "Qwen/Qwen3-4B-Thinking-2507": { "name": "Qwen3-4B-Thinking-2507", "supports_thinking": True, - "description": "Shows thinking process", - "size_gb": "~8GB" + "description": "Local: Shows thinking process", + "size_gb": "~8GB", + "type": "local" }, "Qwen/Qwen3-4B-Instruct-2507": { - "name": "Qwen3-4B-Instruct-2507", + "name": "Qwen3-4B-Instruct-2507", "supports_thinking": False, - "description": "Direct instruction following", - "size_gb": "~8GB" + "description": "Local: Direct instruction following", + "size_gb": "~8GB", + "type": "local" + }, + "qwen2.5-vl-72b-instruct": { + "name": "Qwen2.5-VL-72B-Instruct", + "supports_thinking": False, + "description": "API: Multimodal model with vision", + "size_gb": "API", + "type": "api" + }, + "Qwen/QVQ-72B-Preview": { + "name": "QVQ-72B-Preview", + "supports_thinking": True, + "description": "API: Visual reasoning with thinking", + "size_gb": "API", + "type": "api" } } # CORS settings CORS_ORIGINS = ["*"] # Allow all origins for HF Space -# Static files directory -STATIC_DIR = "static" -ASSETS_DIR = "static/assets" +# Static files directory - point directly to frontend build +FRONTEND_DIST_DIR = "frontend/dist" +ASSETS_DIR = "frontend/dist/assets" -# Server settings +# Server settings (port will be dynamically determined) HOST = "0.0.0.0" -PORT = 7860 +DEFAULT_PORT = int(os.getenv("PORT", "0")) # 0 means auto-assign a free port diff --git a/backend/core/__init__.py b/backend/core/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/backend/main.py b/backend/main.py index b2b3ced81ad84fedce9a975f563eafb5d280b7a9..f5e9f7c1bc8fc69f531c3aad764df71bfe087d86 100644 --- a/backend/main.py +++ b/backend/main.py @@ -5,7 +5,7 @@ from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware from fastapi.staticfiles import StaticFiles from .api.routes import router -from .config import CORS_ORIGINS, ASSETS_DIR +from .config import CORS_ORIGINS, ASSETS_DIR, FRONTEND_DIST_DIR def create_app() -> FastAPI: diff --git a/backend/models.py b/backend/models.py index 8395f16ff632d45b6585c81856379fd742a0ed15..74831313f507fefcb1b48ae2c4d457d6ca11fbc2 100644 --- a/backend/models.py +++ b/backend/models.py @@ -5,8 +5,13 @@ from pydantic import BaseModel from typing import Optional, List +class ChatMessage(BaseModel): + role: str # 'user', 'assistant', 'system' + content: str + class PromptRequest(BaseModel): prompt: str + messages: Optional[List[ChatMessage]] = [] # Full conversation history system_prompt: Optional[str] = None model_name: Optional[str] = None temperature: Optional[float] = 0.7 @@ -27,6 +32,7 @@ class ModelInfo(BaseModel): description: str size_gb: str is_loaded: bool + type: str class ModelsResponse(BaseModel): diff --git a/backend/services/__init__.py b/backend/services/__init__.py deleted file mode 100644 index 0557eb635c5522686a57e633065437599726336a..0000000000000000000000000000000000000000 --- a/backend/services/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# Services module diff --git a/backend/services/chat_service.py b/backend/services/chat_service.py index 2a02f0a358b0b95a927a48b6850f5e51ff396f94..24c8b74c2b94bc4fc41ae624d8a3901c8a40b752 100644 --- a/backend/services/chat_service.py +++ b/backend/services/chat_service.py @@ -1,26 +1,94 @@ """ -Chat generation service +Chat generation service supporting both local models and API calls """ import torch from typing import Tuple +from openai import OpenAI from .model_service import 
model_service -from ..config import AVAILABLE_MODELS +from ..config import AVAILABLE_MODELS, API_KEY, BASE_URL class ChatService: + def __init__(self): + # Initialize OpenAI client for API calls + self.api_client = OpenAI( + api_key=API_KEY, + base_url=BASE_URL + ) if API_KEY else None - @staticmethod - def generate_response( + def _generate_api_response( + self, prompt: str, model_name: str, + messages: list = None, system_prompt: str = None, temperature: float = 0.7, max_new_tokens: int = 1024 ) -> Tuple[str, str, str, bool]: - """ - Generate chat response - Returns: (thinking_content, final_content, model_used, supports_thinking) - """ + """Generate response using API""" + if not self.api_client: + raise ValueError("API client not configured. Please check API_KEY.") + + # Build messages with conversation history + api_messages = [] + if system_prompt: + api_messages.append({"role": "system", "content": system_prompt}) + + # Add conversation history + if messages: + for msg in messages: + api_messages.append({"role": msg.get("role"), "content": msg.get("content")}) + + # Add current prompt as the latest user message + api_messages.append({"role": "user", "content": prompt}) + + model_info = AVAILABLE_MODELS[model_name] + + try: + # Make API call + completion = self.api_client.chat.completions.create( + model=model_name, + messages=api_messages, + temperature=temperature, + max_tokens=max_new_tokens, + stream=False + ) + + generated_text = completion.choices[0].message.content + + # Parse thinking vs final content for thinking models + thinking_content = "" + final_content = generated_text + + if model_info["supports_thinking"] and "" in generated_text: + parts = generated_text.split("") + if len(parts) > 1: + thinking_part = parts[1] + if "" in thinking_part: + thinking_content = thinking_part.split("")[0].strip() + remaining = thinking_part.split("", 1)[1] if "" in thinking_part else "" + final_content = remaining.strip() + + return ( + thinking_content, + final_content, + model_name, + model_info["supports_thinking"] + ) + + except Exception as e: + raise ValueError(f"API call failed: {str(e)}") + + def _generate_local_response( + self, + prompt: str, + model_name: str, + messages: list = None, + system_prompt: str = None, + temperature: float = 0.7, + max_new_tokens: int = 1024 + ) -> Tuple[str, str, str, bool]: + """Generate response using local model""" if not model_service.is_model_loaded(model_name): raise ValueError(f"Model {model_name} is not loaded") @@ -30,15 +98,22 @@ class ChatService: tokenizer = model_data["tokenizer"] model_info = AVAILABLE_MODELS[model_name] - # Build the prompt - messages = [] + # Build the conversation with full history + conversation = [] if system_prompt: - messages.append({"role": "system", "content": system_prompt}) - messages.append({"role": "user", "content": prompt}) + conversation.append({"role": "system", "content": system_prompt}) + + # Add conversation history + if messages: + for msg in messages: + conversation.append({"role": msg.get("role"), "content": msg.get("content")}) + + # Add current prompt as the latest user message + conversation.append({"role": "user", "content": prompt}) # Apply chat template formatted_prompt = tokenizer.apply_chat_template( - messages, + conversation, tokenize=False, add_generation_prompt=True ) @@ -79,6 +154,33 @@ class ChatService: model_name, model_info["supports_thinking"] ) + + def generate_response( + self, + prompt: str, + model_name: str, + messages: list = None, + system_prompt: str = None, + 
temperature: float = 0.7, + max_new_tokens: int = 1024 + ) -> Tuple[str, str, str, bool]: + """ + Generate chat response using appropriate method (API or local) + Returns: (thinking_content, final_content, model_used, supports_thinking) + """ + model_info = AVAILABLE_MODELS.get(model_name) + if not model_info: + raise ValueError(f"Unknown model: {model_name}") + + # Route to appropriate generation method + if model_info["type"] == "api": + return self._generate_api_response( + prompt, model_name, messages, system_prompt, temperature, max_new_tokens + ) + else: + return self._generate_local_response( + prompt, model_name, messages, system_prompt, temperature, max_new_tokens + ) # Global chat service instance diff --git a/backend/services/model_service.py b/backend/services/model_service.py index d017cced53766c06b5805d96df49934697b0f28b..9cdfa5591ddbc34a795ababd36c62d8830461b8d 100644 --- a/backend/services/model_service.py +++ b/backend/services/model_service.py @@ -1,46 +1,60 @@ """ Model loading and management service """ -import torch from transformers import AutoModelForCausalLM, AutoTokenizer -from typing import Dict, Any, Optional +import torch +from typing import Dict, Any from ..config import AVAILABLE_MODELS class ModelService: def __init__(self): self.models_cache: Dict[str, Dict[str, Any]] = {} - self.current_model_name: Optional[str] = None - + self.current_model_name: str = None + def load_model(self, model_name: str) -> bool: - """Load a model into the cache""" - if model_name in self.models_cache: - return True - + """Load a model into memory""" if model_name not in AVAILABLE_MODELS: + print(f"Model {model_name} not available.") return False + model_info = AVAILABLE_MODELS[model_name] + + # API models don't need to be "loaded" - they're always available + if model_info["type"] == "api": + print(f"API model {model_name} is always available") + return True + + # Handle local models + if model_name in self.models_cache: + print(f"Model {model_name} already loaded.") + return True + try: - print(f"Loading model: {model_name}") + print(f"Loading local model: {model_name}") tokenizer = AutoTokenizer.from_pretrained(model_name) model = AutoModelForCausalLM.from_pretrained( model_name, torch_dtype=torch.float16, device_map="auto" ) - - self.models_cache[model_name] = { - "model": model, - "tokenizer": tokenizer - } + self.models_cache[model_name] = {"model": model, "tokenizer": tokenizer} print(f"Model {model_name} loaded successfully") return True except Exception as e: print(f"Error loading model {model_name}: {e}") return False - + def unload_model(self, model_name: str) -> bool: - """Unload a model from the cache""" + """Unload a model from memory""" + model_info = AVAILABLE_MODELS.get(model_name, {}) + + # API models can't be "unloaded" + if model_info.get("type") == "api": + print(f"API model {model_name} cannot be unloaded") + return True + + # Handle local models if model_name in self.models_cache: del self.models_cache[model_name] if self.current_model_name == model_name: @@ -48,27 +62,47 @@ class ModelService: print(f"Model {model_name} unloaded") return True return False - + def set_current_model(self, model_name: str) -> bool: """Set the current active model""" - if model_name in self.models_cache: + if model_name not in AVAILABLE_MODELS: + return False + + model_info = AVAILABLE_MODELS[model_name] + + # API models are always "available" + if model_info["type"] == "api": self.current_model_name = model_name return True - return False - - def get_model_info(self, 
model_name: str) -> Dict[str, Any]: - """Get model configuration info""" - return AVAILABLE_MODELS.get(model_name, {}) - + + # Local models need to be loaded first + if model_name not in self.models_cache: + if not self.load_model(model_name): + return False + + self.current_model_name = model_name + return True + def is_model_loaded(self, model_name: str) -> bool: - """Check if a model is loaded""" + """Check if a model is loaded/available""" + model_info = AVAILABLE_MODELS.get(model_name, {}) + + # API models are always available + if model_info.get("type") == "api": + return True + + # Local models need to be in cache return model_name in self.models_cache - + def get_loaded_models(self) -> list: - """Get list of currently loaded models""" - return list(self.models_cache.keys()) - - def get_current_model(self) -> Optional[str]: + """Get list of currently loaded/available models""" + loaded = [] + for model_name, model_info in AVAILABLE_MODELS.items(): + if model_info["type"] == "api" or model_name in self.models_cache: + loaded.append(model_name) + return loaded + + def get_current_model(self) -> str: """Get the current active model""" return self.current_model_name diff --git a/backend/utils/__init__.py b/backend/utils/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/frontend/components.json b/frontend/components.json new file mode 100644 index 0000000000000000000000000000000000000000..70e291640dfc568bc5e5cd89d0be82659005ed50 --- /dev/null +++ b/frontend/components.json @@ -0,0 +1,21 @@ +{ + "$schema": "https://ui.shadcn.com/schema.json", + "style": "default", + "rsc": false, + "tsx": true, + "tailwind": { + "config": "tailwind.config.js", + "css": "src/index.css", + "baseColor": "slate", + "cssVariables": true, + "prefix": "" + }, + "aliases": { + "components": "@/components", + "utils": "@/lib/utils", + "ui": "@/components/ui", + "lib": "@/lib", + "hooks": "@/hooks" + }, + "iconLibrary": "lucide" +} diff --git a/frontend/index.html b/frontend/index.html index 28fc7629932327eeb2d923ff7a2a5f57c2077233..84ff93b2a1fd2e20c8282aa2943c2132a2cea4ec 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -4,7 +4,7 @@ - Edge LLM Platform + Edge LLM
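The backend changes above add a `messages` field (a list of `{role, content}` entries) to `PromptRequest`, so `/generate` can receive full conversation history, and the chat service then routes the request to either a local model or an AiHubMix API model. The sketch below shows what such a request might look like; the field names follow the `PromptRequest`/`ChatMessage` models in this diff, while the port, model choice, and use of the `requests` package are assumptions for illustration. Local models would still need to be loaded via `/load-model` first.

```python
# Hedged sketch of calling the updated /generate endpoint with conversation history.
# Field names come from the PromptRequest/ChatMessage models in the diff above;
# the port and example messages are illustrative assumptions.
import requests

BASE_URL = "http://localhost:8000"  # adjust to the port printed at startup

payload = {
    "prompt": "How does it differ from the instruct variant?",
    "messages": [  # prior turns; the current prompt is sent separately
        {"role": "user", "content": "What is the thinking model good at?"},
        {"role": "assistant", "content": "It exposes its intermediate reasoning."},
    ],
    "system_prompt": "You are a concise assistant.",
    "model_name": "Qwen/Qwen3-30B-A3B",  # API-type model; always available per config.py
    "temperature": 0.7,
    "max_new_tokens": 512,
}

resp = requests.post(f"{BASE_URL}/generate", json=payload, timeout=300)
resp.raise_for_status()
data = resp.json()

# Thinking models return a separate thinking_content field alongside the answer
if data["supports_thinking"] and data["thinking_content"]:
    print("thinking:", data["thinking_content"])
print("answer:", data["content"], f"(model: {data['model_used']})")
```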
diff --git a/frontend/package-lock.json b/frontend/package-lock.json index e14c4d6c5877cc3d3028689bf4511d7096049f7a..34aa80848f93ef2a10c260533546f6a6ad81e10b 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -16,9 +16,10 @@ "@radix-ui/react-slot": "^1.2.3", "@radix-ui/react-switch": "^1.2.6", "@tailwindcss/typography": "^0.5.16", + "ai": "^5.0.27", "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", - "lucide-react": "^0.263.1", + "lucide-react": "^0.542.0", "react": "^18.2.0", "react-dom": "^18.2.0", "react-markdown": "^10.1.0", @@ -37,6 +38,51 @@ "vite": "^4.4.5" } }, + "node_modules/@ai-sdk/gateway": { + "version": "1.0.15", + "resolved": "https://registry.npmjs.org/@ai-sdk/gateway/-/gateway-1.0.15.tgz", + "integrity": "sha512-xySXoQ29+KbGuGfmDnABx+O6vc7Gj7qugmj1kGpn0rW0rQNn6UKUuvscKMzWyv1Uv05GyC1vqHq8ZhEOLfXscQ==", + "license": "Apache-2.0", + "dependencies": { + "@ai-sdk/provider": "2.0.0", + "@ai-sdk/provider-utils": "3.0.7" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "zod": "^3.25.76 || ^4" + } + }, + "node_modules/@ai-sdk/provider": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/@ai-sdk/provider/-/provider-2.0.0.tgz", + "integrity": "sha512-6o7Y2SeO9vFKB8lArHXehNuusnpddKPk7xqL7T2/b+OvXMRIXUO1rR4wcv1hAFUAT9avGZshty3Wlua/XA7TvA==", + "license": "Apache-2.0", + "dependencies": { + "json-schema": "^0.4.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/@ai-sdk/provider-utils": { + "version": "3.0.7", + "resolved": "https://registry.npmjs.org/@ai-sdk/provider-utils/-/provider-utils-3.0.7.tgz", + "integrity": "sha512-o3BS5/t8KnBL3ubP8k3w77AByOypLm+pkIL/DCw0qKkhDbvhCy+L3hRTGPikpdb8WHcylAeKsjgwOxhj4cqTUA==", + "license": "Apache-2.0", + "dependencies": { + "@ai-sdk/provider": "2.0.0", + "@standard-schema/spec": "^1.0.0", + "eventsource-parser": "^3.0.5" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "zod": "^3.25.76 || ^4" + } + }, "node_modules/@alloc/quick-lru": { "version": "5.2.0", "resolved": "https://registry.npmjs.org/@alloc/quick-lru/-/quick-lru-5.2.0.tgz", @@ -844,6 +890,15 @@ "node": ">= 8" } }, + "node_modules/@opentelemetry/api": { + "version": "1.9.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.9.0.tgz", + "integrity": "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==", + "license": "Apache-2.0", + "engines": { + "node": ">=8.0.0" + } + }, "node_modules/@pkgjs/parseargs": { "version": "0.11.0", "resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz", @@ -1569,6 +1624,12 @@ "dev": true, "license": "MIT" }, + "node_modules/@standard-schema/spec": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/@standard-schema/spec/-/spec-1.0.0.tgz", + "integrity": "sha512-m2bOd0f2RT9k8QJx1JN85cZYyH1RqFBdlwtkSlf4tBDYLCiiZnv1fIIwacK6cqwXavOydf0NPToMQgpKq+dVlA==", + "license": "MIT" + }, "node_modules/@tailwindcss/typography": { "version": "0.5.16", "resolved": "https://registry.npmjs.org/@tailwindcss/typography/-/typography-0.5.16.tgz", @@ -1736,6 +1797,24 @@ "vite": "^4.2.0 || ^5.0.0 || ^6.0.0 || ^7.0.0" } }, + "node_modules/ai": { + "version": "5.0.27", + "resolved": "https://registry.npmjs.org/ai/-/ai-5.0.27.tgz", + "integrity": "sha512-V7I9Rvrap5+3ozAjOrETA5Mv9Z1LmQobyY13U88IkFRahFp0xrEwjvYTwjQa4q5lPgLxwKgbIZRLnZSbUQwnUg==", + "license": "Apache-2.0", + "dependencies": { + "@ai-sdk/gateway": "1.0.15", + "@ai-sdk/provider": "2.0.0", + "@ai-sdk/provider-utils": "3.0.7", + 
"@opentelemetry/api": "1.9.0" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "zod": "^3.25.76 || ^4" + } + }, "node_modules/ansi-regex": { "version": "6.2.0", "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.2.0.tgz", @@ -2277,6 +2356,15 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/eventsource-parser": { + "version": "3.0.5", + "resolved": "https://registry.npmjs.org/eventsource-parser/-/eventsource-parser-3.0.5.tgz", + "integrity": "sha512-bSRG85ZrMdmWtm7qkF9He9TNRzc/Bm99gEJMaQoHJ9E6Kv9QBbsldh2oMj7iXmYNEAVvNgvv5vPorG6W+XtBhQ==", + "license": "MIT", + "engines": { + "node": ">=20.0.0" + } + }, "node_modules/extend": { "version": "3.0.2", "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", @@ -2675,6 +2763,12 @@ "node": ">=6" } }, + "node_modules/json-schema": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/json-schema/-/json-schema-0.4.0.tgz", + "integrity": "sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA==", + "license": "(AFL-2.1 OR BSD-3-Clause)" + }, "node_modules/json5": { "version": "2.2.3", "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz", @@ -2757,12 +2851,12 @@ } }, "node_modules/lucide-react": { - "version": "0.263.1", - "resolved": "https://registry.npmjs.org/lucide-react/-/lucide-react-0.263.1.tgz", - "integrity": "sha512-keqxAx97PlaEN89PXZ6ki1N8nRjGWtDa4021GFYLNj0RgruM5odbpl8GHTExj0hhPq3sF6Up0gnxt6TSHu+ovw==", + "version": "0.542.0", + "resolved": "https://registry.npmjs.org/lucide-react/-/lucide-react-0.542.0.tgz", + "integrity": "sha512-w3hD8/SQB7+lzU2r4VdFyzzOzKnUjTZIF/MQJGSSvni7Llewni4vuViRppfRAa2guOsY5k4jZyxw/i9DQHv+dw==", "license": "ISC", "peerDependencies": { - "react": "^16.5.1 || ^17.0.0 || ^18.0.0" + "react": "^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0" } }, "node_modules/mdast-util-from-markdown": { @@ -4819,6 +4913,16 @@ "node": ">= 14.6" } }, + "node_modules/zod": { + "version": "4.1.5", + "resolved": "https://registry.npmjs.org/zod/-/zod-4.1.5.tgz", + "integrity": "sha512-rcUUZqlLJgBC33IT3PNMgsCq6TzLQEG/Ei/KTCU0PedSWRMAXoOUN+4t/0H+Q8bdnLPdqUYnvboJT0bn/229qg==", + "license": "MIT", + "peer": true, + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + }, "node_modules/zwitch": { "version": "2.0.4", "resolved": "https://registry.npmjs.org/zwitch/-/zwitch-2.0.4.tgz", diff --git a/frontend/package.json b/frontend/package.json index 233dc8b69704c303f6e8dc18183486296a521d10..947ab9cfbd67db342d0eeaa2a76e116c7fc1ba0d 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -6,6 +6,7 @@ "scripts": { "dev": "vite", "build": "tsc && vite build", + "build:watch": "tsc && vite build --watch", "preview": "vite preview" }, "dependencies": { @@ -17,9 +18,10 @@ "@radix-ui/react-slot": "^1.2.3", "@radix-ui/react-switch": "^1.2.6", "@tailwindcss/typography": "^0.5.16", + "ai": "^5.0.27", "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", - "lucide-react": "^0.263.1", + "lucide-react": "^0.542.0", "react": "^18.2.0", "react-dom": "^18.2.0", "react-markdown": "^10.1.0", diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 49ed8148ac1db557fdcdb4879fe91c730dff7b2d..69d1a8768f6bfb6d211009dbea1b7bde4d5b1be1 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -21,4 +21,3 @@ function App() { } export default App - diff --git a/frontend/src/components/Layout.tsx b/frontend/src/components/Layout.tsx new file mode 100644 index 
0000000000000000000000000000000000000000..123fa931e6eded834885a39d7db82c21d613b109
--- /dev/null
+++ b/frontend/src/components/Layout.tsx
@@ -0,0 +1,18 @@
+import { Outlet } from 'react-router-dom'
+import { Sidebar } from './Sidebar'
+
+export function Layout() {
+  return (
[JSX markup stripped during extraction: the return wraps a {/* Sidebar */} block rendering <Sidebar /> and a {/* Main content */} block rendering <Outlet /> inside styled container elements]
+  )
+}
diff --git a/frontend/src/components/Sidebar.tsx b/frontend/src/components/Sidebar.tsx
index 62e2155d4c3366d200acc6d69777cdddeb16daf6..ed2360dcb3f417ebf43c6f771c7daf818be3c180 100644
--- a/frontend/src/components/Sidebar.tsx
+++ b/frontend/src/components/Sidebar.tsx
@@ -63,24 +63,24 @@ export function Sidebar() {
   const location = useLocation()
   return (
[JSX markup stripped during extraction; recoverable changes in this hunk: the {/* Logo/Brand */} block becomes a {/* Header */} block that keeps the "Edge LLM" title and adds a "Local AI Platform" subtitle; the {/* Navigation */} markup is reworked, keeping the "Get started" group and renaming the "Tools" section label to "Advanced"; and the {/* Footer */} block showing "Local Model Platform" and "Privacy-focused AI" is removed]
) } diff --git a/frontend/src/components/chat/ChatContainer.tsx b/frontend/src/components/chat/ChatContainer.tsx index 8d2488393df0741d5743146dbf81533da8cfe575..9a990fbba24b99db4cfd7192db6bf6604bff336d 100644 --- a/frontend/src/components/chat/ChatContainer.tsx +++ b/frontend/src/components/chat/ChatContainer.tsx @@ -1,113 +1,185 @@ -import { useEffect, useRef } from 'react' -import { ChatMessage } from './ChatMessage' -import { ChatInput } from './ChatInput' +import React from 'react' +import ReactMarkdown from 'react-markdown' +import { Button } from '@/components/ui/button' +import { Textarea } from '@/components/ui/textarea' +import { Card } from '@/components/ui/card' +import { Badge } from '@/components/ui/badge' import { Message } from '@/types/chat' -import { Loader2 } from 'lucide-react' -import { cn } from '@/lib/utils' +import { Send, Square, Eye, EyeOff, Brain, User, Bot } from 'lucide-react' interface ChatContainerProps { messages: Message[] input: string - onInputChange: (value: string) => void + setInput: (value: string) => void onSubmit: () => void - onStop?: () => void - isLoading?: boolean + onStop: () => void + isLoading: boolean disabled?: boolean - className?: string placeholder?: string } export function ChatContainer({ messages, input, - onInputChange, + setInput, onSubmit, onStop, - isLoading = false, + isLoading, disabled = false, - className, - placeholder = "Ask me anything..." + placeholder = "Type your message..." }: ChatContainerProps) { - const messagesEndRef = useRef(null) - const messagesContainerRef = useRef(null) + const [showThinking, setShowThinking] = React.useState<{ [key: string]: boolean }>({}) - // Auto-scroll to bottom when new messages arrive - useEffect(() => { - if (messagesEndRef.current) { - messagesEndRef.current.scrollIntoView({ behavior: 'smooth' }) + const handleKeyPress = (e: React.KeyboardEvent) => { + if (e.key === 'Enter' && !e.shiftKey) { + e.preventDefault() + if (!isLoading && !disabled) { + onSubmit() + } } - }, [messages, isLoading]) + } - const handleCopyMessage = (content: string) => { - navigator.clipboard.writeText(content) - // Could add a toast notification here + const toggleThinking = (messageId: string) => { + setShowThinking(prev => ({ + ...prev, + [messageId]: !prev[messageId] + })) } return ( -
[rendered JSX stripped during extraction; recoverable changes in this hunk: the old {/* Messages Area */} markup built from <ChatMessage> items, the Loader2 "Thinking..." indicator with its assistant avatar, the scroll anchor, and the <ChatInput> footer are removed. The new {/* Messages */} markup renders everything inline: an empty state ("Start a conversation" / "Ask me anything and I'll help you out!") when there are no messages; per-message rows with a Bot or User icon, a "You" / "Assistant" label, and a Badge showing message.model_used when present; for messages with thinking_content and supports_thinking, a show/hide toggle keyed by showThinking[message.id] that reveals a "Thinking process:" panel rendered with ReactMarkdown; and the main message content rendered with ReactMarkdown]
-      {/* Input Area */}
+      {/* Input area */}
[input-area markup also stripped; the new imports suggest it is rebuilt around a Textarea with Send and Square (stop) controls]