tommytracx commited on
Commit
1c7834f
·
verified ·
1 Parent(s): 95ca0e1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -347
app.py CHANGED
@@ -1,474 +1,225 @@
1
- # app.py
2
- from flask import Flask, request, jsonify, Response
3
  import os
4
- import requests
5
  import json
6
  import logging
7
- from logging.handlers import RotatingFileHandler
8
  from typing import Dict, Any, List
9
- import time
10
- import socket
11
 
12
  app = Flask(__name__)
13
-
14
- # Configure logging with file output
15
- log_handler = RotatingFileHandler('/home/ollama/server.log', maxBytes=1000000, backupCount=5)
16
- log_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
17
- logging.getLogger().addHandler(log_handler)
18
- logging.getLogger().setLevel(logging.INFO)
19
 
20
  # Configuration
21
  OLLAMA_BASE_URL = os.getenv('OLLAMA_BASE_URL', 'http://localhost:11434')
 
22
  ALLOWED_MODELS = os.getenv('ALLOWED_MODELS', 'llama2,llama2:13b,llama2:70b,codellama,neural-chat,gemma-3-270m').split(',')
23
- MAX_TOKENS = int(os.getenv('MAX_TOKENS', '2048'))
24
- TEMPERATURE = float(os.getenv('TEMPERATURE', '0.7'))
25
 
26
  class OllamaManager:
27
  def __init__(self, base_url: str):
28
- self.base_url = base_url.rstrip('/')
29
- self.available_models = ALLOWED_MODELS # Initialize with allowed models
30
  self.refresh_models()
31
 
32
- def refresh_models(self) -> None:
33
- """Refresh the list of available models from local Ollama API, falling back to ALLOWED_MODELS."""
34
  try:
35
  response = requests.get(f"{self.base_url}/api/tags", timeout=10)
36
- response.raise_for_status()
37
- data = response.json()
38
- models = [model['name'] for model in data.get('models', [])]
39
- # Filter models to only include those in ALLOWED_MODELS
40
- self.available_models = [model for model in models if model in ALLOWED_MODELS]
41
- if not self.available_models:
42
- self.available_models = ALLOWED_MODELS
43
- logging.warning("No allowed models found in API response, using ALLOWED_MODELS")
44
- logging.info(f"Available models: {self.available_models}")
45
- except requests.exceptions.ConnectionError as e:
46
- logging.error(f"Connection error while refreshing models: {e}")
47
- self.available_models = ALLOWED_MODELS
48
- except requests.exceptions.HTTPError as e:
49
- logging.error(f"HTTP error while refreshing models: {e}")
50
- self.available_models = ALLOWED_MODELS
51
  except Exception as e:
52
- logging.error(f"Unexpected error refreshing models: {e}")
53
- self.available_models = ALLOWED_MODELS
54
 
55
  def list_models(self) -> List[str]:
56
- """Return the list of available models without refreshing."""
 
57
  return self.available_models
58
 
59
  def pull_model(self, model_name: str) -> Dict[str, Any]:
60
- """Pull a model from Ollama."""
61
- if model_name not in ALLOWED_MODELS:
62
- logging.warning(f"Attempted to pull unauthorized model: {model_name}")
63
- return {"status": "error", "message": f"Model {model_name} not in allowed list"}
64
-
65
  try:
66
- response = requests.post(f"{self.base_url}/api/pull", json={"name": model_name}, timeout=300)
67
- response.raise_for_status()
68
- self.refresh_models() # Refresh models after pulling
69
- logging.info(f"Successfully pulled model: {model_name}")
70
- return {"status": "success", "model": model_name}
71
- except requests.exceptions.ConnectionError as e:
72
- logging.error(f"Connection error pulling model {model_name}: {e}")
73
- return {"status": "error", "message": f"Connection error: {str(e)}"}
74
- except requests.exceptions.HTTPError as e:
75
- logging.error(f"HTTP error pulling model {model_name}: {e}")
76
- return {"status": "error", "message": f"HTTP error: {str(e)}"}
77
  except Exception as e:
78
- logging.error(f"Unexpected error pulling model {model_name}: {e}")
79
  return {"status": "error", "message": str(e)}
80
 
81
- def generate(self, model_name: str, prompt: str, stream: bool = False, **kwargs) -> Any:
82
- """Generate text using a model, with optional streaming."""
83
- if model_name not in self.available_models:
84
- logging.warning(f"Attempted to generate with unavailable model: {model_name}")
85
- return {"status": "error", "message": f"Model {model_name} not available"}
86
-
87
  try:
88
  payload = {
89
  "model": model_name,
90
  "prompt": prompt,
91
- "stream": stream,
92
- **kwargs
93
  }
94
- if stream:
95
- response = requests.post(f"{self.base_url}/api/generate", json=payload, stream=True, timeout=120)
96
- response.raise_for_status()
97
- return response
98
- else:
99
- response = requests.post(f"{self.base_url}/api/generate", json=payload, timeout=120)
100
- response.raise_for_status()
101
  data = response.json()
102
- logging.info(f"Generated response with model {model_name}")
103
  return {
104
  "status": "success",
105
  "response": data.get('response', ''),
106
  "model": model_name,
107
  "usage": data.get('usage', {})
108
  }
109
- except requests.exceptions.ConnectionError as e:
110
- logging.error(f"Connection error generating response with model {model_name}: {e}")
111
- return {"status": "error", "message": f"Connection error: {str(e)}"}
112
- except requests.exceptions.HTTPError as e:
113
- logging.error(f"HTTP error generating response with model {model_name}: {e}")
114
- return {"status": "error", "message": f"HTTP error: {str(e)}"}
115
  except Exception as e:
116
- logging.error(f"Unexpected error generating response with model {model_name}: {e}")
117
  return {"status": "error", "message": str(e)}
118
 
119
  # Initialize Ollama manager
120
  ollama_manager = OllamaManager(OLLAMA_BASE_URL)
121
 
122
- # HTML template for the home page with modernized UI
123
- HTML_TEMPLATE = '''
124
- <!DOCTYPE html>
125
- <html lang="en">
126
- <head>
127
- <meta charset="UTF-8">
128
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
129
- <title>Ollama API Space</title>
130
- <style>
131
- :root {
132
- --primary-color: #667eea;
133
- --secondary-color: #764ba2;
134
- --text-color: #333;
135
- --bg-color: #fafbfc;
136
- --border-color: #e9ecef;
137
- --input-bg: white;
138
- }
139
- .dark-mode {
140
- --primary-color: #3b4a8c;
141
- --secondary-color: #4a2e6b;
142
- --text-color: #f0f0f0;
143
- --bg-color: #1a1a1a;
144
- --border-color: #4a4a4a;
145
- --input-bg: #3a3a3a;
146
- }
147
- * {
148
- margin: 0;
149
- padding: 0;
150
- box-sizing: border-box;
151
- }
152
- body {
153
- font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
154
- background: linear-gradient(135deg, var(--primary-color) 0%, var(--secondary-color) 100%);
155
- color: var(--text-color);
156
- min-height: 100vh;
157
- padding: 20px;
158
- }
159
- .container {
160
- max-width: 900px;
161
- margin: 0 auto;
162
- background: var(--bg-color);
163
- border-radius: 20px;
164
- box-shadow: 0 20px 40px rgba(0,0,0,0.1);
165
- padding: 30px;
166
- position: relative;
167
- }
168
- .theme-toggle {
169
- position: absolute;
170
- top: 15px;
171
- right: 15px;
172
- background: none;
173
- border: none;
174
- cursor: pointer;
175
- font-size: 1.5rem;
176
- color: var(--text-color);
177
- transition: transform 0.2s;
178
- }
179
- .theme-toggle:hover {
180
- transform: scale(1.1);
181
- }
182
- h1 {
183
- font-size: 2.8rem;
184
- margin-bottom: 20px;
185
- text-align: center;
186
- font-weight: 700;
187
- }
188
- p {
189
- font-size: 1.2rem;
190
- line-height: 1.6;
191
- margin-bottom: 20px;
192
- }
193
- h2 {
194
- font-size: 1.8rem;
195
- margin-top: 30px;
196
- margin-bottom: 15px;
197
- }
198
- .endpoint {
199
- background: var(--border-color);
200
- padding: 20px;
201
- margin: 15px 0;
202
- border-radius: 10px;
203
- transition: transform 0.2s;
204
- }
205
- .endpoint:hover {
206
- transform: translateY(-2px);
207
- }
208
- .method {
209
- background: var(--primary-color);
210
- color: white;
211
- padding: 5px 12px;
212
- border-radius: 5px;
213
- font-size: 14px;
214
- margin-right: 10px;
215
- }
216
- .url {
217
- font-family: monospace;
218
- background: var(--input-bg);
219
- padding: 5px 10px;
220
- border-radius: 5px;
221
- color: var(--text-color);
222
- }
223
- pre {
224
- background: var(--border-color);
225
- padding: 20px;
226
- border-radius: 10px;
227
- overflow-x: auto;
228
- font-size: 14px;
229
- }
230
- code {
231
- font-family: monospace;
232
- font-size: 14px;
233
- }
234
- .dark-mode pre, .dark-mode .endpoint {
235
- background: #2a2a2a;
236
- }
237
- .status {
238
- text-align: center;
239
- padding: 10px;
240
- font-size: 14px;
241
- color: #6c757d;
242
- }
243
- .status.error {
244
- color: #dc3545;
245
- }
246
- .status.success {
247
- color: #28a745;
248
- }
249
- @media (max-width: 768px) {
250
- .container {
251
- padding: 20px;
252
- }
253
- h1 {
254
- font-size: 2.2rem;
255
- }
256
- h2 {
257
- font-size: 1.5rem;
258
- }
259
- }
260
- </style>
261
- </head>
262
- <body>
263
- <div class="container">
264
- <button class="theme-toggle" id="theme-toggle">🌙</button>
265
  <h1>🚀 Ollama API Space</h1>
266
- <p>This Space provides a robust API for managing and interacting with Ollama models, optimized for integration with OpenWebUI and other clients. It supports model listing, pulling, and text generation with streaming capabilities.</p>
267
 
268
  <h2>Available Endpoints</h2>
269
 
270
  <div class="endpoint">
271
  <span class="method">GET</span> <span class="url">/api/models</span>
272
- <p>List all available models filtered by ALLOWED_MODELS ({{ allowed_models }}).</p>
273
- <p><strong>Response:</strong> <code>{"status": "success", "models": [...], "count": N}</code></p>
274
  </div>
275
 
276
  <div class="endpoint">
277
  <span class="method">POST</span> <span class="url">/api/models/pull</span>
278
- <p>Pull a model from Ollama, restricted to ALLOWED_MODELS.</p>
279
- <p><strong>Body:</strong> <code>{"name": "model_name"}</code></p>
280
- <p><strong>Response:</strong> <code>{"status": "success", "model": "model_name"}</code></p>
281
  </div>
282
 
283
  <div class="endpoint">
284
  <span class="method">POST</span> <span class="url">/api/generate</span>
285
- <p>Generate text using a model, with optional streaming.</p>
286
- <p><strong>Body:</strong> <code>{"model": "model_name", "prompt": "your prompt", "stream": boolean, "temperature": float, "max_tokens": int}</code></p>
287
- <p><strong>Response (non-streaming):</strong> <code>{"status": "success", "response": "...", "model": "...", "usage": {...}}</code></p>
288
- <p><strong>Response (streaming):</strong> Stream of JSON objects: <code>{"response": "..."}</code></p>
289
  </div>
290
 
291
  <div class="endpoint">
292
  <span class="method">GET</span> <span class="url">/health</span>
293
- <p>Health check endpoint for the API and Ollama connection.</p>
294
- <p><strong>Response:</strong> <code>{"status": "healthy", "ollama_connection": "connected", "available_models": N, "timestamp": N}</code></p>
295
  </div>
296
 
297
  <h2>Usage Examples</h2>
298
- <p>Use this API with OpenWebUI or any REST client. Ensure models are in ALLOWED_MODELS: {{ allowed_models }}.</p>
299
 
300
  <h3>cURL Examples</h3>
301
  <pre>
302
  # List models
303
- curl {{ ollama_base_url }}/api/models
304
-
305
- # Pull a model
306
- curl -X POST {{ ollama_base_url }}/api/models/pull \
307
- -H "Content-Type: application/json" \
308
- -d '{"name": "gemma-3-270m"}'
309
-
310
- # Generate text (non-streaming)
311
- curl -X POST {{ ollama_base_url }}/api/generate \
312
- -H "Content-Type: application/json" \
313
- -d '{"model": "gemma-3-270m", "prompt": "Write a Python script", "temperature": 0.7, "max_tokens": 2048}'
314
 
315
- # Generate text (streaming)
316
- curl -X POST {{ ollama_base_url }}/api/generate \
317
  -H "Content-Type: application/json" \
318
- -d '{"model": "gemma-3-270m", "prompt": "Write a Python script", "stream": true}'
319
  </pre>
320
-
321
- <div class="status" id="status"></div>
322
- </div>
323
-
324
- <script>
325
- document.addEventListener('DOMContentLoaded', async function() {
326
- const themeToggle = document.getElementById('theme-toggle');
327
- themeToggle.addEventListener('click', function() {
328
- document.body.classList.toggle('dark-mode');
329
- themeToggle.textContent = document.body.classList.contains('dark-mode') ? '☀️' : '🌙';
330
- localStorage.setItem('theme', document.body.classList.contains('dark-mode') ? 'dark' : 'light');
331
- });
332
- if (localStorage.getItem('theme') === 'dark') {
333
- document.body.classList.add('dark-mode');
334
- themeToggle.textContent = '☀️';
335
- }
336
-
337
- // Fetch and display available models
338
- try {
339
- const response = await fetch('/api/models');
340
- const data = await response.json();
341
- const statusDiv = document.getElementById('status');
342
- if (data.status === 'success') {
343
- statusDiv.textContent = `Available models: ${data.models.join(', ')}`;
344
- statusDiv.className = 'status success';
345
- } else {
346
- statusDiv.textContent = `Error loading models: ${data.message}`;
347
- statusDiv.className = 'status error';
348
- }
349
- } catch (error) {
350
- const statusDiv = document.getElementById('status');
351
- statusDiv.textContent = `Error loading models: ${error.message}`;
352
- statusDiv.className = 'status error';
353
- }
354
- });
355
- </script>
356
- </body>
357
- </html>
358
- '''
359
-
360
- @app.route('/')
361
- def home():
362
- """Home page with API documentation."""
363
- return render_template_string(HTML_TEMPLATE, ollama_base_url=OLLAMA_BASE_URL, allowed_models=', '.join(ALLOWED_MODELS))
364
 
365
  @app.route('/api/models', methods=['GET'])
366
  def list_models():
367
- """List all available models."""
368
  try:
369
  models = ollama_manager.list_models()
370
- logging.info(f"Returning models: {models}")
371
  return jsonify({
372
  "status": "success",
373
  "models": models,
374
  "count": len(models)
375
  })
376
  except Exception as e:
377
- logging.error(f"Models endpoint error: {e}")
378
  return jsonify({"status": "error", "message": str(e)}), 500
379
 
380
  @app.route('/api/models/pull', methods=['POST'])
381
  def pull_model():
382
- """Pull a model from Ollama."""
383
  try:
384
  data = request.get_json()
385
  if not data or 'name' not in data:
386
- logging.warning("Model pull request missing 'name' field")
387
  return jsonify({"status": "error", "message": "Model name is required"}), 400
388
 
389
  model_name = data['name']
390
  if model_name not in ALLOWED_MODELS:
391
- logging.warning(f"Attempted to pull unauthorized model: {model_name}")
392
  return jsonify({"status": "error", "message": f"Model {model_name} not in allowed list"}), 400
393
 
394
  result = ollama_manager.pull_model(model_name)
395
- return jsonify(result), 200 if result["status"] == "success" else 500
 
 
 
396
  except Exception as e:
397
- logging.error(f"Pull model endpoint error: {e}")
398
  return jsonify({"status": "error", "message": str(e)}), 500
399
 
400
  @app.route('/api/generate', methods=['POST'])
401
  def generate_text():
402
- """Generate text using a model, with optional streaming."""
403
  try:
404
  data = request.get_json()
405
  if not data or 'model' not in data or 'prompt' not in data:
406
- logging.warning("Generate request missing 'model' or 'prompt' field")
407
  return jsonify({"status": "error", "message": "Model name and prompt are required"}), 400
408
 
409
  model_name = data['model']
410
  prompt = data['prompt']
411
- stream = data.get('stream', False)
412
- kwargs = {k: v for k in data if k not in ['model', 'prompt', 'stream']}
413
 
414
- result = ollama_manager.generate(model_name, prompt, stream=stream, **kwargs)
 
415
 
416
- if stream and isinstance(result, requests.Response):
417
- def generate_stream():
418
- try:
419
- for chunk in result.iter_content(chunk_size=None):
420
- yield chunk
421
- except Exception as e:
422
- logging.error(f"Streaming error: {e}")
423
- yield json.dumps({"status": "error", "message": str(e)}).encode()
424
- return Response(generate_stream(), content_type='application/json')
425
  else:
426
- return jsonify(result), 200 if result["status"] == "success" else 500
427
  except Exception as e:
428
- logging.error(f"Generate endpoint error: {e}")
429
  return jsonify({"status": "error", "message": str(e)}), 500
430
 
431
  @app.route('/health', methods=['GET'])
432
  def health_check():
433
- """Health check endpoint."""
434
  try:
 
435
  response = requests.get(f"{OLLAMA_BASE_URL}/api/tags", timeout=5)
436
- response.raise_for_status()
437
- logging.info("Health check successful")
438
- return jsonify({
439
- "status": "healthy",
440
- "ollama_connection": "connected",
441
- "available_models": len(ollama_manager.available_models),
442
- "timestamp": time.time(),
443
- "hostname": socket.gethostname()
444
- })
445
- except requests.exceptions.ConnectionError as e:
446
- logging.error(f"Health check connection error: {e}")
447
- return jsonify({
448
- "status": "unhealthy",
449
- "ollama_connection": "failed",
450
- "error": f"Connection error: {str(e)}",
451
- "timestamp": time.time(),
452
- "hostname": socket.gethostname()
453
- }), 503
454
- except requests.exceptions.HTTPError as e:
455
- logging.error(f"Health check HTTP error: {e}")
456
- return jsonify({
457
- "status": "unhealthy",
458
- "ollama_connection": "failed",
459
- "error": f"HTTP error: {str(e)}",
460
- "timestamp": time.time(),
461
- "hostname": socket.gethostname()
462
- }), 503
463
  except Exception as e:
464
- logging.error(f"Health check unexpected error: {e}")
465
  return jsonify({
466
  "status": "unhealthy",
467
  "ollama_connection": "failed",
468
- "error": str(e),
469
- "timestamp": time.time(),
470
- "hostname": socket.gethostname()
471
  }), 503
472
 
473
  if __name__ == '__main__':
474
- app.run(host='0.0.0.0', port=7860, debug=False)
 
1
+ from flask import Flask, request, jsonify
 
2
  import os
3
+ import subprocess
4
  import json
5
  import logging
 
6
  from typing import Dict, Any, List
7
+ import requests
 
8
 
9
  app = Flask(__name__)
10
+ logging.basicConfig(level=logging.INFO)
 
 
 
 
 
11
 
12
  # Configuration
13
  OLLAMA_BASE_URL = os.getenv('OLLAMA_BASE_URL', 'http://localhost:11434')
14
+ MODELS_DIR = os.getenv('MODELS_DIR', '/models')
15
  ALLOWED_MODELS = os.getenv('ALLOWED_MODELS', 'llama2,llama2:13b,llama2:70b,codellama,neural-chat,gemma-3-270m').split(',')
 
 
16
 
17
class OllamaManager:
    """Minimal client for a local Ollama server's HTTP API.

    Wraps the three endpoints this app needs: listing installed models
    (``/api/tags``), pulling a model (``/api/pull``) and non-streaming text
    generation (``/api/generate``).  Methods return plain dicts/lists so the
    Flask routes can ``jsonify`` them directly; failures are reported via
    ``{"status": "error", ...}`` rather than raised.
    """

    def __init__(self, base_url: str):
        # Base URL of the Ollama server, e.g. "http://localhost:11434".
        self.base_url = base_url
        self.available_models: List[str] = []
        self.refresh_models()

    def refresh_models(self) -> None:
        """Refresh ``self.available_models`` from the server; empty list on failure."""
        try:
            response = requests.get(f"{self.base_url}/api/tags", timeout=10)
            if response.status_code == 200:
                data = response.json()
                self.available_models = [model['name'] for model in data.get('models', [])]
            else:
                self.available_models = []
        except Exception as e:
            # Network errors are expected while Ollama is still starting;
            # degrade to "no models" instead of crashing the web app.
            logging.error(f"Error refreshing models: {e}")
            self.available_models = []

    def list_models(self) -> List[str]:
        """Return the current model list, refreshed on every call."""
        self.refresh_models()
        return self.available_models

    def pull_model(self, model_name: str) -> Dict[str, Any]:
        """Ask Ollama to download *model_name*.

        Large models can take minutes, hence the generous 300 s timeout.
        Returns a status dict; never raises.
        """
        try:
            response = requests.post(f"{self.base_url}/api/pull",
                                     json={"name": model_name},
                                     timeout=300)
            if response.status_code == 200:
                return {"status": "success", "model": model_name}
            return {"status": "error", "message": f"Failed to pull model: {response.text}"}
        except Exception as e:
            return {"status": "error", "message": str(e)}

    def generate(self, model_name: str, prompt: str, **kwargs) -> Dict[str, Any]:
        """Run a non-streaming generation and return the full response.

        Extra Ollama options (temperature, max_tokens, ...) may be passed as
        keyword arguments.  Returns a status dict; never raises.
        """
        try:
            payload = {
                "model": model_name,
                "prompt": prompt,
            }
            payload.update(kwargs)
            # Force non-streaming AFTER merging kwargs: a client-supplied
            # "stream": true would make Ollama emit NDJSON chunks and break
            # the single response.json() parse below.
            payload["stream"] = False

            response = requests.post(f"{self.base_url}/api/generate",
                                     json=payload,
                                     timeout=120)
            if response.status_code == 200:
                data = response.json()
                return {
                    "status": "success",
                    "response": data.get('response', ''),
                    "model": model_name,
                    # NOTE(review): Ollama's generate reply exposes token
                    # counts as eval_count/prompt_eval_count, not "usage" —
                    # this key likely stays {}; confirm against the server.
                    "usage": data.get('usage', {}),
                }
            return {"status": "error", "message": f"Generation failed: {response.text}"}
        except Exception as e:
            return {"status": "error", "message": str(e)}
80
 
81
  # Initialize Ollama manager
82
  ollama_manager = OllamaManager(OLLAMA_BASE_URL)
83
 
84
@app.route('/')
def home():
    """Home page with API documentation.

    Serves a static HTML summary of the available endpoints so a browser
    hitting the Space root gets usable docs instead of a 404.
    """
    return '''
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="utf-8">
        <title>Ollama API Space</title>
        <style>
            body { font-family: Arial, sans-serif; max-width: 800px; margin: 0 auto; padding: 20px; }
            .endpoint { background: #f5f5f5; padding: 15px; margin: 10px 0; border-radius: 5px; }
            .method { background: #007bff; color: white; padding: 2px 8px; border-radius: 3px; font-size: 12px; }
            .url { font-family: monospace; background: #e9ecef; padding: 2px 6px; border-radius: 3px; }
        </style>
    </head>
    <body>
        <h1>🚀 Ollama API Space</h1>
        <p>This Space provides API endpoints for Ollama model management and inference.</p>

        <h2>Available Endpoints</h2>

        <div class="endpoint">
            <span class="method">GET</span> <span class="url">/api/models</span>
            <p>List all available models</p>
        </div>

        <div class="endpoint">
            <span class="method">POST</span> <span class="url">/api/models/pull</span>
            <p>Pull a model from Ollama</p>
            <p>Body: {"name": "model_name"}</p>
        </div>

        <div class="endpoint">
            <span class="method">POST</span> <span class="url">/api/generate</span>
            <p>Generate text using a model</p>
            <p>Body: {"model": "model_name", "prompt": "your prompt"}</p>
        </div>

        <div class="endpoint">
            <span class="method">GET</span> <span class="url">/health</span>
            <p>Health check endpoint</p>
        </div>

        <h2>Usage Examples</h2>
        <p>You can use this API with OpenWebUI or any other client that supports REST APIs.</p>

        <h3>cURL Examples</h3>
        <pre>
# List models
curl https://your-space-url.hf.space/api/models

# Generate text
curl -X POST https://your-space-url.hf.space/api/generate \\
  -H "Content-Type: application/json" \\
  -d '{"model": "llama2", "prompt": "Hello, how are you?"}'
        </pre>
    </body>
    </html>
    '''
143
 
144
@app.route('/api/models', methods=['GET'])
def list_models():
    """List all available models.

    Returns {"status": "success", "models": [...], "count": N} on success,
    or a 500 with an error message if the Ollama lookup blows up.
    """
    try:
        names = ollama_manager.list_models()
        return jsonify({
            "status": "success",
            "models": names,
            "count": len(names),
        })
    except Exception as exc:
        return jsonify({"status": "error", "message": str(exc)}), 500
157
@app.route('/api/models/pull', methods=['POST'])
def pull_model():
    """Pull a model from Ollama.

    Expects JSON body {"name": "model_name"}; the name must appear in
    ALLOWED_MODELS.  400 for bad input, 500 if the pull itself fails.
    """
    try:
        body = request.get_json()
        if not body or 'name' not in body:
            return jsonify({"status": "error", "message": "Model name is required"}), 400

        requested = body['name']
        if requested not in ALLOWED_MODELS:
            return jsonify({"status": "error", "message": f"Model {requested} not in allowed list"}), 400

        outcome = ollama_manager.pull_model(requested)
        code = 200 if outcome["status"] == "success" else 500
        return jsonify(outcome), code
    except Exception as exc:
        return jsonify({"status": "error", "message": str(exc)}), 500
177
@app.route('/api/generate', methods=['POST'])
def generate_text():
    """Generate text using a model.

    Expects JSON body {"model": ..., "prompt": ...}; any other keys are
    forwarded to Ollama as generation options.  400 for missing fields,
    500 if generation fails.
    """
    try:
        data = request.get_json()
        if not data or 'model' not in data or 'prompt' not in data:
            return jsonify({"status": "error", "message": "Model name and prompt are required"}), 400

        model_name = data['model']
        prompt = data['prompt']

        # Forward extra generation options (temperature, max_tokens, ...).
        # 'stream' is dropped as well: this endpoint is strictly
        # non-streaming, and letting a client-supplied "stream": true
        # through would break the single-JSON response parsing downstream.
        kwargs = {k: v for k, v in data.items() if k not in ('model', 'prompt', 'stream')}

        result = ollama_manager.generate(model_name, prompt, **kwargs)
        status_code = 200 if result["status"] == "success" else 500
        return jsonify(result), status_code
    except Exception as e:
        return jsonify({"status": "error", "message": str(e)}), 500
199
@app.route('/health', methods=['GET'])
def health_check():
    """Health check endpoint.

    Probes the Ollama server's /api/tags; reports 200 when reachable and
    503 (with an error detail) when the connection or request fails.
    """
    try:
        probe = requests.get(f"{OLLAMA_BASE_URL}/api/tags", timeout=5)
        if probe.status_code != 200:
            return jsonify({
                "status": "unhealthy",
                "ollama_connection": "failed",
                "error": f"Ollama returned status {probe.status_code}",
            }), 503
        return jsonify({
            "status": "healthy",
            "ollama_connection": "connected",
            "available_models": len(ollama_manager.available_models),
        })
    except Exception as exc:
        return jsonify({
            "status": "unhealthy",
            "ollama_connection": "failed",
            "error": str(exc),
        }), 503
224
if __name__ == '__main__':
    # Bind on all interfaces; port 7860 is the Hugging Face Spaces convention.
    app.run(host='0.0.0.0', port=7860, debug=False)