tommytracx commited on
Commit
1c7834f
·
verified ·
1 Parent(s): 95ca0e1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -347
app.py CHANGED
@@ -1,474 +1,225 @@
1
- # app.py
2
- from flask import Flask, request, jsonify, Response
3
  import os
4
- import requests
5
  import json
6
  import logging
7
- from logging.handlers import RotatingFileHandler
8
  from typing import Dict, Any, List
9
- import time
10
- import socket
11
 
12
  app = Flask(__name__)
13
-
14
- # Configure logging with file output
15
- log_handler = RotatingFileHandler('/home/ollama/server.log', maxBytes=1000000, backupCount=5)
16
- log_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
17
- logging.getLogger().addHandler(log_handler)
18
- logging.getLogger().setLevel(logging.INFO)
19
 
20
  # Configuration
21
  OLLAMA_BASE_URL = os.getenv('OLLAMA_BASE_URL', 'http://localhost:11434')
 
22
  ALLOWED_MODELS = os.getenv('ALLOWED_MODELS', 'llama2,llama2:13b,llama2:70b,codellama,neural-chat,gemma-3-270m').split(',')
23
- MAX_TOKENS = int(os.getenv('MAX_TOKENS', '2048'))
24
- TEMPERATURE = float(os.getenv('TEMPERATURE', '0.7'))
25
 
26
  class OllamaManager:
27
  def __init__(self, base_url: str):
28
- self.base_url = base_url.rstrip('/')
29
- self.available_models = ALLOWED_MODELS # Initialize with allowed models
30
  self.refresh_models()
31
 
32
- def refresh_models(self) -> None:
33
- """Refresh the list of available models from local Ollama API, falling back to ALLOWED_MODELS."""
34
  try:
35
  response = requests.get(f"{self.base_url}/api/tags", timeout=10)
36
- response.raise_for_status()
37
- data = response.json()
38
- models = [model['name'] for model in data.get('models', [])]
39
- # Filter models to only include those in ALLOWED_MODELS
40
- self.available_models = [model for model in models if model in ALLOWED_MODELS]
41
- if not self.available_models:
42
- self.available_models = ALLOWED_MODELS
43
- logging.warning("No allowed models found in API response, using ALLOWED_MODELS")
44
- logging.info(f"Available models: {self.available_models}")
45
- except requests.exceptions.ConnectionError as e:
46
- logging.error(f"Connection error while refreshing models: {e}")
47
- self.available_models = ALLOWED_MODELS
48
- except requests.exceptions.HTTPError as e:
49
- logging.error(f"HTTP error while refreshing models: {e}")
50
- self.available_models = ALLOWED_MODELS
51
  except Exception as e:
52
- logging.error(f"Unexpected error refreshing models: {e}")
53
- self.available_models = ALLOWED_MODELS
54
 
55
  def list_models(self) -> List[str]:
56
- """Return the list of available models without refreshing."""
 
57
  return self.available_models
58
 
59
  def pull_model(self, model_name: str) -> Dict[str, Any]:
60
- """Pull a model from Ollama."""
61
- if model_name not in ALLOWED_MODELS:
62
- logging.warning(f"Attempted to pull unauthorized model: {model_name}")
63
- return {"status": "error", "message": f"Model {model_name} not in allowed list"}
64
-
65
  try:
66
- response = requests.post(f"{self.base_url}/api/pull", json={"name": model_name}, timeout=300)
67
- response.raise_for_status()
68
- self.refresh_models() # Refresh models after pulling
69
- logging.info(f"Successfully pulled model: {model_name}")
70
- return {"status": "success", "model": model_name}
71
- except requests.exceptions.ConnectionError as e:
72
- logging.error(f"Connection error pulling model {model_name}: {e}")
73
- return {"status": "error", "message": f"Connection error: {str(e)}"}
74
- except requests.exceptions.HTTPError as e:
75
- logging.error(f"HTTP error pulling model {model_name}: {e}")
76
- return {"status": "error", "message": f"HTTP error: {str(e)}"}
77
  except Exception as e:
78
- logging.error(f"Unexpected error pulling model {model_name}: {e}")
79
  return {"status": "error", "message": str(e)}
80
 
81
- def generate(self, model_name: str, prompt: str, stream: bool = False, **kwargs) -> Any:
82
- """Generate text using a model, with optional streaming."""
83
- if model_name not in self.available_models:
84
- logging.warning(f"Attempted to generate with unavailable model: {model_name}")
85
- return {"status": "error", "message": f"Model {model_name} not available"}
86
-
87
  try:
88
  payload = {
89
  "model": model_name,
90
  "prompt": prompt,
91
- "stream": stream,
92
- **kwargs
93
  }
94
- if stream:
95
- response = requests.post(f"{self.base_url}/api/generate", json=payload, stream=True, timeout=120)
96
- response.raise_for_status()
97
- return response
98
- else:
99
- response = requests.post(f"{self.base_url}/api/generate", json=payload, timeout=120)
100
- response.raise_for_status()
101
  data = response.json()
102
- logging.info(f"Generated response with model {model_name}")
103
  return {
104
  "status": "success",
105
  "response": data.get('response', ''),
106
  "model": model_name,
107
  "usage": data.get('usage', {})
108
  }
109
- except requests.exceptions.ConnectionError as e:
110
- logging.error(f"Connection error generating response with model {model_name}: {e}")
111
- return {"status": "error", "message": f"Connection error: {str(e)}"}
112
- except requests.exceptions.HTTPError as e:
113
- logging.error(f"HTTP error generating response with model {model_name}: {e}")
114
- return {"status": "error", "message": f"HTTP error: {str(e)}"}
115
  except Exception as e:
116
- logging.error(f"Unexpected error generating response with model {model_name}: {e}")
117
  return {"status": "error", "message": str(e)}
118
 
119
  # Initialize Ollama manager
120
  ollama_manager = OllamaManager(OLLAMA_BASE_URL)
121
 
122
- # HTML template for the home page with modernized UI
123
- HTML_TEMPLATE = '''
124
- <!DOCTYPE html>
125
- <html lang="en">
126
- <head>
127
- <meta charset="UTF-8">
128
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
129
- <title>Ollama API Space</title>
130
- <style>
131
- :root {
132
- --primary-color: #667eea;
133
- --secondary-color: #764ba2;
134
- --text-color: #333;
135
- --bg-color: #fafbfc;
136
- --border-color: #e9ecef;
137
- --input-bg: white;
138
- }
139
- .dark-mode {
140
- --primary-color: #3b4a8c;
141
- --secondary-color: #4a2e6b;
142
- --text-color: #f0f0f0;
143
- --bg-color: #1a1a1a;
144
- --border-color: #4a4a4a;
145
- --input-bg: #3a3a3a;
146
- }
147
- * {
148
- margin: 0;
149
- padding: 0;
150
- box-sizing: border-box;
151
- }
152
- body {
153
- font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
154
- background: linear-gradient(135deg, var(--primary-color) 0%, var(--secondary-color) 100%);
155
- color: var(--text-color);
156
- min-height: 100vh;
157
- padding: 20px;
158
- }
159
- .container {
160
- max-width: 900px;
161
- margin: 0 auto;
162
- background: var(--bg-color);
163
- border-radius: 20px;
164
- box-shadow: 0 20px 40px rgba(0,0,0,0.1);
165
- padding: 30px;
166
- position: relative;
167
- }
168
- .theme-toggle {
169
- position: absolute;
170
- top: 15px;
171
- right: 15px;
172
- background: none;
173
- border: none;
174
- cursor: pointer;
175
- font-size: 1.5rem;
176
- color: var(--text-color);
177
- transition: transform 0.2s;
178
- }
179
- .theme-toggle:hover {
180
- transform: scale(1.1);
181
- }
182
- h1 {
183
- font-size: 2.8rem;
184
- margin-bottom: 20px;
185
- text-align: center;
186
- font-weight: 700;
187
- }
188
- p {
189
- font-size: 1.2rem;
190
- line-height: 1.6;
191
- margin-bottom: 20px;
192
- }
193
- h2 {
194
- font-size: 1.8rem;
195
- margin-top: 30px;
196
- margin-bottom: 15px;
197
- }
198
- .endpoint {
199
- background: var(--border-color);
200
- padding: 20px;
201
- margin: 15px 0;
202
- border-radius: 10px;
203
- transition: transform 0.2s;
204
- }
205
- .endpoint:hover {
206
- transform: translateY(-2px);
207
- }
208
- .method {
209
- background: var(--primary-color);
210
- color: white;
211
- padding: 5px 12px;
212
- border-radius: 5px;
213
- font-size: 14px;
214
- margin-right: 10px;
215
- }
216
- .url {
217
- font-family: monospace;
218
- background: var(--input-bg);
219
- padding: 5px 10px;
220
- border-radius: 5px;
221
- color: var(--text-color);
222
- }
223
- pre {
224
- background: var(--border-color);
225
- padding: 20px;
226
- border-radius: 10px;
227
- overflow-x: auto;
228
- font-size: 14px;
229
- }
230
- code {
231
- font-family: monospace;
232
- font-size: 14px;
233
- }
234
- .dark-mode pre, .dark-mode .endpoint {
235
- background: #2a2a2a;
236
- }
237
- .status {
238
- text-align: center;
239
- padding: 10px;
240
- font-size: 14px;
241
- color: #6c757d;
242
- }
243
- .status.error {
244
- color: #dc3545;
245
- }
246
- .status.success {
247
- color: #28a745;
248
- }
249
- @media (max-width: 768px) {
250
- .container {
251
- padding: 20px;
252
- }
253
- h1 {
254
- font-size: 2.2rem;
255
- }
256
- h2 {
257
- font-size: 1.5rem;
258
- }
259
- }
260
- </style>
261
- </head>
262
- <body>
263
- <div class="container">
264
- <button class="theme-toggle" id="theme-toggle">🌙</button>
265
  <h1>🚀 Ollama API Space</h1>
266
- <p>This Space provides a robust API for managing and interacting with Ollama models, optimized for integration with OpenWebUI and other clients. It supports model listing, pulling, and text generation with streaming capabilities.</p>
267
 
268
  <h2>Available Endpoints</h2>
269
 
270
  <div class="endpoint">
271
  <span class="method">GET</span> <span class="url">/api/models</span>
272
- <p>List all available models filtered by ALLOWED_MODELS ({{ allowed_models }}).</p>
273
- <p><strong>Response:</strong> <code>{"status": "success", "models": [...], "count": N}</code></p>
274
  </div>
275
 
276
  <div class="endpoint">
277
  <span class="method">POST</span> <span class="url">/api/models/pull</span>
278
- <p>Pull a model from Ollama, restricted to ALLOWED_MODELS.</p>
279
- <p><strong>Body:</strong> <code>{"name": "model_name"}</code></p>
280
- <p><strong>Response:</strong> <code>{"status": "success", "model": "model_name"}</code></p>
281
  </div>
282
 
283
  <div class="endpoint">
284
  <span class="method">POST</span> <span class="url">/api/generate</span>
285
- <p>Generate text using a model, with optional streaming.</p>
286
- <p><strong>Body:</strong> <code>{"model": "model_name", "prompt": "your prompt", "stream": boolean, "temperature": float, "max_tokens": int}</code></p>
287
- <p><strong>Response (non-streaming):</strong> <code>{"status": "success", "response": "...", "model": "...", "usage": {...}}</code></p>
288
- <p><strong>Response (streaming):</strong> Stream of JSON objects: <code>{"response": "..."}</code></p>
289
  </div>
290
 
291
  <div class="endpoint">
292
  <span class="method">GET</span> <span class="url">/health</span>
293
- <p>Health check endpoint for the API and Ollama connection.</p>
294
- <p><strong>Response:</strong> <code>{"status": "healthy", "ollama_connection": "connected", "available_models": N, "timestamp": N}</code></p>
295
  </div>
296
 
297
  <h2>Usage Examples</h2>
298
- <p>Use this API with OpenWebUI or any REST client. Ensure models are in ALLOWED_MODELS: {{ allowed_models }}.</p>
299
 
300
  <h3>cURL Examples</h3>
301
  <pre>
302
  # List models
303
- curl {{ ollama_base_url }}/api/models
304
-
305
- # Pull a model
306
- curl -X POST {{ ollama_base_url }}/api/models/pull \
307
- -H "Content-Type: application/json" \
308
- -d '{"name": "gemma-3-270m"}'
309
-
310
- # Generate text (non-streaming)
311
- curl -X POST {{ ollama_base_url }}/api/generate \
312
- -H "Content-Type: application/json" \
313
- -d '{"model": "gemma-3-270m", "prompt": "Write a Python script", "temperature": 0.7, "max_tokens": 2048}'
314
 
315
- # Generate text (streaming)
316
- curl -X POST {{ ollama_base_url }}/api/generate \
317
  -H "Content-Type: application/json" \
318
- -d '{"model": "gemma-3-270m", "prompt": "Write a Python script", "stream": true}'
319
  </pre>
320
-
321
- <div class="status" id="status"></div>
322
- </div>
323
-
324
- <script>
325
- document.addEventListener('DOMContentLoaded', async function() {
326
- const themeToggle = document.getElementById('theme-toggle');
327
- themeToggle.addEventListener('click', function() {
328
- document.body.classList.toggle('dark-mode');
329
- themeToggle.textContent = document.body.classList.contains('dark-mode') ? '☀️' : '🌙';
330
- localStorage.setItem('theme', document.body.classList.contains('dark-mode') ? 'dark' : 'light');
331
- });
332
- if (localStorage.getItem('theme') === 'dark') {
333
- document.body.classList.add('dark-mode');
334
- themeToggle.textContent = '☀️';
335
- }
336
-
337
- // Fetch and display available models
338
- try {
339
- const response = await fetch('/api/models');
340
- const data = await response.json();
341
- const statusDiv = document.getElementById('status');
342
- if (data.status === 'success') {
343
- statusDiv.textContent = `Available models: ${data.models.join(', ')}`;
344
- statusDiv.className = 'status success';
345
- } else {
346
- statusDiv.textContent = `Error loading models: ${data.message}`;
347
- statusDiv.className = 'status error';
348
- }
349
- } catch (error) {
350
- const statusDiv = document.getElementById('status');
351
- statusDiv.textContent = `Error loading models: ${error.message}`;
352
- statusDiv.className = 'status error';
353
- }
354
- });
355
- </script>
356
- </body>
357
- </html>
358
- '''
359
-
360
- @app.route('/')
361
- def home():
362
- """Home page with API documentation."""
363
- return render_template_string(HTML_TEMPLATE, ollama_base_url=OLLAMA_BASE_URL, allowed_models=', '.join(ALLOWED_MODELS))
364
 
365
  @app.route('/api/models', methods=['GET'])
366
  def list_models():
367
- """List all available models."""
368
  try:
369
  models = ollama_manager.list_models()
370
- logging.info(f"Returning models: {models}")
371
  return jsonify({
372
  "status": "success",
373
  "models": models,
374
  "count": len(models)
375
  })
376
  except Exception as e:
377
- logging.error(f"Models endpoint error: {e}")
378
  return jsonify({"status": "error", "message": str(e)}), 500
379
 
380
  @app.route('/api/models/pull', methods=['POST'])
381
  def pull_model():
382
- """Pull a model from Ollama."""
383
  try:
384
  data = request.get_json()
385
  if not data or 'name' not in data:
386
- logging.warning("Model pull request missing 'name' field")
387
  return jsonify({"status": "error", "message": "Model name is required"}), 400
388
 
389
  model_name = data['name']
390
  if model_name not in ALLOWED_MODELS:
391
- logging.warning(f"Attempted to pull unauthorized model: {model_name}")
392
  return jsonify({"status": "error", "message": f"Model {model_name} not in allowed list"}), 400
393
 
394
  result = ollama_manager.pull_model(model_name)
395
- return jsonify(result), 200 if result["status"] == "success" else 500
 
 
 
396
  except Exception as e:
397
- logging.error(f"Pull model endpoint error: {e}")
398
  return jsonify({"status": "error", "message": str(e)}), 500
399
 
400
  @app.route('/api/generate', methods=['POST'])
401
  def generate_text():
402
- """Generate text using a model, with optional streaming."""
403
  try:
404
  data = request.get_json()
405
  if not data or 'model' not in data or 'prompt' not in data:
406
- logging.warning("Generate request missing 'model' or 'prompt' field")
407
  return jsonify({"status": "error", "message": "Model name and prompt are required"}), 400
408
 
409
  model_name = data['model']
410
  prompt = data['prompt']
411
- stream = data.get('stream', False)
412
- kwargs = {k: v for k in data if k not in ['model', 'prompt', 'stream']}
413
 
414
- result = ollama_manager.generate(model_name, prompt, stream=stream, **kwargs)
 
415
 
416
- if stream and isinstance(result, requests.Response):
417
- def generate_stream():
418
- try:
419
- for chunk in result.iter_content(chunk_size=None):
420
- yield chunk
421
- except Exception as e:
422
- logging.error(f"Streaming error: {e}")
423
- yield json.dumps({"status": "error", "message": str(e)}).encode()
424
- return Response(generate_stream(), content_type='application/json')
425
  else:
426
- return jsonify(result), 200 if result["status"] == "success" else 500
427
  except Exception as e:
428
- logging.error(f"Generate endpoint error: {e}")
429
  return jsonify({"status": "error", "message": str(e)}), 500
430
 
431
  @app.route('/health', methods=['GET'])
432
  def health_check():
433
- """Health check endpoint."""
434
  try:
 
435
  response = requests.get(f"{OLLAMA_BASE_URL}/api/tags", timeout=5)
436
- response.raise_for_status()
437
- logging.info("Health check successful")
438
- return jsonify({
439
- "status": "healthy",
440
- "ollama_connection": "connected",
441
- "available_models": len(ollama_manager.available_models),
442
- "timestamp": time.time(),
443
- "hostname": socket.gethostname()
444
- })
445
- except requests.exceptions.ConnectionError as e:
446
- logging.error(f"Health check connection error: {e}")
447
- return jsonify({
448
- "status": "unhealthy",
449
- "ollama_connection": "failed",
450
- "error": f"Connection error: {str(e)}",
451
- "timestamp": time.time(),
452
- "hostname": socket.gethostname()
453
- }), 503
454
- except requests.exceptions.HTTPError as e:
455
- logging.error(f"Health check HTTP error: {e}")
456
- return jsonify({
457
- "status": "unhealthy",
458
- "ollama_connection": "failed",
459
- "error": f"HTTP error: {str(e)}",
460
- "timestamp": time.time(),
461
- "hostname": socket.gethostname()
462
- }), 503
463
  except Exception as e:
464
- logging.error(f"Health check unexpected error: {e}")
465
  return jsonify({
466
  "status": "unhealthy",
467
  "ollama_connection": "failed",
468
- "error": str(e),
469
- "timestamp": time.time(),
470
- "hostname": socket.gethostname()
471
  }), 503
472
 
473
  if __name__ == '__main__':
474
- app.run(host='0.0.0.0', port=7860, debug=False)
 
1
+ from flask import Flask, request, jsonify
 
2
  import os
3
+ import subprocess
4
  import json
5
  import logging
 
6
  from typing import Dict, Any, List
7
+ import requests
 
8
 
9
  app = Flask(__name__)
10
+ logging.basicConfig(level=logging.INFO)
 
 
 
 
 
11
 
12
  # Configuration
13
  OLLAMA_BASE_URL = os.getenv('OLLAMA_BASE_URL', 'http://localhost:11434')
14
+ MODELS_DIR = os.getenv('MODELS_DIR', '/models')
15
  ALLOWED_MODELS = os.getenv('ALLOWED_MODELS', 'llama2,llama2:13b,llama2:70b,codellama,neural-chat,gemma-3-270m').split(',')
 
 
16
 
17
class OllamaManager:
    """Minimal client for a local Ollama server's HTTP API.

    Wraps the three endpoints this app needs: listing installed models
    (``/api/tags``), pulling a model (``/api/pull``) and non-streaming text
    generation (``/api/generate``).  Methods return plain dicts/lists so the
    Flask routes can ``jsonify`` them directly; failures are reported via
    ``{"status": "error", ...}`` rather than raised.
    """

    def __init__(self, base_url: str):
        # Base URL of the Ollama server, e.g. "http://localhost:11434".
        self.base_url = base_url
        self.available_models: List[str] = []
        self.refresh_models()

    def refresh_models(self) -> None:
        """Refresh ``self.available_models`` from the server; empty list on failure."""
        try:
            response = requests.get(f"{self.base_url}/api/tags", timeout=10)
            if response.status_code == 200:
                data = response.json()
                self.available_models = [model['name'] for model in data.get('models', [])]
            else:
                self.available_models = []
        except Exception as e:
            # Network errors are expected while Ollama is still starting;
            # degrade to "no models" instead of crashing the web app.
            logging.error(f"Error refreshing models: {e}")
            self.available_models = []

    def list_models(self) -> List[str]:
        """Return the current model list, refreshed on every call."""
        self.refresh_models()
        return self.available_models

    def pull_model(self, model_name: str) -> Dict[str, Any]:
        """Ask Ollama to download *model_name*.

        Large models can take minutes, hence the generous 300 s timeout.
        Returns a status dict; never raises.
        """
        try:
            response = requests.post(f"{self.base_url}/api/pull",
                                     json={"name": model_name},
                                     timeout=300)
            if response.status_code == 200:
                return {"status": "success", "model": model_name}
            return {"status": "error", "message": f"Failed to pull model: {response.text}"}
        except Exception as e:
            return {"status": "error", "message": str(e)}

    def generate(self, model_name: str, prompt: str, **kwargs) -> Dict[str, Any]:
        """Run a non-streaming generation and return the full response.

        Extra Ollama options (temperature, max_tokens, ...) may be passed as
        keyword arguments.  Returns a status dict; never raises.
        """
        try:
            payload = {
                "model": model_name,
                "prompt": prompt,
            }
            payload.update(kwargs)
            # Force non-streaming AFTER merging kwargs: a client-supplied
            # "stream": true would make Ollama emit NDJSON chunks and break
            # the single response.json() parse below.
            payload["stream"] = False

            response = requests.post(f"{self.base_url}/api/generate",
                                     json=payload,
                                     timeout=120)
            if response.status_code == 200:
                data = response.json()
                return {
                    "status": "success",
                    "response": data.get('response', ''),
                    "model": model_name,
                    # NOTE(review): Ollama's generate reply exposes token
                    # counts as eval_count/prompt_eval_count, not "usage" —
                    # this key likely stays {}; confirm against the server.
                    "usage": data.get('usage', {}),
                }
            return {"status": "error", "message": f"Generation failed: {response.text}"}
        except Exception as e:
            return {"status": "error", "message": str(e)}
80
 
81
  # Initialize Ollama manager
82
  ollama_manager = OllamaManager(OLLAMA_BASE_URL)
83
 
84
@app.route('/')
def home():
    """Home page with API documentation.

    Serves a static HTML summary of the available endpoints so a browser
    hitting the Space root gets usable docs instead of a 404.
    """
    return '''
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="utf-8">
        <title>Ollama API Space</title>
        <style>
            body { font-family: Arial, sans-serif; max-width: 800px; margin: 0 auto; padding: 20px; }
            .endpoint { background: #f5f5f5; padding: 15px; margin: 10px 0; border-radius: 5px; }
            .method { background: #007bff; color: white; padding: 2px 8px; border-radius: 3px; font-size: 12px; }
            .url { font-family: monospace; background: #e9ecef; padding: 2px 6px; border-radius: 3px; }
        </style>
    </head>
    <body>
        <h1>🚀 Ollama API Space</h1>
        <p>This Space provides API endpoints for Ollama model management and inference.</p>

        <h2>Available Endpoints</h2>

        <div class="endpoint">
            <span class="method">GET</span> <span class="url">/api/models</span>
            <p>List all available models</p>
        </div>

        <div class="endpoint">
            <span class="method">POST</span> <span class="url">/api/models/pull</span>
            <p>Pull a model from Ollama</p>
            <p>Body: {"name": "model_name"}</p>
        </div>

        <div class="endpoint">
            <span class="method">POST</span> <span class="url">/api/generate</span>
            <p>Generate text using a model</p>
            <p>Body: {"model": "model_name", "prompt": "your prompt"}</p>
        </div>

        <div class="endpoint">
            <span class="method">GET</span> <span class="url">/health</span>
            <p>Health check endpoint</p>
        </div>

        <h2>Usage Examples</h2>
        <p>You can use this API with OpenWebUI or any other client that supports REST APIs.</p>

        <h3>cURL Examples</h3>
        <pre>
# List models
curl https://your-space-url.hf.space/api/models

# Generate text
curl -X POST https://your-space-url.hf.space/api/generate \\
  -H "Content-Type: application/json" \\
  -d '{"model": "llama2", "prompt": "Hello, how are you?"}'
        </pre>
    </body>
    </html>
    '''
143
 
144
@app.route('/api/models', methods=['GET'])
def list_models():
    """List all available models.

    Returns {"status": "success", "models": [...], "count": N} on success,
    or a 500 with an error message if the Ollama lookup blows up.
    """
    try:
        names = ollama_manager.list_models()
        return jsonify({
            "status": "success",
            "models": names,
            "count": len(names),
        })
    except Exception as exc:
        return jsonify({"status": "error", "message": str(exc)}), 500
157
@app.route('/api/models/pull', methods=['POST'])
def pull_model():
    """Pull a model from Ollama.

    Expects JSON body {"name": "model_name"}; the name must appear in
    ALLOWED_MODELS.  400 for bad input, 500 if the pull itself fails.
    """
    try:
        body = request.get_json()
        if not body or 'name' not in body:
            return jsonify({"status": "error", "message": "Model name is required"}), 400

        requested = body['name']
        if requested not in ALLOWED_MODELS:
            return jsonify({"status": "error", "message": f"Model {requested} not in allowed list"}), 400

        outcome = ollama_manager.pull_model(requested)
        code = 200 if outcome["status"] == "success" else 500
        return jsonify(outcome), code
    except Exception as exc:
        return jsonify({"status": "error", "message": str(exc)}), 500
177
@app.route('/api/generate', methods=['POST'])
def generate_text():
    """Generate text using a model.

    Expects JSON body {"model": ..., "prompt": ...}; any other keys are
    forwarded to Ollama as generation options.  400 for missing fields,
    500 if generation fails.
    """
    try:
        data = request.get_json()
        if not data or 'model' not in data or 'prompt' not in data:
            return jsonify({"status": "error", "message": "Model name and prompt are required"}), 400

        model_name = data['model']
        prompt = data['prompt']

        # Forward extra generation options (temperature, max_tokens, ...).
        # 'stream' is dropped as well: this endpoint is strictly
        # non-streaming, and letting a client-supplied "stream": true
        # through would break the single-JSON response parsing downstream.
        kwargs = {k: v for k, v in data.items() if k not in ('model', 'prompt', 'stream')}

        result = ollama_manager.generate(model_name, prompt, **kwargs)
        status_code = 200 if result["status"] == "success" else 500
        return jsonify(result), status_code
    except Exception as e:
        return jsonify({"status": "error", "message": str(e)}), 500
199
@app.route('/health', methods=['GET'])
def health_check():
    """Health check endpoint.

    Probes the Ollama server's /api/tags; reports 200 when reachable and
    503 (with an error detail) when the connection or request fails.
    """
    try:
        probe = requests.get(f"{OLLAMA_BASE_URL}/api/tags", timeout=5)
        if probe.status_code != 200:
            return jsonify({
                "status": "unhealthy",
                "ollama_connection": "failed",
                "error": f"Ollama returned status {probe.status_code}",
            }), 503
        return jsonify({
            "status": "healthy",
            "ollama_connection": "connected",
            "available_models": len(ollama_manager.available_models),
        })
    except Exception as exc:
        return jsonify({
            "status": "unhealthy",
            "ollama_connection": "failed",
            "error": str(exc),
        }), 503
224
if __name__ == '__main__':
    # Bind on all interfaces; port 7860 is the Hugging Face Spaces convention.
    app.run(host='0.0.0.0', port=7860, debug=False)