Use Hugging Face Inference API on Spaces instead of loading models locally
app.py
CHANGED
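This commit routes text generation through the Hugging Face Inference API when the app runs on Spaces, while keeping retrieval local. It adds an InferenceAPIBot wrapper around RAGBot (vector search and prompt formatting stay local; chat completions and readability rewriting go to the API), threads a use_inference_api flag through GradioRAGInterface and create_interface, and has create_demo_for_spaces skip local model loading entirely.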
@@ -23,13 +23,21 @@ import gradio as gr
 import argparse
 import sys
 import os
-from typing import Tuple, Optional
+from typing import Tuple, Optional, List
 import logging
 import textstat
 import torch
 
 # Import from bot.py
-from bot import RAGBot, parse_args
+from bot import RAGBot, parse_args, Chunk
+
+# For Hugging Face Inference API
+try:
+    from huggingface_hub import InferenceClient
+    HF_INFERENCE_AVAILABLE = True
+except ImportError:
+    HF_INFERENCE_AVAILABLE = False
+    logging.warning("huggingface_hub not available, InferenceClient will not work")
 
 # Set up logging
 logging.basicConfig(level=logging.INFO)
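For orientation, a minimal standalone sketch of the guarded import and the InferenceClient chat call the new code relies on. It assumes huggingface_hub is installed and HF_TOKEN is set in the environment; the model id is a placeholder:

import os

try:
    from huggingface_hub import InferenceClient
    HF_INFERENCE_AVAILABLE = True
except ImportError:
    HF_INFERENCE_AVAILABLE = False

if HF_INFERENCE_AVAILABLE:
    # Same OpenAI-compatible chat interface the diff uses below
    client = InferenceClient(api_key=os.environ["HF_TOKEN"])
    completion = client.chat.completions.create(
        model="mistralai/Mistral-7B-Instruct-v0.2",  # placeholder model id
        messages=[{"role": "user", "content": "What is anemia?"}],
        max_tokens=64,
    )
    print(completion.choices[0].message.content)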
@@ -159,12 +167,151 @@ EXAMPLE_QUESTIONS = [
 ]
 
 
+class InferenceAPIBot:
+    """Wrapper that uses Hugging Face Inference API instead of loading models locally"""
+
+    def __init__(self, bot: RAGBot, hf_token: str):
+        """Initialize with a RAGBot (for vector DB) and HF token for Inference API"""
+        self.bot = bot  # Use bot for vector DB and formatting
+        self.client = InferenceClient(api_key=hf_token)
+        self.current_model = bot.args.model
+        logger.info(f"InferenceAPIBot initialized with model: {self.current_model}")
+
+    def generate_answer(self, prompt: str, **kwargs) -> str:
+        """Generate answer using Inference API"""
+        try:
+            # Convert prompt to chat format
+            messages = [{"role": "user", "content": prompt}]
+
+            # Call Inference API
+            completion = self.client.chat.completions.create(
+                model=self.current_model,
+                messages=messages,
+                max_tokens=kwargs.get('max_new_tokens', 512),
+                temperature=kwargs.get('temperature', 0.2),
+                top_p=kwargs.get('top_p', 0.9),
+            )
+
+            answer = completion.choices[0].message.content
+            return answer
+        except Exception as e:
+            logger.error(f"Error calling Inference API: {e}", exc_info=True)
+            return f"Error generating answer: {str(e)}"
+
+    def enhance_readability(self, answer: str, target_level: str = "middle_school") -> Tuple[str, float]:
+        """Enhance readability using Inference API"""
+        try:
+            # Define prompts for different reading levels (same as bot.py)
+            if target_level == "middle_school":
+                level_description = "middle school reading level (ages 12-14, 6th-8th grade)"
+                instructions = """
+- Use simpler medical terms or explain them
+- Medium-length sentences
+- Clear, structured explanations
+- Keep important medical information accessible"""
+            elif target_level == "high_school":
+                level_description = "high school reading level (ages 15-18, 9th-12th grade)"
+                instructions = """
+- Use appropriate medical terminology with context
+- Varied sentence length
+- Comprehensive yet accessible explanations
+- Maintain technical accuracy while ensuring clarity"""
+            elif target_level == "college":
+                level_description = "college reading level (undergraduate level, ages 18-22)"
+                instructions = """
+- Use standard medical terminology with brief explanations
+- Professional and clear writing style
+- Include relevant clinical context
+- Maintain scientific accuracy and precision
+- Appropriate for undergraduate students in health sciences"""
+            elif target_level == "doctoral":
+                level_description = "doctoral/professional reading level (graduate level, medical professionals)"
+                instructions = """
+- Use advanced medical and scientific terminology
+- Include detailed clinical and research context
+- Reference specific mechanisms, pathways, and evidence
+- Provide comprehensive technical explanations
+- Appropriate for medical professionals, researchers, and graduate students
+- Include nuanced discussions of clinical implications and research findings"""
+            else:
+                raise ValueError(f"Unknown target_level: {target_level}")
+
+            # Create messages for chat API
+            system_message = f"""You are a helpful medical assistant who specializes in explaining complex medical information at appropriate reading levels. Rewrite the following medical answer for {level_description}:
+{instructions}
+- Keep the same important information but adapt the complexity
+- Provide context for technical terms
+- Ensure the answer is informative yet understandable"""
+
+            user_message = f"Please rewrite this medical answer for {level_description}:\n\n{answer}"
+
+            messages = [
+                {"role": "system", "content": system_message},
+                {"role": "user", "content": user_message}
+            ]
+
+            # Call Inference API
+            completion = self.client.chat.completions.create(
+                model=self.current_model,
+                messages=messages,
+                max_tokens=512 if target_level in ["college", "doctoral"] else 384,
+                temperature=0.4 if target_level in ["college", "doctoral"] else 0.3,
+            )
+
+            enhanced_answer = completion.choices[0].message.content
+            # Clean the answer (same as bot.py)
+            cleaned = self.bot._clean_readability_answer(enhanced_answer, target_level)
+
+            # Calculate Flesch score
+            try:
+                flesch_score = textstat.flesch_kincaid_grade(cleaned)
+            except Exception:
+                flesch_score = 0.0
+
+            return cleaned, flesch_score
+        except Exception as e:
+            logger.error(f"Error enhancing readability: {e}", exc_info=True)
+            return answer, 0.0
+
+    # Delegate other methods to bot
+    def format_prompt(self, context_chunks: List[Chunk], question: str) -> str:
+        return self.bot.format_prompt(context_chunks, question)
+
+    def retrieve_with_scores(self, query: str, k: int) -> Tuple[List[Chunk], List[float]]:
+        return self.bot.retrieve_with_scores(query, k)
+
+    def _categorize_question(self, question: str) -> str:
+        return self.bot._categorize_question(question)
+
+    @property
+    def args(self):
+        return self.bot.args
+
+    @property
+    def vector_retriever(self):
+        return self.bot.vector_retriever
+
+
 class GradioRAGInterface:
     """Wrapper class to integrate RAGBot with Gradio"""
 
-    def __init__(self, initial_bot: RAGBot):
-
-
+    def __init__(self, initial_bot: RAGBot, use_inference_api: bool = False):
+        # Check if we should use Inference API (on Spaces)
+        if use_inference_api and HF_INFERENCE_AVAILABLE:
+            hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGING_FACE_HUB_TOKEN")
+            if hf_token:
+                self.bot = InferenceAPIBot(initial_bot, hf_token)
+                self.use_inference_api = True
+                logger.info("Using Hugging Face Inference API")
+            else:
+                logger.warning("HF_TOKEN not found, falling back to local model")
+                self.bot = initial_bot
+                self.use_inference_api = False
+        else:
+            self.bot = initial_bot
+            self.use_inference_api = False
+
+        self.current_model = self.bot.current_model
         self.data_dir = initial_bot.args.data_dir
         logger.info("GradioRAGInterface initialized")
 
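A hypothetical usage sketch of the wrapper above, assuming bot.py exposes the RAGBot and parse_args API this diff relies on. Retrieval and prompt formatting run locally; generation and readability rewriting go over the network:

import os
from bot import RAGBot, parse_args

args = parse_args()                    # assumed: bot.py CLI defaults
rag_bot = RAGBot(args)                 # builds/loads the local vector DB
api_bot = InferenceAPIBot(rag_bot, os.environ["HF_TOKEN"])

question = "What causes iron-deficiency anemia?"
chunks, scores = api_bot.retrieve_with_scores(question, k=4)  # local vector search
prompt = api_bot.format_prompt(chunks, question)              # delegated to RAGBot
answer = api_bot.generate_answer(prompt, max_new_tokens=256)  # remote chat completion
simple, grade = api_bot.enhance_readability(answer, "middle_school")
print(f"Grade level {grade:.1f}: {simple}")

The explicit delegation methods keep the wrapper's surface small and obvious; a __getattr__ fallback would delegate everything automatically but makes the effective interface harder to audit.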
@@ -194,22 +341,29 @@
             return f"Model already loaded: {model_short_name}"
 
         try:
-            logger.info(f"
-
-            # Update args
-            self.bot.args.model = new_model_path
-
-            # Clear old model from memory
-            if hasattr(self.bot, 'model') and self.bot.model is not None:
-                del self.bot.model
-                del self.bot.tokenizer
-                torch.cuda.empty_cache() if torch.cuda.is_available() else None
-
-            # Load new model
-            self.bot._load_model()
-            self.current_model = new_model_path
-
-            return f"✓ Model loaded: {model_short_name}"
+            logger.info(f"Switching model from {self.current_model} to {new_model_path}")
+
+            if self.use_inference_api:
+                # For Inference API, just update the model name
+                self.bot.current_model = new_model_path
+                self.current_model = new_model_path
+                return f"✓ Model switched to: {model_short_name} (using Inference API)"
+            else:
+                # For local model, reload it
+                # Update args
+                self.bot.args.model = new_model_path
+
+                # Clear old model from memory
+                if hasattr(self.bot, 'model') and self.bot.model is not None:
+                    del self.bot.model
+                    del self.bot.tokenizer
+                    torch.cuda.empty_cache() if torch.cuda.is_available() else None
+
+                # Load new model
+                self.bot._load_model()
+                self.current_model = new_model_path
+
+                return f"✓ Model loaded: {model_short_name}"
         except Exception as e:
             logger.error(f"Error reloading model: {e}", exc_info=True)
             return f"✗ Error loading model: {str(e)}"
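The local-reload branch above, shown in isolation as a sketch. The attribute names (model, tokenizer, _load_model) follow the diff; the gc.collect() call is an added safeguard so the old weights are actually reclaimed before the new ones load:

import gc
import torch

def swap_local_model(bot, new_model_path: str) -> None:
    bot.args.model = new_model_path
    # Drop the only references to the old weights
    if getattr(bot, "model", None) is not None:
        del bot.model
        del bot.tokenizer
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()  # return cached blocks to the CUDA allocator
    bot._load_model()  # assumed to read bot.args.model, as in the diff

Note the diff's torch.cuda.empty_cache() if torch.cuda.is_available() else None is a conditional expression used as a statement; a plain if, as above, says the same thing more clearly.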
@@ -394,10 +548,14 @@ SOURCE {i+1} | Similarity: {score:.3f}
 )
 
 
-def create_interface(initial_bot: RAGBot) -> gr.Blocks:
+def create_interface(initial_bot: RAGBot, use_inference_api: Optional[bool] = None) -> gr.Blocks:
     """Create and configure the Gradio interface"""
 
-    interface = GradioRAGInterface(initial_bot)
+    # Use Inference API on Spaces, local model otherwise
+    if use_inference_api is None:
+        use_inference_api = os.getenv("SPACE_ID") is not None or os.getenv("SYSTEM") == "spaces"
+
+    interface = GradioRAGInterface(initial_bot, use_inference_api=use_inference_api)
 
     # Get initial model name from bot
     initial_model_short = None
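Note that the is-None check in create_interface can only fire if the parameter defaults to None, hence Optional[bool] = None in the signature. The same tri-state resolution as a standalone sketch, assuming the SPACE_ID and SYSTEM variables Hugging Face sets inside a Space:

import os
from typing import Optional

def resolve_use_inference_api(flag: Optional[bool] = None) -> bool:
    """True/False forces a mode; None auto-detects a Hugging Face Space."""
    if flag is not None:
        return flag
    return os.getenv("SPACE_ID") is not None or os.getenv("SYSTEM") == "spaces"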
@@ -687,8 +845,20 @@ def create_demo_for_spaces():
         parser.add_argument('--seed', type=int, default=42)
 
         args = parser.parse_args([])  # Empty args for Spaces
+
+        # Create bot but skip model loading (we'll use Inference API)
+        # We still need the vector database
+        # Set a flag to skip model loading
+        args.skip_model_loading = True
         bot = RAGBot(args)
-        return create_interface(bot)
+
+        # Don't load the model - we'll use Inference API
+        # Just verify vector DB is available
+        if bot.vector_retriever is None:
+            raise Exception("Vector database not available")
+
+        # Use Inference API instead of loading model
+        return create_interface(bot, use_inference_api=True)
     except Exception as e:
         logger.error(f"Error creating demo for Spaces: {e}", exc_info=True)
         # Return a simple error demo
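args.skip_model_loading is set before RAGBot is constructed, so bot.py has to honor the flag in __init__. bot.py is not shown in this diff; a hypothetical skeleton of that contract:

class RAGBotSketch:
    """Hypothetical skeleton: the vector DB always loads, model weights only on request."""

    def __init__(self, args):
        self.args = args
        self.vector_retriever = self._load_vector_db()  # retrieval is always needed
        self.model = None
        self.tokenizer = None
        if not getattr(args, "skip_model_loading", False):
            self._load_model()  # heavy local weights; skipped on Spaces

    def _load_vector_db(self):
        ...  # build or load the embedding index here

    def _load_model(self):
        ...  # load tokenizer and model weights here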