Spaces:

SRP-base-model-training
/

Multilingual-Translation-App

Runtime error

App Files Community

Beibars003 committed on Jul 3

Commit

921eb67

verified ·

1 Parent(s): 12cb373

Update app.py

Browse files

Files changed (1) hide show

app.py +95 -72

app.py CHANGED Viewed

@@ -1,4 +1,6 @@
 import os
 import sys
 from typing import List, Tuple
 from llama_cpp import Llama
@@ -9,6 +11,9 @@ from llama_cpp_agent.chat_history.messages import Roles
 from llama_cpp_agent.messages_formatter import MessagesFormatter, PromptMarkers
 from huggingface_hub import hf_hub_download
 import gradio as gr
 # Load the Environment Variables from .env file
 huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
@@ -23,43 +28,32 @@ hf_hub_download(
     local_dir="./models",
 )
 # Define the prompt markers for Gemma 3
 gemma_3_prompt_markers = {
-    Roles.system: PromptMarkers("<start_of_turn>system\n", "<end_of_turn>\n"),
     Roles.user: PromptMarkers("<start_of_turn>user\n", "<end_of_turn>\n"),
     Roles.assistant: PromptMarkers("<start_of_turn>assistant", ""),
-    Roles.tool: PromptMarkers("", ""),
 }
 gemma_3_formatter = MessagesFormatter(
-    pre_prompt="",
     prompt_markers=gemma_3_prompt_markers,
-    include_sys_prompt_in_first_user_message=True,
     default_stop_sequences=["<end_of_turn>", "<start_of_turn>"],
-    strip_prompt=False,
-    bos_token="<bos>",
-    eos_token="<eos>",
 )
-# Translation direction to prompts mapping
-direction_to_prompts = {
-    "English to Kazakh": {
-        "system": "You are a professional translator. Translate the following sentence into қазақ.",
-        "prefix": "<src=en><tgt=kk>"
-    },
-    "Kazakh to English": {
-        "system": "Сіз кәсіби аудармашысыз. Төмендегі сөйлемді English тіліне аударыңыз.",
-        "prefix": "<src=kk><tgt=en>"
-    },
-    "Kazakh to Russian": {
-        "system": "Сіз кәсіби аудармашысыз. Төмендегі сөйлемді орыс тіліне аударыңыз.",
-        "prefix": "<src=kk><tgt=ru>"
-    },
-    "Russian to Kazakh": {
-        "system": "Вы профессиональный переводчик. Переведите следующее предложение на қазақ язык.",
-        "prefix": "<src=ru><tgt=kk>"
-    }
-}
 llm = None
 llm_model = None
@@ -67,42 +61,48 @@ llm_model = None
 def respond(
     message: str,
     history: List[Tuple[str, str]],
-    direction: str = "English to Kazakh",
-    max_tokens: int = 1024,
     temperature: float = 0.7,
     top_p: float = 0.95,
     top_k: int = 40,
     repeat_penalty: float = 1.1,
 ):
     """
-    Respond to a message by translating it using the specified direction.
     Args:
-        message (str): The text to translate.
-        history (List[Tuple[str, str]]): The chat history.
-        direction (str): The translation direction (e.g., "English to Kazakh").
-        model (str): The model file to use.
-        max_tokens (int): Maximum number of tokens to generate.
-        temperature (float): Sampling temperature.
-        top_p (float): Top-p sampling parameter.
-        top_k (int): Top-k sampling parameter.
-        repeat_penalty (float): Penalty for repetition.
-    Yields:
-        str: The translated text as it is generated.
     """
     if model is None:
         model = "gemma_3_800M_sft_v2_translation-kazparc_latest.gguf"
-    global llm, llm_model
     if llm is None or llm_model != model:
         model_path = f"models/{model}"
         if not os.path.exists(model_path):
-            yield f"Error: Model file not found at {model_path}."
             return
         llm = Llama(
-            model_path=model_path,
             flash_attn=False,
             n_gpu_layers=0,
             n_batch=8,
@@ -113,18 +113,15 @@ def respond(
         llm_model = model
     provider = LlamaCppPythonProvider(llm)
-    # Get system prompt and user prefix based on direction
-    prompts = direction_to_prompts[direction]
-    system_message = prompts["system"]
-    user_prefix = prompts["prefix"]
     agent = LlamaCppAgent(
         provider,
-        system_prompt=system_message,
         custom_messages_formatter=gemma_3_formatter,
         debug_output=True,
     )
     settings = provider.get_provider_default_settings()
     settings.temperature = temperature
     settings.top_k = top_k
@@ -134,35 +131,59 @@ def respond(
     settings.stream = True
     messages = BasicChatHistory()
-    for user_msg, assistant_msg in history:
-        full_user_msg = user_prefix + " " + user_msg
-        messages.add_message({"role": Roles.user, "content": full_user_msg})
-        messages.add_message({"role": Roles.assistant, "content": assistant_msg})
-    full_message = user_prefix + " " + message
     stream = agent.get_chat_response(
-        full_message,
         llm_sampling_settings=settings,
         chat_history=messages,
         returns_streaming_generator=True,
         print_output=False,
     )
     outputs = ""
     for output in stream:
         outputs += output
         yield outputs
 demo = gr.ChatInterface(
     respond,
-    examples=[["Hello"], ["Сәлем"], ["Привет"]],
-    additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
     additional_inputs=[
         gr.Dropdown(
-            choices=["English to Kazakh", "Kazakh to English", "Kazakh to Russian", "Russian to Kazakh"],
-            label="Translation Direction",
-            info="Select the direction of translation"
         ),
         gr.Slider(
             minimum=512,
@@ -170,7 +191,7 @@ demo = gr.ChatInterface(
             value=1024,
             step=1,
             label="Max Tokens",
-            info="Maximum length of the translation"
         ),
         gr.Slider(
             minimum=0.1,
@@ -178,7 +199,7 @@ demo = gr.ChatInterface(
             value=0.7,
             step=0.1,
             label="Temperature",
-            info="Controls randomness (higher = more creative)"
         ),
         gr.Slider(
             minimum=0.1,
@@ -186,7 +207,7 @@ demo = gr.ChatInterface(
             value=0.95,
             step=0.05,
             label="Top-p",
-            info="Nucleus sampling threshold"
         ),
         gr.Slider(
             minimum=1,
@@ -194,7 +215,7 @@ demo = gr.ChatInterface(
             value=40,
             step=1,
             label="Top-k",
-            info="Limits vocabulary to top K tokens"
         ),
         gr.Slider(
             minimum=1.0,
@@ -202,22 +223,24 @@ demo = gr.ChatInterface(
             value=1.1,
             step=0.1,
             label="Repetition Penalty",
-            info="Penalizes repeated words"
         ),
     ],
     theme="Ocean",
-    submit_btn="Translate",
     stop_btn="Stop",
-    title="Kazakh Translation Model",
-    description="Translate text between Kazakh, English, and Russian using a specialized language model.",
     chatbot=gr.Chatbot(scale=1, show_copy_button=True),
     cache_examples=False,
 )
 if __name__ == "__main__":
     demo.launch(
         share=False,
         server_name="0.0.0.0",
         server_port=7860,
         show_api=False,
-    )

 import os
+import json
+import subprocess
 import sys
 from typing import List, Tuple
 from llama_cpp import Llama
 from llama_cpp_agent.messages_formatter import MessagesFormatter, PromptMarkers
 from huggingface_hub import hf_hub_download
 import gradio as gr
+# from logger import logging
+# from exception import CustomExceptionHandling
 # Load the Environment Variables from .env file
 huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
     local_dir="./models",
 )
 # Define the prompt markers for Gemma 3
 gemma_3_prompt_markers = {
+    Roles.system: PromptMarkers("<start_of_turn>system\n", "<end_of_turn>\n"),  # System prompt should be included within user message
     Roles.user: PromptMarkers("<start_of_turn>user\n", "<end_of_turn>\n"),
     Roles.assistant: PromptMarkers("<start_of_turn>assistant", ""),
+    Roles.tool: PromptMarkers("", ""),  # If you need tool support
 }
+# Create the formatter
 gemma_3_formatter = MessagesFormatter(
+    pre_prompt="",  # No pre-prompt
     prompt_markers=gemma_3_prompt_markers,
+    include_sys_prompt_in_first_user_message=True,  # Include system prompt in first user message
     default_stop_sequences=["<end_of_turn>", "<start_of_turn>"],
+    strip_prompt=False,  # Don't strip whitespace from the prompt
+    bos_token="<bos>",  # Beginning of sequence token for Gemma 3
+    eos_token="<eos>",  # End of sequence token for Gemma 3
 )
+# Set the title and description
+title = "Kazakh Language Model"
+description = """"""
 llm = None
 llm_model = None
 def respond(
     message: str,
     history: List[Tuple[str, str]],
+    model: str = "gemma_3_800M_sft_v2_translation-kazparc_latest.gguf",  # Set default model
+    system_message: str = "",
+    max_tokens: int = 64,
     temperature: float = 0.7,
     top_p: float = 0.95,
     top_k: int = 40,
     repeat_penalty: float = 1.1,
 ):
     """
+    Respond to a message using the Gemma3 model via Llama.cpp.
     Args:
+        - message (str): The message to respond to.
+        - history (List[Tuple[str, str]]): The chat history.
+        - model (str): The model to use.
+        - system_message (str): The system message to use.
+        - max_tokens (int): The maximum number of tokens to generate.
+        - temperature (float): The temperature of the model.
+        - top_p (float): The top-p of the model.
+        - top_k (int): The top-k of the model.
+        - repeat_penalty (float): The repetition penalty of the model.
+    Returns:
+        str: The response to the message.
     """
+    # try:
+    # Load the global variables
+    global llm
+    global llm_model
+    # Ensure model is not None
     if model is None:
         model = "gemma_3_800M_sft_v2_translation-kazparc_latest.gguf"
+    # Load the model
     if llm is None or llm_model != model:
+        # Check if model file exists
         model_path = f"models/{model}"
         if not os.path.exists(model_path):
+            yield f"Error: Model file not found at {model_path}. Please check your model path."
             return
         llm = Llama(
+            model_path=f"models/{model}",
             flash_attn=False,
             n_gpu_layers=0,
             n_batch=8,
         llm_model = model
     provider = LlamaCppPythonProvider(llm)
+    # Create the agent
     agent = LlamaCppAgent(
         provider,
+        system_prompt=f"{system_message}",
         custom_messages_formatter=gemma_3_formatter,
         debug_output=True,
     )
+    # Set the settings like temperature, top-k, top-p, max tokens, etc.
     settings = provider.get_provider_default_settings()
     settings.temperature = temperature
     settings.top_k = top_k
     settings.stream = True
     messages = BasicChatHistory()
+    # Add the chat history
+    for msn in history:
+        user = {"role": Roles.user, "content": msn[0]}
+        assistant = {"role": Roles.assistant, "content": msn[1]}
+        messages.add_message(user)
+        messages.add_message(assistant)
+    # Get the response stream
     stream = agent.get_chat_response(
+        message,
         llm_sampling_settings=settings,
         chat_history=messages,
         returns_streaming_generator=True,
         print_output=False,
     )
+    # Log the success
+    # logging.info("Response stream generated successfully")
+    # Generate the response
     outputs = ""
     for output in stream:
         outputs += output
         yield outputs
+    # # Handle exceptions that may occur during the process
+    # except Exception as e:
+    #     # Custom exception handling
+    #     raise CustomExceptionHandling(e, sys) from e
+# Create a chat interface
 demo = gr.ChatInterface(
     respond,
+    examples=[["Сәлем"], ["Привет"], ["Hello"]],
+    additional_inputs_accordion=gr.Accordion(
+        label="⚙️ Parameters", open=False, render=False
+    ),
     additional_inputs=[
         gr.Dropdown(
+            choices=[
+                "gemma_3_800M_sft_v2_translation-kazparc_latest.gguf",
+            ],
+            value="gemma_3_800M_sft_v2_translation-kazparc_latest.gguf",
+            label="Model",
+            info="Select the AI model to use for chat",
+        ),
+        gr.Textbox(
+            value="You are a helpful assistant.",
+            label="System Prompt",
+            info="Define the AI assistant's personality and behavior",
+            lines=2,
         ),
         gr.Slider(
             minimum=512,
             value=1024,
             step=1,
             label="Max Tokens",
+            info="Maximum length of response (higher = longer replies)",
         ),
         gr.Slider(
             minimum=0.1,
             value=0.7,
             step=0.1,
             label="Temperature",
+            info="Creativity level (higher = more creative, lower = more focused)",
         ),
         gr.Slider(
             minimum=0.1,
             value=0.95,
             step=0.05,
             label="Top-p",
+            info="Nucleus sampling threshold",
         ),
         gr.Slider(
             minimum=1,
             value=40,
             step=1,
             label="Top-k",
+            info="Limit vocabulary choices to top K tokens",
         ),
         gr.Slider(
             minimum=1.0,
             value=1.1,
             step=0.1,
             label="Repetition Penalty",
+            info="Penalize repeated words (higher = less repetition)",
         ),
     ],
     theme="Ocean",
+    submit_btn="Send",
     stop_btn="Stop",
+    title=title,
+    description=description,
     chatbot=gr.Chatbot(scale=1, show_copy_button=True),
     cache_examples=False,
 )
+# Launch the chat interface
 if __name__ == "__main__":
     demo.launch(
         share=False,
         server_name="0.0.0.0",
         server_port=7860,
         show_api=False,
+    )