Beibars003 committed
Commit 97d0043 (verified)
1 Parent(s): fb8e4a3

Update app.py

Files changed (1):
  app.py +210 -185

app.py CHANGED
@@ -1,205 +1,230 @@
  import gradio as gr
- import time
- import re
- from tqdm import tqdm
- from openai import OpenAI
-
- instructs = {'eng': 'English',
-              'kaz': 'Kazakh',
-              'rus': 'Russian',
-              'tur': 'Turkish',
-              'uzn': 'Uzbek',
-              'zho_simpl': 'Chinese (Simplified)'}
-
- openai_api_key = "EMPTY"
- openai_api_base = "http://localhost:7050/v1"
- model_path = "gemma_translator"
- client = OpenAI(
-     api_key=openai_api_key,
-     base_url=openai_api_base,
- )
-
-
- def build_prompt_alpaca(instruction: str, input_text: str = "") -> str:
-     return (
-         "<bos>Below is an instruction that describes a task, paired with an input that provides further context. "
-         "Write a response that appropriately completes the request.\n\n"
-         f"### Instruction:\n{instruction}\n\n### Input:\n{input_text}\n\n### Response:\n"
-     )
-
-
- def get_prediction(instruction, input_text, temperature=0.0, top_p=0.9, max_tokens=1024,
-                    presence_penalty=0.0, frequency_penalty=0.3, repetition_penalty=1.0,
-                    top_k=64, min_p=0.0, retry_count=3, sleep_time=1):
-     for attempt in range(retry_count):
-         try:
-             prompt = build_prompt_alpaca(instruction, input_text)
-             response = client.completions.create(
-                 model=model_path,
-                 prompt=prompt,
-                 temperature=temperature,
-                 top_p=top_p,
-                 max_tokens=max_tokens,
-                 frequency_penalty=frequency_penalty,
-                 stream=True
-             )
-             return response
-
-         except Exception as e:
-             print(f"Error on attempt {attempt+1}: {str(e)}")
-             if attempt < retry_count - 1:
-                 print(f"Retrying in {sleep_time} seconds...")
-                 time.sleep(sleep_time)
-                 sleep_time *= 2  # Exponential backoff
-             else:
-                 print("Max retries reached. Returning empty string.")
-                 return ""
-
-
- def process_streaming_response(response):
-     """Process streaming response and return complete text"""
-     if not response:
-         return "Error: No response received"
-
-     buffer = ""
-     is_first_chunk = True
-     complete_text = ""
-
-     try:
-         for chunk in response:
-             if hasattr(chunk, 'choices') and chunk.choices and chunk.choices[0].text:
-                 text_content = chunk.choices[0].text
-
-                 if is_first_chunk:
-                     text_content = text_content.lstrip()
-                     if text_content:
-                         text_content = " " + text_content
-                     is_first_chunk = False
-
-                 buffer += text_content
-                 complete_text += text_content
-
-         return complete_text.strip()
-     except Exception as e:
-         return f"Error processing response: {str(e)}"
-
-
- def generate_translation(text, target_lang, temperature, top_p, max_tokens,
-                          presence_penalty, frequency_penalty, repetition_penalty,
-                          top_k, min_p, use_v0_prompt=False):
-     """Updated function that accepts all parameters from Gradio"""
-     if not text.strip():
-         return "Please enter some text to translate."
-
-     text = text.strip()
-
-     # Build instruction based on target language
-     if use_v0_prompt:
-         instruction = f"Translate the following text into {instructs[target_lang]}."
-     else:
-         instruction = f"Translate to {instructs[target_lang]}"

      try:
-         # Use the get_prediction function for inference
-         response = get_prediction(
-             instruction=instruction,
-             input_text=text,
-             temperature=temperature,
-             top_p=top_p,
-             max_tokens=int(max_tokens),
-             presence_penalty=presence_penalty,
-             frequency_penalty=frequency_penalty,
-             repetition_penalty=repetition_penalty,
-             top_k=int(top_k),
-             min_p=min_p,
-             retry_count=3,
-             sleep_time=1
-         )
-
-         # Process the streaming response
-         return process_streaming_response(response)
-
-     except Exception as e:
-         return f"Error: {str(e)}"
-
-
- def set_example_text(example_text):
-     """Helper function to set example text"""
-     return example_text
-
-
- # Gradio UI
- with gr.Blocks() as demo:
-     gr.Markdown("## 🌐 Multilingual Translation App")
-
-     with gr.Row():
-         input_text = gr.Textbox(
-             label="Enter your text",
-             placeholder="Type here and press Enter or click Translate",
-             lines=3
          )

-     with gr.Row():
-         lang_dropdown = gr.Dropdown(
-             choices=list(instructs.keys()),
-             value="kaz",
-             label="Translate to"
          )

-     with gr.Accordion("Advanced Parameters (Optional)", open=False):
-         temperature_slider = gr.Slider(0.0, 1.0, value=0.0, step=0.01, label="Temperature")
-         top_p_slider = gr.Slider(0.0, 1.0, value=0.9, step=0.01, label="Top-p")
-         max_tokens_slider = gr.Slider(64, 2048, value=1024, step=1, label="Max Tokens")
-         presence_penalty_slider = gr.Slider(-2.0, 2.0, value=0.0, step=0.01, label="Presence Penalty", info="Usually left at 0.0")
-         frequency_penalty_slider = gr.Slider(-2.0, 2.0, value=0.3, step=0.01, label="Frequency Penalty")
-         repetition_penalty_slider = gr.Slider(0.5, 2.0, value=1.0, step=0.01, label="Repetition Penalty", info="Default is 1.0")
-         top_k_slider = gr.Slider(1, 100, value=64, step=1, label="Top-k", info="Default 64")
-         min_p_slider = gr.Slider(0.0, 1.0, value=0.0, step=0.01, label="Min-p", info="Usually 0.0")
-         use_v0_prompt = gr.Checkbox(label="Use v0 Prompt Format", value=False)
-
-     with gr.Row():
-         submit_btn = gr.Button("Translate", variant="primary")
-
-     output_text = gr.Textbox(label="Translation Result", lines=4)
-
-     # Define inputs in the correct order to match the function parameters
-     inputs = [
-         input_text, lang_dropdown,
-         temperature_slider, top_p_slider, max_tokens_slider,
-         presence_penalty_slider, frequency_penalty_slider, repetition_penalty_slider,
-         top_k_slider, min_p_slider, use_v0_prompt
-     ]
-
-     # Connect the function to the button and text input
-     submit_btn.click(fn=generate_translation, inputs=inputs, outputs=output_text)
-     input_text.submit(fn=generate_translation, inputs=inputs, outputs=output_text)
-
-     # Example inputs
-     gr.Markdown("### 🔍 Examples:")
-     with gr.Row():
-         examples = [
-             "Hello! How can I help you?",
-             "Hello! how can I help you?",
-             "2 + 2 is?",
-             "Your appointment is on 5th July at 3 PM.",
-             "The total cost is 1250 KZT.",
-             "She was born in 1995."
-         ]
-
-         for example_text in examples:
-             example_btn = gr.Button(example_text, size="sm")
-             example_btn.click(
-                 fn=lambda x=example_text: x,
-                 inputs=[],
-                 outputs=[input_text]
-             )
-

  if __name__ == "__main__":
      demo.launch(
          share=False,
          server_name="0.0.0.0",
-         server_port=5482,
          show_api=False,
      )

+ import warnings
+ warnings.filterwarnings("ignore")
+
+ import os
+ import sys
+ from typing import List, Tuple
+ from llama_cpp import Llama
+ from llama_cpp_agent import LlamaCppAgent
+ from llama_cpp_agent.providers import LlamaCppPythonProvider
+ from llama_cpp_agent.chat_history import BasicChatHistory
+ from llama_cpp_agent.chat_history.messages import Roles
+ from llama_cpp_agent.messages_formatter import MessagesFormatter, PromptMarkers
+ from huggingface_hub import hf_hub_download
  import gradio as gr
+ from logger import logging
+ from exception import CustomExceptionHandling
+
+ # Read the Hugging Face token from the environment
+ huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
+
+ # Download the GGUF model file if it is not already present
+ if not os.path.exists("./models"):
+     os.makedirs("./models")
+
+ hf_hub_download(
+     repo_id="SRP-base-model-training/gemma_3_800M_sft_v2_translation-kazparc_latest",
+     filename="gemma_3_800M_sft_v2_translation-kazparc_latest.gguf",
+     local_dir="./models",
+ )
+
+ # Define the prompt markers for Gemma 3
+ gemma_3_prompt_markers = {
+     Roles.system: PromptMarkers("<start_of_turn>system\n", "<end_of_turn>\n"),
+     Roles.user: PromptMarkers("<start_of_turn>user\n", "<end_of_turn>\n"),
+     Roles.assistant: PromptMarkers("<start_of_turn>assistant", ""),
+     Roles.tool: PromptMarkers("", ""),
+ }
+
+ gemma_3_formatter = MessagesFormatter(
+     pre_prompt="",
+     prompt_markers=gemma_3_prompt_markers,
+     include_sys_prompt_in_first_user_message=True,
+     default_stop_sequences=["<end_of_turn>", "<start_of_turn>"],
+     strip_prompt=False,
+     bos_token="<bos>",
+     eos_token="<eos>",
+ )
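+ # For illustration only (the exact rendering depends on llama_cpp_agent's
+ # MessagesFormatter internals): with these markers a single-turn request is
+ # rendered roughly as
+ #   <bos><start_of_turn>user
+ #   {system prompt}
+ #   {tagged user message}<end_of_turn>
+ #   <start_of_turn>assistant
+ # because include_sys_prompt_in_first_user_message=True folds the system
+ # prompt into the first user turn.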
+
+ # Map each translation direction to its system prompt and source/target tag
+ direction_to_prompts = {
+     "English to Kazakh": {
+         "system": "You are a professional translator. Translate the following sentence into қазақ.",
+         "prefix": "<src=en><tgt=kk>"
+     },
+     "Kazakh to English": {
+         # Kazakh: "You are a professional translator. Translate the sentence below into English."
+         "system": "Сіз кәсіби аудармашысыз. Төмендегі сөйлемді English тіліне аударыңыз.",
+         "prefix": "<src=kk><tgt=en>"
+     },
+     "Kazakh to Russian": {
+         # Kazakh: "You are a professional translator. Translate the sentence below into Russian."
+         "system": "Сіз кәсіби аудармашысыз. Төмендегі сөйлемді орыс тіліне аударыңыз.",
+         "prefix": "<src=kk><tgt=ru>"
+     },
+     "Russian to Kazakh": {
+         # Russian: "You are a professional translator. Translate the following sentence into Kazakh."
+         "system": "Вы профессиональный переводчик. Переведите следующее предложение на қазақ язык.",
+         "prefix": "<src=ru><tgt=kk>"
+     }
+ }
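+ # Example: selecting "English to Kazakh" makes the model see the user turn as
+ # "<src=en><tgt=kk> Hello! How can I help you?" under the English system prompt.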
+
+ llm = None
+ llm_model = None
+
+ def respond(
+     message: str,
+     history: List[Tuple[str, str]],
+     direction: str,
+     max_tokens: int = 1024,
+     temperature: float = 0.7,
+     top_p: float = 0.95,
+     top_k: int = 40,
+     repeat_penalty: float = 1.1,
+     # Keep `model` after the Gradio-supplied parameters: gr.ChatInterface
+     # passes `additional_inputs` positionally, so placing it earlier would
+     # make it receive the Max Tokens slider value instead.
+     model: str = "gemma_3_800M_sft_v2_translation-kazparc_latest.gguf",
+ ):
+     """
+     Respond to a message by translating it using the specified direction.
+
+     Args:
+         message (str): The text to translate.
+         history (List[Tuple[str, str]]): The chat history.
+         direction (str): The translation direction (e.g., "English to Kazakh").
+         max_tokens (int): Maximum number of tokens to generate.
+         temperature (float): Sampling temperature.
+         top_p (float): Top-p sampling parameter.
+         top_k (int): Top-k sampling parameter.
+         repeat_penalty (float): Penalty for repetition.
+         model (str): The model file to use.
+
+     Yields:
+         str: The translated text as it is generated.
+     """
      try:
+         global llm, llm_model
+         # Load (or reload) the model lazily on first use
+         if llm is None or llm_model != model:
+             model_path = f"models/{model}"
+             if not os.path.exists(model_path):
+                 yield f"Error: Model file not found at {model_path}."
+                 return
+             llm = Llama(
+                 model_path=model_path,
+                 flash_attn=False,
+                 n_gpu_layers=0,
+                 n_batch=8,
+                 n_ctx=2048,
+                 n_threads=8,
+                 n_threads_batch=8,
+             )
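+             # Note: n_gpu_layers=0 keeps inference entirely on the CPU; raise
+             # it only if a GPU-enabled build of llama-cpp-python is installed.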
+             llm_model = model
+         provider = LlamaCppPythonProvider(llm)
+
+         # Get the system prompt and user prefix for the selected direction
+         prompts = direction_to_prompts[direction]
+         system_message = prompts["system"]
+         user_prefix = prompts["prefix"]
+
+         agent = LlamaCppAgent(
+             provider,
+             system_prompt=system_message,
+             custom_messages_formatter=gemma_3_formatter,
+             debug_output=True,
          )

+         settings = provider.get_provider_default_settings()
+         settings.temperature = temperature
+         settings.top_k = top_k
+         settings.top_p = top_p
+         settings.max_tokens = max_tokens
+         settings.repeat_penalty = repeat_penalty
+         settings.stream = True
+
+         # Replay prior turns, prefixing each user message with the direction tag
+         messages = BasicChatHistory()
+         for user_msg, assistant_msg in history:
+             full_user_msg = user_prefix + " " + user_msg
+             messages.add_message({"role": Roles.user, "content": full_user_msg})
+             messages.add_message({"role": Roles.assistant, "content": assistant_msg})
+
+         full_message = user_prefix + " " + message
+
+         stream = agent.get_chat_response(
+             full_message,
+             llm_sampling_settings=settings,
+             chat_history=messages,
+             returns_streaming_generator=True,
+             print_output=False,
          )

+         logging.info("Response stream generated successfully")
+         outputs = ""
+         for output in stream:
+             outputs += output
+             yield outputs
+
+     except Exception as e:
+         raise CustomExceptionHandling(e, sys) from e
+
+
+ demo = gr.ChatInterface(
+     respond,
+     examples=[["Hello"], ["Сәлем"], ["Привет"]],
+     additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
+     additional_inputs=[
+         gr.Dropdown(
+             choices=["English to Kazakh", "Kazakh to English", "Kazakh to Russian", "Russian to Kazakh"],
+             value="English to Kazakh",  # default selection so `direction` is never None
+             label="Translation Direction",
+             info="Select the direction of translation"
+         ),
+         gr.Slider(
+             minimum=512,
+             maximum=2048,
+             value=1024,
+             step=1,
+             label="Max Tokens",
+             info="Maximum length of the translation"
+         ),
+         gr.Slider(
+             minimum=0.1,
+             maximum=2.0,
+             value=0.7,
+             step=0.1,
+             label="Temperature",
+             info="Controls randomness (higher = more creative)"
+         ),
+         gr.Slider(
+             minimum=0.1,
+             maximum=1.0,
+             value=0.95,
+             step=0.05,
+             label="Top-p",
+             info="Nucleus sampling threshold"
+         ),
+         gr.Slider(
+             minimum=1,
+             maximum=100,
+             value=40,
+             step=1,
+             label="Top-k",
+             info="Limits sampling to the top-k tokens"
+         ),
+         gr.Slider(
+             minimum=1.0,
+             maximum=2.0,
+             value=1.1,
+             step=0.1,
+             label="Repetition Penalty",
+             info="Penalizes repeated words"
+         ),
+     ],
+     theme="Ocean",
+     submit_btn="Translate",
+     stop_btn="Stop",
+     title="Kazakh Translation Model",
+     description="Translate text between Kazakh, English, and Russian using a specialized language model.",
+     chatbot=gr.Chatbot(scale=1, show_copy_button=True),
+     cache_examples=False,
+ )
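+ # The six `additional_inputs` above are passed to `respond` positionally after
+ # (message, history), in the order listed: direction, max_tokens, temperature,
+ # top_p, top_k, repeat_penalty.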

  if __name__ == "__main__":
      demo.launch(
          share=False,
          server_name="0.0.0.0",
+         server_port=7860,
          show_api=False,
      )