Beibars003 committed
Commit 921eb67 · verified · 1 Parent(s): 12cb373

Update app.py

Files changed (1):
    app.py  +95 -72

app.py CHANGED
@@ -1,4 +1,6 @@
 import os
+import json
+import subprocess
 import sys
 from typing import List, Tuple
 from llama_cpp import Llama
@@ -9,6 +11,9 @@ from llama_cpp_agent.chat_history.messages import Roles
 from llama_cpp_agent.messages_formatter import MessagesFormatter, PromptMarkers
 from huggingface_hub import hf_hub_download
 import gradio as gr
+# from logger import logging
+# from exception import CustomExceptionHandling
+

 # Load the Environment Variables from .env file
 huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
@@ -23,43 +28,32 @@ hf_hub_download(
     local_dir="./models",
 )

+
 # Define the prompt markers for Gemma 3
 gemma_3_prompt_markers = {
-    Roles.system: PromptMarkers("<start_of_turn>system\n", "<end_of_turn>\n"),
+    Roles.system: PromptMarkers("<start_of_turn>system\n", "<end_of_turn>\n"),  # System prompt should be included within user message
     Roles.user: PromptMarkers("<start_of_turn>user\n", "<end_of_turn>\n"),
     Roles.assistant: PromptMarkers("<start_of_turn>assistant", ""),
-    Roles.tool: PromptMarkers("", ""),
+
+    Roles.tool: PromptMarkers("", ""),  # If you need tool support
 }

+# Create the formatter
 gemma_3_formatter = MessagesFormatter(
-    pre_prompt="",
+    pre_prompt="",  # No pre-prompt
     prompt_markers=gemma_3_prompt_markers,
-    include_sys_prompt_in_first_user_message=True,
+    include_sys_prompt_in_first_user_message=True,  # Include system prompt in first user message
     default_stop_sequences=["<end_of_turn>", "<start_of_turn>"],
-    strip_prompt=False,
-    bos_token="<bos>",
-    eos_token="<eos>",
+    strip_prompt=False,  # Don't strip whitespace from the prompt
+    bos_token="<bos>",  # Beginning of sequence token for Gemma 3
+    eos_token="<eos>",  # End of sequence token for Gemma 3
 )

-# Translation direction to prompts mapping
-direction_to_prompts = {
-    "English to Kazakh": {
-        "system": "You are a professional translator. Translate the following sentence into қазақ.",
-        "prefix": "<src=en><tgt=kk>"
-    },
-    "Kazakh to English": {
-        "system": "Сіз кәсіби аудармашысыз. Төмендегі сөйлемді English тіліне аударыңыз.",
-        "prefix": "<src=kk><tgt=en>"
-    },
-    "Kazakh to Russian": {
-        "system": "Сіз кәсіби аудармашысыз. Төмендегі сөйлемді орыс тіліне аударыңыз.",
-        "prefix": "<src=kk><tgt=ru>"
-    },
-    "Russian to Kazakh": {
-        "system": "Вы профессиональный переводчик. Переведите следующее предложение на қазақ язык.",
-        "prefix": "<src=ru><tgt=kk>"
-    }
-}
+
+# Set the title and description
+title = "Kazakh Language Model"
+description = """"""
+

 llm = None
 llm_model = None
@@ -67,42 +61,48 @@ llm_model = None
 def respond(
     message: str,
     history: List[Tuple[str, str]],
-    direction: str = "English to Kazakh",
-    max_tokens: int = 1024,
+    model: str = "gemma_3_800M_sft_v2_translation-kazparc_latest.gguf",  # Set default model
+    system_message: str = "",
+    max_tokens: int = 64,
     temperature: float = 0.7,
     top_p: float = 0.95,
     top_k: int = 40,
     repeat_penalty: float = 1.1,
 ):
     """
-    Respond to a message by translating it using the specified direction.
-
+    Respond to a message using the Gemma3 model via Llama.cpp.
     Args:
-        message (str): The text to translate.
-        history (List[Tuple[str, str]]): The chat history.
-        direction (str): The translation direction (e.g., "English to Kazakh").
-        model (str): The model file to use.
-        max_tokens (int): Maximum number of tokens to generate.
-        temperature (float): Sampling temperature.
-        top_p (float): Top-p sampling parameter.
-        top_k (int): Top-k sampling parameter.
-        repeat_penalty (float): Penalty for repetition.
-
-    Yields:
-        str: The translated text as it is generated.
+        - message (str): The message to respond to.
+        - history (List[Tuple[str, str]]): The chat history.
+        - model (str): The model to use.
+        - system_message (str): The system message to use.
+        - max_tokens (int): The maximum number of tokens to generate.
+        - temperature (float): The temperature of the model.
+        - top_p (float): The top-p of the model.
+        - top_k (int): The top-k of the model.
+        - repeat_penalty (float): The repetition penalty of the model.
+    Returns:
+        str: The response to the message.
     """
+    # try:
+    # Load the global variables
+    global llm
+    global llm_model

+    # Ensure model is not None
     if model is None:
         model = "gemma_3_800M_sft_v2_translation-kazparc_latest.gguf"
-
-    global llm, llm_model
+
+    # Load the model
     if llm is None or llm_model != model:
+        # Check if model file exists
         model_path = f"models/{model}"
         if not os.path.exists(model_path):
-            yield f"Error: Model file not found at {model_path}."
+            yield f"Error: Model file not found at {model_path}. Please check your model path."
             return
+
         llm = Llama(
-            model_path=model_path,
+            model_path=f"models/{model}",
             flash_attn=False,
             n_gpu_layers=0,
             n_batch=8,
@@ -113,18 +113,15 @@ def respond(
         llm_model = model
     provider = LlamaCppPythonProvider(llm)

-    # Get system prompt and user prefix based on direction
-    prompts = direction_to_prompts[direction]
-    system_message = prompts["system"]
-    user_prefix = prompts["prefix"]
-
+    # Create the agent
     agent = LlamaCppAgent(
         provider,
-        system_prompt=system_message,
+        system_prompt=f"{system_message}",
         custom_messages_formatter=gemma_3_formatter,
         debug_output=True,
     )

+    # Set the settings like temperature, top-k, top-p, max tokens, etc.
     settings = provider.get_provider_default_settings()
     settings.temperature = temperature
     settings.top_k = top_k
@@ -134,35 +131,59 @@ def respond(
     settings.stream = True

     messages = BasicChatHistory()
-    for user_msg, assistant_msg in history:
-        full_user_msg = user_prefix + " " + user_msg
-        messages.add_message({"role": Roles.user, "content": full_user_msg})
-        messages.add_message({"role": Roles.assistant, "content": assistant_msg})

-    full_message = user_prefix + " " + message
+    # Add the chat history
+    for msn in history:
+        user = {"role": Roles.user, "content": msn[0]}
+        assistant = {"role": Roles.assistant, "content": msn[1]}
+        messages.add_message(user)
+        messages.add_message(assistant)

+    # Get the response stream
     stream = agent.get_chat_response(
-        full_message,
+        message,
         llm_sampling_settings=settings,
         chat_history=messages,
         returns_streaming_generator=True,
         print_output=False,
     )

+    # Log the success
+    # logging.info("Response stream generated successfully")
+
+    # Generate the response
     outputs = ""
     for output in stream:
         outputs += output
         yield outputs

+    # # Handle exceptions that may occur during the process
+    # except Exception as e:
+    #     # Custom exception handling
+    #     raise CustomExceptionHandling(e, sys) from e
+
+
+# Create a chat interface
 demo = gr.ChatInterface(
     respond,
-    examples=[["Hello"], ["Сәлем"], ["Привет"]],
-    additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
+    examples=[["Сәлем"], ["Привет"], ["Hello"]],
+    additional_inputs_accordion=gr.Accordion(
+        label="⚙️ Parameters", open=False, render=False
+    ),
     additional_inputs=[
         gr.Dropdown(
-            choices=["English to Kazakh", "Kazakh to English", "Kazakh to Russian", "Russian to Kazakh"],
-            label="Translation Direction",
-            info="Select the direction of translation"
+            choices=[
+                "gemma_3_800M_sft_v2_translation-kazparc_latest.gguf",
+            ],
+            value="gemma_3_800M_sft_v2_translation-kazparc_latest.gguf",
+            label="Model",
+            info="Select the AI model to use for chat",
+        ),
+        gr.Textbox(
+            value="You are a helpful assistant.",
+            label="System Prompt",
+            info="Define the AI assistant's personality and behavior",
+            lines=2,
         ),
         gr.Slider(
             minimum=512,
@@ -170,7 +191,7 @@ demo = gr.ChatInterface(
             value=1024,
             step=1,
             label="Max Tokens",
-            info="Maximum length of the translation"
+            info="Maximum length of response (higher = longer replies)",
         ),
         gr.Slider(
             minimum=0.1,
@@ -178,7 +199,7 @@ demo = gr.ChatInterface(
             value=0.7,
             step=0.1,
             label="Temperature",
-            info="Controls randomness (higher = more creative)"
+            info="Creativity level (higher = more creative, lower = more focused)",
         ),
         gr.Slider(
             minimum=0.1,
@@ -186,7 +207,7 @@ demo = gr.ChatInterface(
             value=0.95,
             step=0.05,
             label="Top-p",
-            info="Nucleus sampling threshold"
+            info="Nucleus sampling threshold",
         ),
         gr.Slider(
             minimum=1,
@@ -194,7 +215,7 @@ demo = gr.ChatInterface(
             value=40,
             step=1,
             label="Top-k",
-            info="Limits vocabulary to top K tokens"
+            info="Limit vocabulary choices to top K tokens",
         ),
         gr.Slider(
             minimum=1.0,
@@ -202,22 +223,24 @@ demo = gr.ChatInterface(
             value=1.1,
             step=0.1,
             label="Repetition Penalty",
-            info="Penalizes repeated words"
+            info="Penalize repeated words (higher = less repetition)",
         ),
     ],
     theme="Ocean",
-    submit_btn="Translate",
+    submit_btn="Send",
     stop_btn="Stop",
-    title="Kazakh Translation Model",
-    description="Translate text between Kazakh, English, and Russian using a specialized language model.",
+    title=title,
+    description=description,
     chatbot=gr.Chatbot(scale=1, show_copy_button=True),
     cache_examples=False,
 )

+
+# Launch the chat interface
 if __name__ == "__main__":
     demo.launch(
         share=False,
         server_name="0.0.0.0",
         server_port=7860,
         show_api=False,
-    )
+    )
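
Note on the Gemma 3 template configured above: with include_sys_prompt_in_first_user_message=True, the system text is folded into the first user turn instead of being emitted as a separate system turn, and generation halts on the <end_of_turn> / <start_of_turn> stop sequences. Below is a minimal, hand-rolled sketch of the prompt this yields for a single exchange; it is an illustration built from the markers in the diff, not output captured from the llama_cpp_agent formatter, and the exact whitespace joining system and user text is an assumption.

# Illustration only: reproduces the layout implied by gemma_3_prompt_markers,
# bos_token and default_stop_sequences above; the real MessagesFormatter may
# join the system and user text with slightly different whitespace.
system_message = "You are a helpful assistant."
user_message = "Сәлем"

prompt = (
    "<bos>"                              # bos_token
    "<start_of_turn>user\n"              # user turn marker
    f"{system_message}\n{user_message}"  # system prompt folded into the first user turn
    "<end_of_turn>\n"                    # end of the user turn
    "<start_of_turn>assistant"           # model continues generating from here
)
print(prompt)
# Generation stops once the model emits "<end_of_turn>" or "<start_of_turn>",
# matching default_stop_sequences in the formatter.

Folding the system prompt into the first user turn matches Gemma's convention of having no dedicated system role, which is also what the "System prompt should be included within user message" comment in the diff points at.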