Beibars003 committed
Commit 8196f26 (verified)
1 parent: c54f432

Update app.py

Files changed (1)
  1. app.py +47 -88
app.py CHANGED
@@ -1,6 +1,7 @@
+import warnings
+warnings.filterwarnings("ignore")
+
 import os
-import json
-import subprocess
 import sys
 from typing import List, Tuple
 from llama_cpp import Llama
@@ -11,9 +12,6 @@ from llama_cpp_agent.chat_history.messages import Roles
 from llama_cpp_agent.messages_formatter import MessagesFormatter, PromptMarkers
 from huggingface_hub import hf_hub_download
 import gradio as gr
-# from logger import logging
-# from exception import CustomExceptionHandling
-
 
 # Load the Environment Variables from .env file
 huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
@@ -28,25 +26,22 @@ hf_hub_download(
     local_dir="./models",
 )
 
-
 # Define the prompt markers for Gemma 3
 gemma_3_prompt_markers = {
-    Roles.system: PromptMarkers("<start_of_turn>system\n", "<end_of_turn>\n"),  # System prompt should be included within user message
+    Roles.system: PromptMarkers("<start_of_turn>system\n", "<end_of_turn>\n"),
     Roles.user: PromptMarkers("<start_of_turn>user\n", "<end_of_turn>\n"),
     Roles.assistant: PromptMarkers("<start_of_turn>assistant", ""),
-
-    Roles.tool: PromptMarkers("", ""),  # If you need tool support
+    Roles.tool: PromptMarkers("", ""),
 }
 
-# Create the formatter
 gemma_3_formatter = MessagesFormatter(
-    pre_prompt="",  # No pre-prompt
+    pre_prompt="",
     prompt_markers=gemma_3_prompt_markers,
-    include_sys_prompt_in_first_user_message=True,  # Include system prompt in first user message
+    include_sys_prompt_in_first_user_message=True,
     default_stop_sequences=["<end_of_turn>", "<start_of_turn>"],
-    strip_prompt=False,  # Don't strip whitespace from the prompt
-    bos_token="<bos>",  # Beginning of sequence token for Gemma 3
-    eos_token="<eos>",  # End of sequence token for Gemma 3
+    strip_prompt=False,
+    bos_token="<bos>",
+    eos_token="<eos>",
 )
 
 # Translation direction to prompts mapping
@@ -69,59 +64,43 @@ direction_to_prompts = {
     }
 }
 
-# Set the title and description
-title = "Kazakh Language Model"
-description = """"""
-
-
 llm = None
 llm_model = None
 
 def respond(
-    message: str,
-    history: List[Tuple[str, str]],
-    #model: str = "gemma_3_800M_sft_v2_translation-kazparc_latest.gguf", # Set default model
-    direction: str = "English to Kazakh",
-    max_tokens: int = 64,
+    direction: str,
+    max_tokens: int = 1024,
     temperature: float = 0.7,
     top_p: float = 0.95,
     top_k: int = 40,
     repeat_penalty: float = 1.1,
 ):
     """
-    Respond to a message using the Gemma3 model via Llama.cpp.
+    Respond to a message by translating it using the specified direction.
+
     Args:
-    - message (str): The message to respond to.
-    - history (List[Tuple[str, str]]): The chat history.
-    - model (str): The model to use.
-    - system_message (str): The system message to use.
-    - max_tokens (int): The maximum number of tokens to generate.
-    - temperature (float): The temperature of the model.
-    - top_p (float): The top-p of the model.
-    - top_k (int): The top-k of the model.
-    - repeat_penalty (float): The repetition penalty of the model.
-    Returns:
-        str: The response to the message.
+        message (str): The text to translate.
+        history (List[Tuple[str, str]]): The chat history.
+        direction (str): The translation direction (e.g., "English to Kazakh").
+        model (str): The model file to use.
+        max_tokens (int): Maximum number of tokens to generate.
+        temperature (float): Sampling temperature.
+        top_p (float): Top-p sampling parameter.
+        top_k (int): Top-k sampling parameter.
+        repeat_penalty (float): Penalty for repetition.
+
+    Yields:
+        str: The translated text as it is generated.
     """
-    # try:
-    # Load the global variables
-    global llm
-    global llm_model
 
-    # Ensure model is not None
-    if model is None:
-        model = "gemma_3_800M_sft_v2_translation-kazparc_latest.gguf"
-
-    # Load the model
+    global llm, llm_model
     if llm is None or llm_model != model:
-        # Check if model file exists
         model_path = f"models/{model}"
         if not os.path.exists(model_path):
-            yield f"Error: Model file not found at {model_path}. Please check your model path."
+            yield f"Error: Model file not found at {model_path}."
             return
-
         llm = Llama(
-            model_path=f"models/{model}",
+            model_path=model_path,
             flash_attn=False,
             n_gpu_layers=0,
             n_batch=8,
@@ -132,15 +111,18 @@ def respond(
         llm_model = model
     provider = LlamaCppPythonProvider(llm)
 
-    # Create the agent
+    # Get system prompt and user prefix based on direction
+    prompts = direction_to_prompts[direction]
+    system_message = prompts["system"]
+    user_prefix = prompts["prefix"]
+
     agent = LlamaCppAgent(
         provider,
-        system_prompt=f"{system_message}",
+        system_prompt=system_message,
         custom_messages_formatter=gemma_3_formatter,
         debug_output=True,
     )
 
-    # Set the settings like temperature, top-k, top-p, max tokens, etc.
     settings = provider.get_provider_default_settings()
     settings.temperature = temperature
     settings.top_k = top_k
@@ -150,15 +132,13 @@ def respond(
     settings.stream = True
 
     messages = BasicChatHistory()
-
-    # Add the chat history
     for user_msg, assistant_msg in history:
         full_user_msg = user_prefix + " " + user_msg
         messages.add_message({"role": Roles.user, "content": full_user_msg})
         messages.add_message({"role": Roles.assistant, "content": assistant_msg})
+
     full_message = user_prefix + " " + message
 
-    # Get the response stream
     stream = agent.get_chat_response(
         full_message,
         llm_sampling_settings=settings,
@@ -167,47 +147,28 @@
         print_output=False,
     )
 
-    # Log the success
-    # logging.info("Response stream generated successfully")
-
-    # Generate the response
     outputs = ""
     for output in stream:
         outputs += output
         yield outputs
 
-    # # Handle exceptions that may occur during the process
-    # except Exception as e:
-    #     # Custom exception handling
-    #     raise CustomExceptionHandling(e, sys) from e
-
-
-# Create a chat interface
 demo = gr.ChatInterface(
     respond,
-    examples=[["Сәлем"], ["Привет"], ["Hello"]],
-    additional_inputs_accordion=gr.Accordion(
-        label="⚙️ Parameters", open=False, render=False
-    ),
+    examples=[["Hello"], ["Сәлем"], ["Привет"]],
+    additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
     additional_inputs=[
         gr.Dropdown(
             choices=["English to Kazakh", "Kazakh to English", "Kazakh to Russian", "Russian to Kazakh"],
             label="Translation Direction",
             info="Select the direction of translation"
         ),
-        gr.Textbox(
-            value="You are a helpful assistant.",
-            label="System Prompt",
-            info="Define the AI assistant's personality and behavior",
-            lines=2,
-        ),
         gr.Slider(
             minimum=512,
             maximum=2048,
             value=1024,
             step=1,
             label="Max Tokens",
-            info="Maximum length of response (higher = longer replies)",
+            info="Maximum length of the translation"
         ),
         gr.Slider(
             minimum=0.1,
@@ -215,7 +176,7 @@ demo = gr.ChatInterface(
             value=0.7,
             step=0.1,
             label="Temperature",
-            info="Creativity level (higher = more creative, lower = more focused)",
+            info="Controls randomness (higher = more creative)"
         ),
         gr.Slider(
             minimum=0.1,
@@ -223,7 +184,7 @@ demo = gr.ChatInterface(
             value=0.95,
             step=0.05,
             label="Top-p",
-            info="Nucleus sampling threshold",
+            info="Nucleus sampling threshold"
         ),
         gr.Slider(
             minimum=1,
@@ -231,7 +192,7 @@ demo = gr.ChatInterface(
             value=40,
             step=1,
             label="Top-k",
-            info="Limit vocabulary choices to top K tokens",
+            info="Limits vocabulary to top K tokens"
        ),
         gr.Slider(
             minimum=1.0,
@@ -239,24 +200,22 @@ demo = gr.ChatInterface(
             value=1.1,
             step=0.1,
             label="Repetition Penalty",
-            info="Penalize repeated words (higher = less repetition)",
+            info="Penalizes repeated words"
         ),
     ],
     theme="Ocean",
-    submit_btn="Send",
+    submit_btn="Translate",
     stop_btn="Stop",
-    title=title,
-    description=description,
+    title="Kazakh Translation Model",
+    description="Translate text between Kazakh, English, and Russian using a specialized language model.",
     chatbot=gr.Chatbot(scale=1, show_copy_button=True),
     cache_examples=False,
 )
 
-
-# Launch the chat interface
 if __name__ == "__main__":
     demo.launch(
         share=False,
         server_name="0.0.0.0",
         server_port=7860,
         show_api=False,
-    )
+    )