Beibars003 committed
Commit e716c4d · verified · 1 Parent(s): 331f576

Update app.py

Files changed (1)
  1. app.py +58 -67
app.py CHANGED
@@ -1,6 +1,3 @@
-import warnings
-warnings.filterwarnings("ignore")
-
 import os
 import sys
 from typing import List, Tuple
@@ -12,8 +9,6 @@ from llama_cpp_agent.chat_history.messages import Roles
 from llama_cpp_agent.messages_formatter import MessagesFormatter, PromptMarkers
 from huggingface_hub import hf_hub_download
 import gradio as gr
-from logger import logging
-from exception import CustomExceptionHandling
 
 # Load the Environment Variables from .env file
 huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
@@ -97,69 +92,65 @@ def respond(
     Yields:
         str: The translated text as it is generated.
     """
-    try:
-        global llm, llm_model
-        if llm is None or llm_model != model:
-            model_path = f"models/{model}"
-            if not os.path.exists(model_path):
-                yield f"Error: Model file not found at {model_path}."
-                return
-            llm = Llama(
-                model_path=model_path,
-                flash_attn=False,
-                n_gpu_layers=0,
-                n_batch=8,
-                n_ctx=2048,
-                n_threads=8,
-                n_threads_batch=8,
-            )
-            llm_model = model
-        provider = LlamaCppPythonProvider(llm)
-
-        # Get system prompt and user prefix based on direction
-        prompts = direction_to_prompts[direction]
-        system_message = prompts["system"]
-        user_prefix = prompts["prefix"]
-
-        agent = LlamaCppAgent(
-            provider,
-            system_prompt=system_message,
-            custom_messages_formatter=gemma_3_formatter,
-            debug_output=True,
-        )
-
-        settings = provider.get_provider_default_settings()
-        settings.temperature = temperature
-        settings.top_k = top_k
-        settings.top_p = top_p
-        settings.max_tokens = max_tokens
-        settings.repeat_penalty = repeat_penalty
-        settings.stream = True
-
-        messages = BasicChatHistory()
-        for user_msg, assistant_msg in history:
-            full_user_msg = user_prefix + " " + user_msg
-            messages.add_message({"role": Roles.user, "content": full_user_msg})
-            messages.add_message({"role": Roles.assistant, "content": assistant_msg})
-
-        full_message = user_prefix + " " + message
-
-        stream = agent.get_chat_response(
-            full_message,
-            llm_sampling_settings=settings,
-            chat_history=messages,
-            returns_streaming_generator=True,
-            print_output=False,
+
+    global llm, llm_model
+    if llm is None or llm_model != model:
+        model_path = f"models/{model}"
+        if not os.path.exists(model_path):
+            yield f"Error: Model file not found at {model_path}."
+            return
+        llm = Llama(
+            model_path=model_path,
+            flash_attn=False,
+            n_gpu_layers=0,
+            n_batch=8,
+            n_ctx=2048,
+            n_threads=8,
+            n_threads_batch=8,
         )
-
-        logging.info("Response stream generated successfully")
-        outputs = ""
-        for output in stream:
-            outputs += output
-            yield outputs
-
-    except Exception as e:
-        raise CustomExceptionHandling(e, sys) from e
+        llm_model = model
+    provider = LlamaCppPythonProvider(llm)
+
+    # Get system prompt and user prefix based on direction
+    prompts = direction_to_prompts[direction]
+    system_message = prompts["system"]
+    user_prefix = prompts["prefix"]
+
+    agent = LlamaCppAgent(
+        provider,
+        system_prompt=system_message,
+        custom_messages_formatter=gemma_3_formatter,
+        debug_output=True,
+    )
+
+    settings = provider.get_provider_default_settings()
+    settings.temperature = temperature
+    settings.top_k = top_k
+    settings.top_p = top_p
+    settings.max_tokens = max_tokens
+    settings.repeat_penalty = repeat_penalty
+    settings.stream = True
+
+    messages = BasicChatHistory()
+    for user_msg, assistant_msg in history:
+        full_user_msg = user_prefix + " " + user_msg
+        messages.add_message({"role": Roles.user, "content": full_user_msg})
+        messages.add_message({"role": Roles.assistant, "content": assistant_msg})
+
+    full_message = user_prefix + " " + message
+
+    stream = agent.get_chat_response(
+        full_message,
+        llm_sampling_settings=settings,
+        chat_history=messages,
+        returns_streaming_generator=True,
+        print_output=False,
+    )
+
+    outputs = ""
+    for output in stream:
+        outputs += output
+        yield outputs
 
 demo = gr.ChatInterface(
     respond,
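With the try/except and the logging/CustomExceptionHandling imports removed, any error raised while loading the model or streaming now propagates straight out of respond. If a guard is still wanted at the call site, one option is to wrap the generator. The sketch below is only a hypothetical helper (safe_respond is not part of app.py) and assumes respond keeps its generator signature:

def safe_respond(*args, **kwargs):
    # Hypothetical wrapper, not part of this commit: re-adds an error guard
    # around the streaming generator defined in app.py.
    try:
        # respond yields progressively longer translations; forward them as-is.
        yield from respond(*args, **kwargs)
    except Exception as e:
        # Surface the failure in the chat output instead of crashing the Space.
        yield f"Error: {e}"

Passing safe_respond instead of respond to gr.ChatInterface would keep the UI usable if model loading or generation fails.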