Spaces: maxiaolong03 (Running)

maxiaolong03 committed · Commit 1eb802e · 1 parent: 8037955

add files

Browse files:
- app.py (+130 −32)
- bot_requests.py (+88 −77)

app.py CHANGED
@@ -102,14 +102,25 @@ def get_args() -> argparse.Namespace:
     """
     parser = ArgumentParser(description="ERNIE models web chat demo.")
 
+    parser.add_argument(
+        "--server-port", type=int, default=7860, help="Demo server port."
+    )
+    parser.add_argument(
+        "--server-name", type=str, default="0.0.0.0", help="Demo server name."
+    )
+    parser.add_argument(
+        "--max_char",
+        type=int,
+        default=20000,
+        help="Maximum character limit for messages.",
+    )
+    parser.add_argument(
+        "--max_retry_num", type=int, default=3, help="Maximum retry number for request."
+    )
     parser.add_argument(
         "--model_map",
         type=str,
+        default='{"ernie-4.5-turbo-vl-preview": "https://qianfan.baidubce.com/v2"}',
         help="""JSON string defining model name to endpoint mappings.
         Required Format:
             {"ERNIE-4.5-VL": "http://localhost:port/v1"}
@@ -128,10 +139,22 @@ def get_args() -> argparse.Namespace:
         help="Web Search Service URL.",
     )
     parser.add_argument(
+        "--qianfan_api_key",
+        type=str,
+        default=os.environ.get('API_SEARCH_KEY'),
+        help="Web Search Service API key.",
+    )
+    parser.add_argument(
+        "--max_crawler_threads",
+        type=int,
+        default=10,
+        help="The maximum number of concurrent crawler threads.",
+    )
+    parser.add_argument(
+        "--concurrency_limit", type=int, default=10, help="Default concurrency limit."
     )
     parser.add_argument(
+        "--max_queue_size", type=int, default=50, help="Maximum queue size for request."
     )
 
     args = parser.parse_args()
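Note: `--model_map` arrives as a JSON string, while `launch_demo` later treats `args.model_map` as a dict (`args.model_map.keys()`), so `get_args()` presumably decodes it after parsing. A minimal sketch of that decoding step (the endpoint URL below is a placeholder, not from this commit):

    import json

    raw = '{"ERNIE-4.5-VL": "http://localhost:8000/v1"}'  # example --model_map value
    model_map = json.loads(raw)            # {"ERNIE-4.5-VL": "http://localhost:8000/v1"}
    default_model = next(iter(model_map))  # first configured model name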
@@ -158,7 +181,9 @@ class GradioEvents:
     """
 
     @staticmethod
+    def get_history_conversation(
+        task_history: list, image_history: dict, file_history: dict
+    ) -> tuple:
         """
         Constructs complete conversation history from stored components including text messages,
         attached files and images. Processes each dialogue turn by combining the raw query/response
@@ -185,7 +210,12 @@ class GradioEvents:
             if idx in image_history:
                 content = []
                 for image_url in image_history[idx]:
+                    content.append(
+                        {
+                            "type": "image_url",
+                            "image_url": {"url": GradioEvents.get_image_url(image_url)},
+                        }
+                    )
                 content.append({"type": "text", "text": query_h})
                 conversation.append({"role": "user", "content": content})
             else:
@@ -194,7 +224,9 @@ class GradioEvents:
         return conversation, conversation_str
 
     @staticmethod
+    def get_search_query(
+        conversation: list, model_name: str, bot_client: BotClient
+    ) -> list:
         """
         Processes conversation history to generate search queries by sending the conversation context
         to the model and parsing its JSON response. Handles model output validation and extracts
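For reference, the user turns rebuilt above follow the OpenAI-style multimodal content format: a list of image_url parts followed by one text part. A minimal sketch of a single reconstructed turn (the URL is a placeholder):

    image_urls = ["https://example.com/cat.png"]  # placeholder upload
    query = "What is in this picture?"

    content = [{"type": "image_url", "image_url": {"url": u}} for u in image_urls]
    content.append({"type": "text", "text": query})
    conversation = [{"role": "user", "content": content}]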
@@ -262,9 +294,12 @@ class GradioEvents:
         for file_url in files_url:
             extionsion = "." + file_url.split(".")[-1]
             if extionsion in TEXT_FILE_TYPE and (
+                len(file_history) == 0
+                or file_url not in list(file_history.values())[-1]
             ):
+                file_history[diologue_turn] = file_history.get(diologue_turn, []) + [
+                    file_url
+                ]
                 file_name = file_url.split("/")[-1]
                 file_contents_words = bot_client.cut_chinese_english(file_contents)
 
@@ -275,14 +310,25 @@ class GradioEvents:
                     + f"用户上传\n{file_name}\n{GradioEvents.get_file_text(file_url)}\n"
                 )
                 file_content_words = bot_client.cut_chinese_english(file_content)
+                max_char = min(
+                    len(file_content_words),
+                    max_file_char - len(file_contents_words),
+                )
                 file_content_words = file_content_words[:max_char]
                 file_contents += "".join(file_content_words) + "\n"
             elif extionsion in IMAGE_FILE_TYPE and (
+                len(image_history) == 0
+                or file_url not in list(image_history.values())[-1]
             ):
+                image_history[diologue_turn] = image_history.get(diologue_turn, []) + [
+                    file_url
+                ]
+                input_content.append(
+                    {
+                        "type": "image_url",
+                        "image_url": {"url": GradioEvents.get_image_url(file_url)},
+                    }
+                )
         return input_content, file_contents, ref_file_num
 
     @staticmethod
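The `min(...)` clamp above keeps the combined file excerpts inside the per-turn file budget. A worked example of the arithmetic (numbers are illustrative only):

    max_file_char = 10000   # budget for all file excerpts this turn
    already_used = 9400     # units consumed by files read earlier in the loop
    new_file_units = 2000   # units in the file just read

    max_char = min(new_file_units, max_file_char - already_used)
    # min(2000, 600) == 600 -> only the first 600 units of this file are kept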
@@ -328,31 +374,50 @@ class GradioEvents:
         search_info_res = {}
         if search_state:
             search_info_message = SEARCH_INFO_PROMPT.format(
+                date=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                context=conversation_str,
+                query=query,
             )
             search_conversation = [{"role": "user", "content": search_info_message}]
+            search_info_res = GradioEvents.get_search_query(
+                search_conversation, model_name, bot_client
+            )
             if search_info_res is None:
                 search_info_res = {"is_search": True, "query_list": [query]}
 
         # Process files
         diologue_turn = len(task_history)
+        if search_info_res.get("is_search", False) and search_info_res.get(
+            "query_list", []
+        ):
             max_file_char = max_ref_char // 2
         else:
             max_file_char = max_ref_char
         input_content, file_contents, ref_file_num = GradioEvents.process_files(
+            diologue_turn,
+            files_url,
+            file_history,
+            image_history,
+            bot_client,
+            max_file_char,
         )
 
         # Step 2: If a search is needed, obtain the corresponding query results
+        if search_info_res.get("is_search", False) and search_info_res.get(
+            "query_list", []
+        ):
             yield {"type": "search_result", "content": "🧐 努力搜索中... ✨"}
             search_result = bot_client.get_web_search_res(search_info_res["query_list"])
 
+            max_search_result_char = max_ref_char - len(
+                bot_client.cut_chinese_english(file_contents)
+            )
             complete_search_result = await GradioEvents.get_complete_search_content(
+                ref_file_num,
+                search_result,
+                max_crawler_threads,
+                bot_client,
+                max_search_result_char,
             )
             complete_ref = file_contents + complete_search_result
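Note the budget split above: when a search is planned, files get at most half of `max_ref_char`, and the search snippets then receive whatever the files did not actually consume. Illustratively:

    max_ref_char = 20000
    max_file_char = max_ref_char // 2   # 10000 reserved for files when searching
    file_units_used = 7000              # segmented length of file_contents
    max_search_result_char = max_ref_char - file_units_used  # 13000 left for search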
@@ -559,7 +624,15 @@ class GradioEvents:
         GradioEvents.gc()
 
         reset_result = namedtuple(
+            "reset_result",
+            [
+                "chatbot",
+                "task_history",
+                "image_history",
+                "file_history",
+                "file_btn",
+                "search_result",
+            ],
         )
         return reset_result(
             [],  # clear chatbot
@@ -757,7 +830,10 @@ class GradioEvents:
             search_res_words = search_res_words[:max_char]
             item_text = "".join(search_res_words)
 
+            results.append(
+                f"\n参考资料[{len(results) + 1 + ref_file_num}]:\n"
+                + f"资料来源:素材检索\n{item_text}\n"
+            )
 
         return "".join(results)
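Each snippet is emitted as a numbered 参考资料 ("reference") block, offset by `ref_file_num` so numbering continues after the file references. For example, with `ref_file_num = 2`, the third appended snippet would be headed:

    参考资料[5]:
    资料来源:素材检索
    <snippet text>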
@@ -819,15 +895,21 @@ def launch_demo(args: argparse.Namespace, bot_client: BotClient):
             <a href="https://yiyan.baidu.com/blog/publication/">Technical Report</a></center>"""
         )
 
+        chatbot = gr.Chatbot(
+            label="ERNIE", elem_classes="control-height", type="messages"
+        )
 
+        search_result = gr.Textbox(
+            label="Search Result", lines=10, max_lines=10, visible=False
+        )
 
         with gr.Row():
             search_check = gr.Checkbox(label="🌐 Search the web(联网搜索)")
 
         with gr.Row():
+            query = gr.Textbox(
+                label="Input", lines=1, scale=6, elem_classes="input-textbox"
+            )
             file_btn = gr.File(
                 label="File upload (Accepted formats: PNG, JPEG, JPG, PDF, TXT, MD, DOC, DOCX)",
                 scale=4,
@@ -847,10 +929,16 @@ def launch_demo(args: argparse.Namespace, bot_client: BotClient):
         model_name = gr.State(next(iter(args.model_map.keys())))
         max_crawler_threads = gr.State(args.max_crawler_threads)
 
+        search_check.change(
+            fn=GradioEvents.search_toggle_state,
+            inputs=search_check,
+            outputs=search_result,
+        )
 
         predict_with_clients = partial(GradioEvents.predict, bot_client=bot_client)
+        regenerate_with_clients = partial(
+            GradioEvents.regenerate, bot_client=bot_client
+        )
         query.submit(
             predict_with_clients,
             inputs=[
@@ -887,7 +975,14 @@ def launch_demo(args: argparse.Namespace, bot_client: BotClient):
         submit_btn.click(GradioEvents.reset_user_input, [], [query])
         empty_btn.click(
             GradioEvents.reset_state,
+            outputs=[
+                chatbot,
+                task_history,
+                image_history,
+                file_history,
+                file_btn,
+                search_result,
+            ],
             show_progress=True,
         )
         regen_btn.click(
@@ -906,7 +1001,10 @@ def launch_demo(args: argparse.Namespace, bot_client: BotClient):
             show_progress=True,
         )
 
+    demo.queue(
+        default_concurrency_limit=args.concurrency_limit, max_size=args.max_queue_size
+    )
+    demo.launch(server_port=args.server_port, server_name=args.server_name)
 
 
 def main():
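For context, a standalone sketch of the queue-then-launch pattern wired up above (values mirror the new CLI defaults; assumes only gradio is installed):

    import gradio as gr

    with gr.Blocks() as demo:
        gr.Markdown("queue demo")

    demo.queue(default_concurrency_limit=10, max_size=50)  # --concurrency_limit / --max_queue_size
    demo.launch(server_port=7860, server_name="0.0.0.0")   # --server-port / --server-name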
bot_requests.py CHANGED
@@ -16,20 +16,22 @@
 
 import os
 import argparse
+import json
 import logging
 import traceback
 
 import jieba
+import requests
 from openai import OpenAI
-
-import requests
 
 
-class BotClient(object):
+class BotClient:
     """Client for interacting with various AI models."""
+
     def __init__(self, args: argparse.Namespace):
         """
+        Initializes the BotClient instance by configuring essential parameters from command line arguments
+        including retry limits, character constraints, model endpoints and API credentials while setting up
         default values for missing arguments to ensure robust operation.
 
         Args:
@@ -37,25 +39,29 @@ class BotClient(object):
         Uses getattr() to safely retrieve values with fallback defaults.
         """
         self.logger = logging.getLogger(__name__)
-
-        self.max_retry_num = getattr(args, 'max_retry_num', 3)
-        self.max_char = getattr(args, 'max_char', 8000)
 
+        self.max_retry_num = getattr(args, "max_retry_num", 3)
+        self.max_char = getattr(args, "max_char", 8000)
+
+        self.model_map = getattr(args, "model_map", {})
         self.api_key = os.environ.get('API_KEY')
 
+        self.embedding_service_url = getattr(
+            args, "embedding_service_url", "embedding_service_url"
+        )
+        self.embedding_model = getattr(args, "embedding_model", "embedding_model")
 
+        self.web_search_service_url = getattr(
+            args, "web_search_service_url", "web_search_service_url"
+        )
+        self.max_search_results_num = getattr(args, "max_search_results_num", 15)
 
         self.qianfan_api_key = os.environ.get('API_SEARCH_KEY')
 
     def call_back(self, host_url: str, req_data: dict) -> dict:
         """
+        Executes an HTTP request to the specified endpoint using the OpenAI client, handles the response
+        conversion to a compatible dictionary format, and manages any exceptions that may occur during
        the request process while logging errors appropriately.
 
        Args:
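Since `__init__` reads every setting through `getattr` with a fallback, a `BotClient` can be built from any namespace-like object. A minimal construction sketch (endpoint values are placeholders, not from this commit; the API keys come from the `API_KEY` / `API_SEARCH_KEY` environment variables):

    import argparse

    args = argparse.Namespace(
        max_retry_num=3,
        max_char=8000,
        model_map={"ERNIE-4.5-VL": "http://localhost:8000/v1"},  # placeholder
        embedding_service_url="http://localhost:8001/v1",        # placeholder
        embedding_model="embedding-v1",                          # placeholder
        web_search_service_url="http://localhost:8002/search",   # placeholder
        max_search_results_num=15,
    )
    client = BotClient(args)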
@@ -68,20 +74,18 @@ class BotClient(object):
         """
         try:
             client = OpenAI(base_url=host_url, api_key=self.api_key)
+            response = client.chat.completions.create(**req_data)
+
             # Convert OpenAI response to compatible format
             return response.model_dump()
 
         except Exception as e:
+            self.logger.error(f"Stream request failed: {e}")
             raise
 
     def call_back_stream(self, host_url: str, req_data: dict) -> dict:
         """
+        Makes a streaming HTTP request to the specified host URL using the OpenAI client and yields response chunks
         in real-time while handling any exceptions that may occur during the streaming process.
 
         Args:
@@ -100,25 +104,25 @@ class BotClient(object):
             for chunk in response:
                 if not chunk.choices:
                     continue
+
                 # Convert OpenAI response to compatible format
                 yield chunk.model_dump()
 
         except Exception as e:
+            self.logger.error(f"Stream request failed: {e}")
             raise
 
     def process(
-        self,
-        model_name: str,
-        req_data: dict,
-        max_tokens: int=2048,
-        temperature: float=1.0,
-        top_p: float=0.7
+        self,
+        model_name: str,
+        req_data: dict,
+        max_tokens: int = 2048,
+        temperature: float = 1.0,
+        top_p: float = 0.7,
     ) -> dict:
         """
+        Handles chat completion requests by mapping the model name to its endpoint, preparing request parameters
+        including token limits and sampling settings, truncating messages to fit character limits, making API calls
         with built-in retry mechanism, and logging the full request/response cycle for debugging purposes.
 
         Args:
@@ -140,7 +144,7 @@ class BotClient(object):
         req_data["messages"] = self.truncate_messages(req_data["messages"])
         for _ in range(self.max_retry_num):
             try:
+                self.logger.info(f"[MODEL] {model_url}")
                 self.logger.info("[req_data]====>")
                 self.logger.info(json.dumps(req_data, ensure_ascii=False))
                 res = self.call_back(model_url, req_data)
@@ -153,15 +157,16 @@ class BotClient(object):
             res = {}
             if len(res) != 0 and "error" not in res:
                 break
+
         return res
 
     def process_stream(
-        self,
+        self,
+        model_name: str,
+        req_data: dict,
+        max_tokens: int = 2048,
+        temperature: float = 1.0,
+        top_p: float = 0.7,
     ) -> dict:
         """
         Processes streaming requests by mapping the model name to its endpoint, configuring request parameters,
@@ -184,29 +189,30 @@ class BotClient(object):
         req_data["temperature"] = temperature
         req_data["top_p"] = top_p
         req_data["messages"] = self.truncate_messages(req_data["messages"])
+
         last_error = None
         for _ in range(self.max_retry_num):
             try:
+                self.logger.info(f"[MODEL] {model_url}")
                 self.logger.info("[req_data]====>")
                 self.logger.info(json.dumps(req_data, ensure_ascii=False))
+
-                yield chunk
+                yield from self.call_back_stream(model_url, req_data)
                 return
+
             except Exception as e:
                 last_error = e
+                self.logger.error(
+                    f"Stream request failed (attempt {_ + 1}/{self.max_retry_num}): {e}"
+                )
+
         self.logger.error("All retry attempts failed for stream request")
         yield {"error": str(last_error)}
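Callers of `process_stream` therefore see a generator of OpenAI-style chunk dicts, terminated by an `{"error": ...}` dict if every retry failed. A hedged consumption sketch (the delta/content path follows the standard OpenAI chunk shape, which `model_dump()` preserves):

    req = {"messages": [{"role": "user", "content": "你好"}]}
    for chunk in client.process_stream("ERNIE-4.5-VL", req):
        if "error" in chunk:
            print("request failed:", chunk["error"])
            break
        delta = chunk["choices"][0].get("delta", {})
        print(delta.get("content") or "", end="", flush=True)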
 
     def cut_chinese_english(self, text: str) -> list:
         """
+        Segments mixed Chinese and English text into individual components using Jieba for Chinese words
+        while preserving English words as whole units, with special handling for Unicode character ranges
         to distinguish between the two languages.
 
         Args:
@@ -219,7 +225,9 @@ class BotClient(object):
         en_ch_words = []
 
         for word in words:
+            if word.isalpha() and not any(
+                "\u4e00" <= char <= "\u9fff" for char in word
+            ):
                 en_ch_words.append(word)
             else:
                 en_ch_words.extend(list(word))
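The segmentation unit is thus "one English word or one Chinese character" (the CJK range check is needed because `str.isalpha()` is also True for Chinese characters). A standalone sketch of the same idea (it mirrors, rather than reproduces, the method; assumes jieba is installed):

    import jieba

    def cut_units(text: str) -> list:
        units = []
        for word in jieba.lcut(text):  # jieba tokenizes the mixed-language text
            if word.isalpha() and not any("\u4e00" <= ch <= "\u9fff" for ch in word):
                units.append(word)        # keep whole English words
            else:
                units.extend(list(word))  # split Chinese (and anything else) per character
        return units

    print(cut_units("ERNIE模型支持web chat"))  # English words stay whole; Chinese splits per character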
@@ -239,10 +247,10 @@ class BotClient(object):
         """
         if not messages:
             return messages
+
         processed = []
         total_units = 0
+
         for msg in messages:
             # Handle two different content formats
             if isinstance(msg["content"], str):
@@ -251,31 +259,33 @@ class BotClient(object):
                 text_content = msg["content"][1]["text"]
             else:
                 text_content = ""
+
             # Calculate unit count after tokenization
             units = self.cut_chinese_english(text_content)
             unit_count = len(units)
+
+            processed.append(
+                {
+                    "role": msg["role"],
+                    "original_content": msg["content"],  # Preserve original content
+                    "text_content": text_content,  # Extracted plain text
+                    "units": units,
+                    "unit_count": unit_count,
+                }
+            )
             total_units += unit_count
+
         if total_units <= self.max_char:
             return messages
+
         # Number of units to remove
         to_remove = total_units - self.max_char
+
         # 1. Truncate historical messages
         for i in range(len(processed) - 1, 1):
             if to_remove <= 0:
                 break
+
             # current = processed[i]
             if processed[i]["unit_count"] <= to_remove:
                 processed[i]["text_content"] = ""
@@ -293,7 +303,7 @@ class BotClient(object):
             elif isinstance(processed[i]["original_content"], list):
                 processed[i]["original_content"][1]["text"] = new_text
             to_remove = 0
+
         # 2. Truncate system message
         if to_remove > 0:
             system_msg = processed[0]
@@ -313,7 +323,7 @@ class BotClient(object):
             elif isinstance(processed[0]["original_content"], list):
                 processed[0]["original_content"][1]["text"] = new_text
             to_remove = 0
+
         # 3. Truncate last message
         if to_remove > 0 and len(processed) > 1:
             last_msg = processed[-1]
@@ -331,15 +341,12 @@ class BotClient(object):
             last_msg["original_content"] = ""
         elif isinstance(last_msg["original_content"], list):
             last_msg["original_content"][1]["text"] = ""
+
         result = []
         for msg in processed:
             if msg["text_content"]:
-                result.append({
-                    "content": msg["original_content"]
-                })
+                result.append({"role": msg["role"], "content": msg["original_content"]})
+
         return result
 
     def embed_fn(self, text: str) -> list:
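The truncation order above drops units from history first, then the system message, then the final message, until the total fits `max_char`. Illustratively:

    # max_char = 10; unit counts: system=3, history=5, last=6 -> total 14, to_remove = 4
    # stage 1 trims the history message by 4 units -> the total now fits, later stages skip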
@@ -352,7 +359,9 @@ class BotClient(object):
         Returns:
             list: A list of floats representing the embedding.
         """
+        client = OpenAI(
+            base_url=self.embedding_service_url, api_key=self.qianfan_api_key
+        )
         response = client.embeddings.create(input=[text], model=self.embedding_model)
         return response.data[0].embedding
@@ -368,7 +377,7 @@ class BotClient(object):
         """
         headers = {
             "Authorization": "Bearer " + self.qianfan_api_key,
-            "Content-Type": "application/json"
+            "Content-Type": "application/json",
         }
 
         results = []
@@ -376,9 +385,11 @@ class BotClient(object):
         for query in query_list:
             payload = {
                 "messages": [{"role": "user", "content": query}],
-                "resource_type_filter": [{"type": "web", "top_k": top_k}]
+                "resource_type_filter": [{"type": "web", "top_k": top_k}],
             }
+            response = requests.post(
+                self.web_search_service_url, headers=headers, json=payload
+            )
 
             if response.status_code == 200:
                 response = response.json()
@@ -387,4 +398,4 @@ class BotClient(object):
             else:
                 self.logger.info(f"请求失败,状态码: {response.status_code}")
                 self.logger.info(response.text)
-        return results
+        return results
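For reference, a standalone sketch of the same web-search call (the service URL and key are placeholders; the header and payload shapes match the diff above):

    import requests

    web_search_service_url = "https://example.com/search"     # placeholder
    headers = {
        "Authorization": "Bearer " + "API_SEARCH_KEY_VALUE",  # placeholder key
        "Content-Type": "application/json",
    }
    payload = {
        "messages": [{"role": "user", "content": "ERNIE 4.5 技术报告"}],
        "resource_type_filter": [{"type": "web", "top_k": 15}],
    }
    response = requests.post(web_search_service_url, headers=headers, json=payload)
    if response.status_code == 200:
        print(response.json())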