"""Persistence shim for a Hugging Face Space.

Restores app data from a HF Dataset on startup, pushes it back every two
minutes in a daemon thread, then launches the actual web UI on port 7860.

Environment:
    HF_TOKEN    -- HF API token used for both download and upload.
    DATASET_ID  -- target dataset repo id; persistence is a no-op if unset.
"""

import os
import subprocess
import threading
import time

from huggingface_hub import HfApi, snapshot_download

TOKEN = os.getenv("HF_TOKEN")
DATASET_ID = os.getenv("DATASET_ID")

# Folders and files that must survive restarts (app data lives in data/,
# config in root-level .json / .env files).
# NOTE(review): informational — the effective filters are the pattern lists
# below; keep them in sync when adding new persisted paths.
PERSIST_FILES = ["data", "output", "config.json", "settings.json", ".env"]

# Single source of truth for what gets synced in BOTH directions, so the
# download and upload filters cannot drift apart.
_DATA_PATTERNS = ["data/*", "output/*", "*.json", "*.env", "*.db"]
# Never sync code or transient artifacts.
_IGNORE_PATTERNS = ["*.py", "static/*", "templates/*", "logs/*", "__pycache__/*"]

api = HfApi(token=TOKEN)


def download_data():
    """Pull persisted data files from the Dataset into the working directory.

    Best-effort: a missing DATASET_ID is a silent no-op, and any download
    error is logged rather than raised so the app still starts.
    """
    if not DATASET_ID:
        return
    try:
        print("[System] 正在从 Dataset 拉取持久化数据...")
        # Only data files are restored; code files (.py, .html, .js) stay as
        # deployed with the Space.
        snapshot_download(
            repo_id=DATASET_ID,
            repo_type="dataset",
            local_dir=".",
            token=TOKEN,
            allow_patterns=_DATA_PATTERNS,
            ignore_patterns=_IGNORE_PATTERNS,
        )
        print("[System] 拉取完成。")
    except Exception as e:
        print(f"[System] 拉取失败: {e}")


def _report_upload_result(future):
    """Done-callback for a background upload Future.

    With run_as_future=True, upload errors are raised on the Future in a
    worker thread — they never reach the submitting try/except. This
    callback surfaces them, keeping the original "no changes" filter.
    """
    exc = future.exception()
    if exc is not None and "No files have been modified" not in str(exc):
        print(f"[Backup] 备份出错: {exc}")


def upload_data():
    """Daemon loop: push data and config files to the Dataset every 2 minutes.

    Runs forever; intended to be started on a daemon thread so it dies with
    the main process.
    """
    while True:
        time.sleep(120)  # back up once every 2 minutes
        if not DATASET_ID:
            continue
        try:
            # Upload all persisted data/config; the call returns immediately
            # with a Future because run_as_future=True.
            future = api.upload_folder(
                folder_path=".",
                repo_id=DATASET_ID,
                repo_type="dataset",
                allow_patterns=_DATA_PATTERNS,
                ignore_patterns=_IGNORE_PATTERNS,
                run_as_future=True,
            )
            # BUG FIX: errors from the background upload surface on the
            # Future, not in this try block — check them in a done-callback.
            future.add_done_callback(_report_upload_result)
            print(f"[Backup] {time.strftime('%H:%M:%S')} 同步已提交至 Dataset")
        except Exception as e:
            # Errors raised while *submitting* the upload (e.g. auth).
            if "No files have been modified" not in str(e):
                print(f"[Backup] 备份出错: {e}")


if __name__ == "__main__":
    # Ensure persisted directories exist before the first sync.
    os.makedirs("data", exist_ok=True)
    os.makedirs("output", exist_ok=True)
    download_data()
    threading.Thread(target=upload_data, daemon=True).start()
    print("[System] 正在启动 Web UI...")
    # Force port 7860 (the port HF Spaces exposes).
    subprocess.run(["python", "webui.py", "--host", "0.0.0.0", "--port", "7860"])