# app.py — Falcon H1 7B Instruct loader + Gradio UI (HF Spaces)
import os, sys, subprocess

# ---- 0) Environment hygiene (Spaces shows "libgomp" warning) ----
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"  # faster downloads when hf_transfer is available
# ---- 1) Upgrade critical deps BEFORE importing transformers ----
def pipi(cmd: str):
    print(f"[pip] {cmd}")
    subprocess.run([sys.executable, "-m", "pip"] + cmd.split(), check=True)

pipi("install --upgrade pip")
pipi("install --upgrade --no-cache-dir safetensors>=0.4.5 tokenizers>=0.19.1 accelerate>=0.33.0 hf_transfer")  # hf_transfer backs HF_HUB_ENABLE_HF_TRANSFER above
# Falcon-H1 needs the newest transformers; install from GitHub to be safe (per model card)
pipi("install --no-cache-dir git+https://github.com/huggingface/transformers.git")
# ---- 2) Imports AFTER upgrades ----
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from huggingface_hub import snapshot_download
# ---- 3) Config ----
MODEL_NAME_PRIMARY = "tiiuae/Falcon-H1-7B-Instruct"
MODEL_NAME_FALLBACK = "tiiuae/falcon-7b-instruct"  # stable fallback
MODEL_LOCAL_DIR = "./falcon_local"
MAX_NEW_TOKENS = 120
TEMPERATURE = 0.30
TOP_P = 0.90
REPETITION_PENALTY = 1.8
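# Conservative sampling: low temperature and nucleus top_p keep answers short and
# on-topic, and the deliberately high repetition_penalty (1.8) is meant to curb
# the looping noted in the prompt-building comment further below.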
| print("🚀 Preparing model…") | |
| print("ℹ️ Target primary model:", MODEL_NAME_PRIMARY) | |
# ---- 4) Get a clean local snapshot (avoid corrupted cache) ----
def get_model_snapshot(repo_id: str, local_dir: str) -> str:
    # force_download ensures a fresh copy if a previous attempt left a partial cache
    return snapshot_download(
        repo_id,
        local_dir=local_dir,
        local_dir_use_symlinks=False,
        force_download=True,
    )
model_path = None
primary_ok = True
try:
    print(f"⬇️ Downloading {MODEL_NAME_PRIMARY} …")
    model_path = get_model_snapshot(MODEL_NAME_PRIMARY, MODEL_LOCAL_DIR)
except Exception as e:
    print(f"⚠️ Primary download failed: {e}")
    primary_ok = False

if not primary_ok:
    try:
        print(f"➡️ Falling back to {MODEL_NAME_FALLBACK} …")
        model_path = get_model_snapshot(MODEL_NAME_FALLBACK, MODEL_LOCAL_DIR)
    except Exception as e:
        print(f"❌ Fallback download failed: {e}")
        model_path = None
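# If both downloads fail, model_path stays None, model loading below is skipped,
# and the UI answers every request with the "model not loaded" message.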
# ---- 5) Load model/tokenizer ----
generator = None
model_loaded = False
if model_path:
    try:
        print("🔄 Loading tokenizer & model…")
        tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True)
        # H1 is BF16 on the model card; FP16 may work, but BF16 is safer if the hardware supports it.
        dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
        model = AutoModelForCausalLM.from_pretrained(
            model_path,
            torch_dtype=dtype,
            device_map="auto",
            trust_remote_code=True,
            low_cpu_mem_usage=True,
        )
        # device_map="auto" already places the model via accelerate, so the pipeline
        # gets no explicit device or dtype: passing them here can conflict with that placement.
        generator = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
        )
        model_loaded = True
        print("✅ Model loaded successfully")
    except Exception as e:
        print(f"❌ Model loading failed: {e}")
        model_loaded = False
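# Optional startup smoke test (not part of the original flow; RUN_SMOKE_TEST is a
# hypothetical Space variable). It runs one short greedy generation to confirm the
# pipeline works before the UI starts serving requests.
if model_loaded and os.environ.get("RUN_SMOKE_TEST") == "1":
    _sample = generator("مرحبا", max_new_tokens=8, do_sample=False)[0]["generated_text"]  # "مرحبا" = "hello"
    print("🧪 Smoke test output:", _sample)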
# ---- 6) App logic ----
# Sample questions: Damascus real-estate queries in MSA and Levantine Arabic.
test_questions = [
    "بدي شقة بالمالكي فيها شرفة وغسالة صحون.",             # I want an apartment in Malki with a balcony and a dishwasher.
    "هل في شقة دوبلكس بالمزة الفيلات فيها موقفين سيارة؟",   # Is there a duplex in Mazzeh Villas with two parking spots?
    "بدي بيت عربي قديم بباب توما مع حديقة داخلية.",         # I want an old Arab-style house in Bab Touma with an inner courtyard.
    "أرخص شقة بالشعلان شو سعرها؟",                          # What does the cheapest apartment in Shaalan cost?
    "هل يوجد شقق بإطلالة جبلية في أبو رمانة؟",              # Are there apartments with a mountain view in Abu Rummaneh?
    "بدي شقة مفروشة بالكامل بالمزة ٨٦، الطابق الأول.",       # I want a fully furnished apartment in Mazzeh 86, first floor.
    "عندك منزل مستقل بالمهاجرين مع موقد حطب؟",              # Do you have a detached house in Muhajireen with a wood stove?
]
def chat_falcon(user_input: str) -> str:
    if not model_loaded or generator is None:
        # "The model is not loaded. Please try again later."
        return "❌ النموذج غير محمّل. الرجاء إعادة المحاولة لاحقاً."
    # Compact, anchored instruction to reduce looping and keep the answer on-topic.
    # Prompt (translated): "You are a smart real-estate assistant. Answer in only
    # one or two clear sentences.\nQuestion: {user_input}\nAnswer:"
    prompt = (
        "أنت مساعد عقارات ذكي. أجب بجملة أو جملتين واضحتين فقط.\n"
        f"السؤال: {user_input}\n"
        "الجواب:"
    )
    out = generator(
        prompt,
        max_new_tokens=MAX_NEW_TOKENS,
        do_sample=True,
        temperature=TEMPERATURE,
        top_p=TOP_P,
        repetition_penalty=REPETITION_PENALTY,
        eos_token_id=generator.tokenizer.eos_token_id,
    )[0]["generated_text"]
    # Strip the prompt echo that the text-generation pipeline prepends by default.
    return out.replace(prompt, "").strip()
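# Note: passing return_full_text=False in the pipeline call is a built-in way to get
# only the completion, which would make the manual echo-stripping above unnecessary.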
# ---- 7) Gradio UI (no share=True inside Spaces) ----
with gr.Blocks() as demo:
    gr.Markdown("## 🏠 Falcon H1 7B Instruct — Damascus Real Estate Test")
    # "Test the model's ability to understand questions in Arabic (MSA and Levantine dialect)."
    gr.Markdown("اختبر قدرة النموذج على فهم الأسئلة بالعربية (فصحى ولهجة شامية).")
    with gr.Row():
        with gr.Column(scale=2):
            # Label: "Type your question here"; placeholder: "Example: I want an apartment in Mazzeh with a balcony"
            tb = gr.Textbox(label="اكتب سؤالك هنا", lines=3, placeholder="مثال: بدي شقة بالمزة فيها بلكون")
            btn = gr.Button("🔎 أرسل")  # "Send"
        with gr.Column(scale=1):
            dd = gr.Dropdown(choices=test_questions, value=test_questions[0], label="🧾 أسئلة جاهزة")  # "Ready-made questions"
    out = gr.Textbox(label="إجابة النموذج", lines=8)  # "Model answer"
    btn.click(chat_falcon, inputs=tb, outputs=out)
    dd.change(chat_falcon, inputs=dd, outputs=out)

demo.launch(server_name="0.0.0.0", server_port=7860)