import modal
import json
import time

# Heavy third-party imports (datasets, unsloth, trl) happen inside the Modal
# functions below, where the container image guarantees they are installed.

modal.enable_output()

app = modal.App("fistalfinetuner")
volume = modal.Volume.from_name("fistal-models", create_if_missing=True)
modal_image = (
    modal.Image.debian_slim(python_version="3.11")
    .apt_install("git")
    .pip_install(
        "torch>=2.6.0",
        "torchvision",
        "torchaudio",
        extra_index_url="https://download.pytorch.org/whl/cu121",
    )
    .pip_install(
        "transformers",
        "datasets==4.3.0",
        "accelerate",
        "trl",
        "bitsandbytes",
        "peft",
        "unsloth_zoo",
    )
    .pip_install(
        "unsloth @ git+https://github.com/unslothai/unsloth.git"
    )
)
@app.function(
    image=modal_image,
    gpu="A10G",  # assumed GPU type; any CUDA GPU Modal offers works here
    volumes={"/models": volume},
    timeout=3600,  # assumed; raise for longer training runs
)
def train_with_modal(ft_data: str, model_name: str):
    """
    Fine-tune a model on a Modal GPU.

    ft_data is newline-delimited JSON, one chat example per line.
    """
    import torch

    if not torch.cuda.is_available():
        return {"status": "error", "message": "No GPU available!"}

    from unsloth import FastLanguageModel, is_bfloat16_supported
    from transformers import TrainingArguments
    from trl import SFTTrainer
    from datasets import Dataset
    import os
    data = []
    for line in ft_data.strip().split("\n"):
        if line.strip():
            data.append(json.loads(line))

    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_name,
        max_seq_length=512,
        load_in_4bit=True,
        dtype=None,
    )
| print("Configuring LoRA...") | |
| model = FastLanguageModel.get_peft_model( | |
| model, | |
| r=128, | |
| target_modules=["q_proj", "k_proj", "v_proj", "o_proj"], | |
| lora_alpha=16, | |
| lora_dropout=0, | |
| bias="none", | |
| random_state=2001, | |
| use_gradient_checkpointing="unsloth", | |
| loftq_config=None, | |
| use_rslora=False | |
| ) | |
    def format_example(example):
        text = tokenizer.apply_chat_template(
            example["messages"],
            tokenize=False,
            add_generation_prompt=False,
        )
        return {"text": text}

    dataset = Dataset.from_list(data)
    dataset = dataset.map(format_example)
    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset,
        dataset_text_field="text",
        max_seq_length=512,  # must match max_seq_length used in from_pretrained above
        dataset_num_proc=2,
        args=TrainingArguments(
            per_device_train_batch_size=2,
            gradient_accumulation_steps=8,
            warmup_steps=5,
            num_train_epochs=1,
            max_steps=30,  # max_steps takes precedence over num_train_epochs
            learning_rate=2e-4,
            fp16=not is_bfloat16_supported(),
            bf16=is_bfloat16_supported(),
            logging_steps=1,
            optim="adamw_8bit",
            lr_scheduler_type="linear",
            output_dir="/tmp/training_output",
            seed=42,
            report_to="none",
            dataloader_num_workers=0,
        ),
    )
| print("Training started...") | |
| trainer.train() | |
| print("Training complete!") | |
| timestamp = int(time.time()) | |
| volume_path = f"/models/finetuned-{timestamp}" | |
| os.makedirs(volume_path, exist_ok=True) | |
| print(f"Saving to: {volume_path}") | |
| model.save_pretrained_merged(volume_path, tokenizer, save_method="merged_16bit") | |
| print("Model saved!") | |
| model.config.save_pretrained(volume_path) | |
| trainer.save_model(volume_path) | |
| tokenizer.save_pretrained(volume_path) | |
| volume.commit() | |
| print("Volume has been committed!") | |
| del model | |
| del trainer | |
| import gc | |
| gc.collect() | |
| torch.cuda.empty_cache() | |
| return { | |
| "status":"success", | |
| "volume_path":volume_path, | |
| "timestamp": timestamp | |
| } | |
@app.function(
    image=modal_image,
    volumes={"/models": volume},
    secrets=[modal.Secret.from_name("huggingface")],  # assumed secret name; must expose HF_TOKEN
)
def upload_to_hf_from_volume(volume_path: str, timestamp: int, repo_name: str):
    """
    Upload a model directly from the Modal Volume to the Hugging Face Hub.

    This runs on Modal's fast network, so nothing is downloaded to the
    local machine.
    """
    from huggingface_hub import HfApi, create_repo
    import os

    print(f"Uploading from {volume_path} to Hugging Face...")

    if not os.path.exists(volume_path):
        raise FileNotFoundError(f"Model not found at: {volume_path}")

    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        raise ValueError("HF_TOKEN not found in Modal secrets")

    hf_api = HfApi()
    repo_id = f"mahreenfathima/finetuned-{repo_name}-{timestamp}"

    print(f"Creating Hugging Face repo: {repo_id}")
    create_repo(
        repo_id=repo_id,
        token=hf_token,
        private=False,
        exist_ok=True,
        repo_type="model",
    )

    print(f"Uploading files to {repo_id}...")
    hf_api.upload_folder(
        folder_path=volume_path,
        repo_id=repo_id,
        token=hf_token,
        commit_message=f"Fine-tuned model (timestamp: {timestamp})",
    )

    model_url = f"https://huggingface.co/{repo_id}"
    print(f"Successfully uploaded to {model_url}")
    return {
        "model_url": model_url,
        "repo_id": repo_id,
    }
@app.function(
    image=modal_image,
    gpu="A10G",  # assumed GPU type, matching the training function
)
def evaluate_model(repo_id: str, test_inputs: list[str]):
    """Load the fine-tuned model and run inference on a list of test prompts."""
    from unsloth import FastLanguageModel
    import torch

    print(f"Loading model: {repo_id}")
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=repo_id,
        max_seq_length=512,
        load_in_4bit=True,
        dtype=None,
    )
    FastLanguageModel.for_inference(model)  # enable Unsloth's fast inference mode

    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    outputs = []
    for test_input in test_inputs:
        print(f"Processing: {test_input[:50]}...")
        inputs = tokenizer(test_input, return_tensors="pt").to(model.device)
        with torch.no_grad():
            output = model.generate(
                **inputs,
                max_new_tokens=100,
                temperature=0.5,
                do_sample=True,
            )
        decoded = tokenizer.decode(output[0], skip_special_tokens=True)
        # Strip the echoed prompt so only the completion is returned
        if decoded.startswith(test_input):
            decoded = decoded[len(test_input):].strip()
        outputs.append(decoded)
    return outputs
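

# Usage sketch: a minimal local entrypoint chaining the three functions above,
# assuming the @app.function decorators shown; invoke with `modal run <file>.py`.
# The training file, base model, repo suffix, and prompt below are placeholders.
@app.local_entrypoint()
def main():
    with open("train.jsonl") as f:  # placeholder path to NDJSON chat data
        ft_data = f.read()

    result = train_with_modal.remote(ft_data, "unsloth/llama-3-8b-bnb-4bit")
    if result["status"] != "success":
        raise RuntimeError(result["message"])

    uploaded = upload_to_hf_from_volume.remote(
        result["volume_path"], result["timestamp"], "demo"
    )
    for answer in evaluate_model.remote(uploaded["repo_id"], ["Hello, how are you?"]):
        print(answer)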