# FistalAI / modal_tool.py
import modal
import json
import time
modal.enable_output()
app = modal.App("fistalfinetuner")
volume = modal.Volume.from_name("fistal-models", create_if_missing=True)
modal_image = (
    modal.Image.debian_slim(python_version="3.11")
    .apt_install("git")
    .pip_install(
        "torch>=2.6.0",
        "torchvision",
        "torchaudio",
        extra_index_url="https://download.pytorch.org/whl/cu121",
    )
    .pip_install(
        "transformers",
        "datasets==4.3.0",
        "accelerate",
        "trl",
        "bitsandbytes",
        "peft",
        "unsloth_zoo",
    )
    .pip_install(
        "unsloth @ git+https://github.com/unslothai/unsloth.git"
    )
)
@app.function(
    image=modal_image,
    gpu="T4",
    timeout=3600,
    volumes={"/models": volume},
    retries=modal.Retries(max_retries=0, backoff_coefficient=1.0),
)
def train_with_modal(ft_data: str, model_name: str):
"""
Finetuning model using Modal's GPU
"""
import torch
if not torch.cuda.is_available():
return {"status": "error", "message": "No GPU available!"}
from unsloth import FastLanguageModel, is_bf16_supported
from transformers import TrainingArguments
from trl import SFTTrainer
import os
data = []
for line in ft_data.strip().split('\n'):
if line.strip():
data.append(json.loads(line))
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_name,
        max_seq_length=512,
        load_in_4bit=True,
        dtype=None,
    )
print("Configuring LoRA...")
model = FastLanguageModel.get_peft_model(
model,
r=128,
target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
lora_alpha=16,
lora_dropout=0,
bias="none",
random_state=2001,
use_gradient_checkpointing="unsloth",
loftq_config=None,
use_rslora=False
)
    def format_example(example):
        # Render each conversation with the tokenizer's chat template into a plain-text field.
        text = tokenizer.apply_chat_template(
            example['messages'],
            tokenize=False,
            add_generation_prompt=False
        )
        return {"text": text}

    dataset = Dataset.from_list(data)
    dataset = dataset.map(format_example)
    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset,
        dataset_text_field="text",
        max_seq_length=512,  # keep in sync with the max_seq_length used when loading the model
        dataset_num_proc=2,
        args=TrainingArguments(
            per_device_train_batch_size=2,
            gradient_accumulation_steps=8,
            warmup_steps=5,
            num_train_epochs=1,
            max_steps=30,
            learning_rate=2e-4,
            fp16=not is_bf16_supported(),
            bf16=is_bf16_supported(),
            logging_steps=1,
            optim="adamw_8bit",
            lr_scheduler_type="linear",
            output_dir="/tmp/training_output",
            seed=42,
            report_to="none",
            dataloader_num_workers=0,
        ),
    )
print("Training started...")
trainer.train()
print("Training complete!")
timestamp = int(time.time())
volume_path = f"/models/finetuned-{timestamp}"
os.makedirs(volume_path, exist_ok=True)
print(f"Saving to: {volume_path}")
model.save_pretrained_merged(volume_path, tokenizer, save_method="merged_16bit")
print("Model saved!")
model.config.save_pretrained(volume_path)
trainer.save_model(volume_path)
tokenizer.save_pretrained(volume_path)
volume.commit()
print("Volume has been committed!")
del model
del trainer
import gc
gc.collect()
torch.cuda.empty_cache()
return {
"status":"success",
"volume_path":volume_path,
"timestamp": timestamp
}
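
# Note (illustrative, not from the original file): `ft_data` is expected to be JSONL, one
# JSON object per line, each carrying a "messages" list that tokenizer.apply_chat_template
# can render, e.g.:
# {"messages": [{"role": "user", "content": "What does this tool do?"},
#               {"role": "assistant", "content": "It fine-tunes a model on Modal."}]}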
@app.function(
    image=modal_image,
    volumes={"/models": volume},
    timeout=900,
    secrets=[modal.Secret.from_name("huggingface-secret")],
)
def upload_to_hf_from_volume(volume_path: str, timestamp: int, repoName: str):
"""
Upload model directly from Modal Volume to HuggingFace
This runs on Modal's fast network - no download to local machine needed!
"""
from huggingface_hub import HfApi, create_repo
import os
print(f"πŸ“€ Uploading from {volume_path} to HuggingFace...")
if not os.path.exists(volume_path):
raise FileNotFoundError(f"Model not found at: {volume_path}")
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        raise ValueError("HF_TOKEN not found in Modal secrets")

    hf_api = HfApi()
    repo_id = f"mahreenfathima/finetuned-{repoName}-{timestamp}"
    print(f"Creating HuggingFace repo: {repo_id}")
    create_repo(
        repo_id=repo_id,
        token=hf_token,
        private=False,
        exist_ok=True,
        repo_type="model",
    )
print(f"Uploading files to {repo_id}...")
hf_api.upload_folder(
folder_path=volume_path,
repo_id=repo_id,
token=hf_token,
commit_message=f"Fine-tuned model (timestamp: {timestamp})"
)
model_url = f"https://huggingface.co/{repo_id}"
print(f"βœ… Successfully uploaded to {model_url}")
return {
"model_url": model_url,
"repo_id": repo_id
}
@app.function(
    gpu="T4",
    timeout=600,
    image=modal_image,
)
def evaluate_model(repo_id: str, test_inputs: list[str]):
"""Load model and run inference on test cases"""
from unsloth import FastLanguageModel
from transformers import AutoTokenizer
import torch
print(f"Loading model: {repo_id}")
model, tokenizer = FastLanguageModel.from_pretrained(
model_name=repo_id,
max_seq_length=512,
load_in_4bit=True,
dtype=None,
)
if tokenizer.pad_token is None:
tokenizer.pad_token = tokenizer.eos_token
outputs = []
for test_input in test_inputs:
print(f"Processing: {test_input[:50]}...")
inputs = tokenizer(test_input, return_tensors="pt").to(model.device)
with torch.no_grad():
output = model.generate(
**inputs,
max_new_tokens=100,
temperature=0.5,
do_sample=True
)
decoded = tokenizer.decode(output[0], skip_special_tokens=True)
if decoded.startswith(test_input):
decoded = decoded[len(test_input):].strip()
outputs.append(decoded)
return outputs
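
# --- Usage sketch (an assumption, not part of the original file) ---
# A minimal example of how these functions could be chained from a local entrypoint via
# `modal run modal_tool.py`. The sample record, model name, repo name, and test prompt
# below are placeholders chosen for illustration only.
@app.local_entrypoint()
def main():
    # One JSONL line in the shape train_with_modal expects (placeholder content).
    sample_data = json.dumps({
        "messages": [
            {"role": "user", "content": "Hello"},
            {"role": "assistant", "content": "Hi there!"},
        ]
    })
    result = train_with_modal.remote(sample_data, "unsloth/Llama-3.2-1B-Instruct")
    if result["status"] != "success":
        print(result)
        return
    uploaded = upload_to_hf_from_volume.remote(
        result["volume_path"], result["timestamp"], "demo"
    )
    print(evaluate_model.remote(uploaded["repo_id"], ["Hello, how are you?"]))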