Spaces:

MCP-1st-Birthday
/

Lec2Story

Running

Lec2Story / app.py

janashraff

naming2

bd8e399 17 days ago

13 kB

	import gradio as gr
	import asyncio
	import os
	import sys
	from langchain_mcp_adapters.client import MultiServerMCPClient
	from langchain_google_genai import ChatGoogleGenerativeAI
	from langchain.agents import create_agent
	import tempfile
	import shutil
	from datetime import datetime
	import re

	# Credentials are supplied through Hugging Face Secrets (environment variables).
	GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
	ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY")

	# Refuse to start when either key is missing or empty.
	if not (GEMINI_API_KEY and ELEVENLABS_API_KEY):
	    raise ValueError("API keys must be set in Hugging Face Secrets")

	os.environ["ELEVENLABS_API_KEY"] = ELEVENLABS_API_KEY

	# Directory containing this file; the MCP servers are addressed relative to it.
	BASE_DIR = os.path.dirname(os.path.abspath(__file__))

	# Put the MCP server directories at the front of sys.path so their modules
	# can be imported. Listed in final lookup order.
	sys.path[:0] = [
	    os.path.join(BASE_DIR, "mcp_servers", server_dir)
	    for server_dir in ("ai_writers_workshop", "mcp_pdf_reader", "elevenlabs-mcp")
	]

	class ReasoningLogger:
	    """Accumulate the agent's progress as timestamped, readable text.

	    Entries are appended to ``self.logs`` in call order and concatenated by
	    ``get_log``. ``self.current_phase`` holds the phase name passed to the
	    most recent ``log_phase`` call (``None`` before the first one).
	    """

	    # Three or more newlines (optionally separated by whitespace) collapse
	    # to a single blank line.
	    _BLANK_RUN = re.compile(r"\n\s*\n\s*\n+")

	    # Fragments of stringified message/agent objects that are stripped
	    # from content before it is logged. Compiled once for all instances.
	    _NOISE_PATTERNS = tuple(
	        re.compile(pattern, re.DOTALL)
	        for pattern in (
	            r"messages=\[.*?\]",
	            r"content='.*?'(?=\s|$)",
	            r"ToolMessage\(.*?\)",
	            r"additional_kwargs=\{.*?\}",
	            r"response_metadata=\{.*?\}",
	            r"id='.*?'",
	            r"usage_metadata=\{.*?\}",
	        )
	    )

	    # Content longer than _MAX_LINES lines is cut down to _KEPT_LINES lines.
	    _MAX_LINES = 30
	    _KEPT_LINES = 25

	    def __init__(self):
	        self.logs = []
	        self.current_phase = None

	    @staticmethod
	    def _timestamp():
	        """Return the current local time as ``HH:MM:SS``."""
	        return datetime.now().strftime("%H:%M:%S")

	    def log_phase(self, phase, content):
	        """Append a major phase entry framed by separator rules.

	        Args:
	            phase: Phase title; also stored in ``self.current_phase``.
	            content: Anything printable; cleaned with ``_clean_content``.
	        """
	        cleaned_content = self._clean_content(content)
	        separator = "─" * 80
	        self.logs.append(
	            f"\n{separator}\n {self._timestamp()} | {phase}\n{separator}\n{cleaned_content}\n"
	        )
	        self.current_phase = phase

	    def log_action(self, action, details):
	        """Append an action title followed by its cleaned details."""
	        cleaned_details = self._clean_content(details)
	        self.logs.append(f"\n {self._timestamp()} | {action}\n{cleaned_details}\n")

	    def log_result(self, result):
	        """Append a single timestamped result line."""
	        cleaned_result = self._clean_content(result)
	        self.logs.append(f"\n {self._timestamp()} | {cleaned_result}\n")

	    def log_step(self, step_num, description):
	        """Append a numbered step line (no timestamp, no cleaning)."""
	        self.logs.append(f" └─ Step {step_num}: {description}\n")

	    def _clean_content(self, content):
	        """Return ``content`` as trimmed, de-noised text.

	        Falsy input yields ``""``. Otherwise the value is stringified, runs
	        of blank lines are collapsed, the ``_NOISE_PATTERNS`` fragments are
	        removed, tool-call ``name=``/``args=`` fragments are reformatted,
	        and output longer than ``_MAX_LINES`` lines is truncated.
	        """
	        if not content:
	            return ""

	        content = self._BLANK_RUN.sub("\n\n", str(content))

	        for pattern in self._NOISE_PATTERNS:
	            content = pattern.sub("", content)

	        # For an AgentFinish repr keep only the value of its 'output' field.
	        if "AgentFinish" in content:
	            match = re.search(r'output[\'"]:\s*[\'"](.+?)[\'"]', content, re.DOTALL)
	            if match:
	                content = match.group(1)

	        # Present tool calls as "Tool: <name>" / "Parameters: <args>".
	        content = re.sub(r"name='(\w+)'", r"\n Tool: \1", content)
	        content = re.sub(
	            r"args=\{([^}]+)\}",
	            lambda m: f"\n Parameters: {self._format_args(m.group(1))}",
	            content,
	        )

	        lines = content.split("\n")
	        if len(lines) > self._MAX_LINES:
	            hidden = len(lines) - self._KEPT_LINES
	            content = "\n".join(lines[: self._KEPT_LINES]) + f"\n\n... ({hidden} more lines) ...\n"

	        return content.strip()

	    def _format_args(self, args_str):
	        """Strip quote characters and cap the argument text at 100 chars."""
	        args_str = args_str.replace("'", "").replace('"', "")
	        if len(args_str) > 100:
	            return args_str[:100] + "..."
	        return args_str

	    def get_log(self):
	        """Return every entry logged so far, preceded by one newline."""
	        return "\n" + "".join(self.logs)

	async def run_agent_with_reasoning(age: int, gender: str, topic: str, pdf_temp_path: str, progress=gr.Progress()):
	    """Plan, load MCP tools, and run the agent, yielding progress as it goes.

	    This is an async generator. Each yielded item is a
	    ``(log_text, audio_path)`` tuple: ``log_text`` is the accumulated
	    ``ReasoningLogger`` output, and ``audio_path`` is ``None`` for every
	    item except the last successful one, which carries the path of the
	    ``.mp3`` file that was found (or ``None`` if none could be located).

	    Failures during planning, tool setup, or execution are logged as an
	    error phase and yielded instead of being raised.

	    Args:
	        age: Student age, interpolated into the prompts.
	        gender: Student gender, interpolated into the prompts.
	        topic: Topic of the story, interpolated into the prompts.
	        pdf_temp_path: Path of the lecture PDF handed to the agent.
	        progress: Gradio progress callback, called as ``progress(fraction, desc=...)``.
	    """
	    logger = ReasoningLogger()
	    # Not removed here: the audio path yielded at the end points inside it.
	    output_dir = tempfile.mkdtemp()

	    # Phase 1: Planning
	    progress(0.1, desc="Agent is analyzing task and creating plan...")

	    planning_prompt = f"""
	You are an autonomous teaching agent. Analyze this task and create a concise plan:

	TASK: Create an engaging audio story for a {age}-year-old {gender} student about "{topic}" based on a lecture PDF.

	Provide a brief, numbered plan (4-5 steps maximum) without excessive detail.
	"""

	    # The same model instance is used for planning and for the agent.
	    llm = ChatGoogleGenerativeAI(
	        model="gemini-2.5-flash",
	        google_api_key=GEMINI_API_KEY,
	        temperature=0.7,
	    )

	    try:
	        planning_response = await llm.ainvoke(planning_prompt)
	        plan_text = planning_response.content if hasattr(planning_response, 'content') else str(planning_response)

	        # Keep only lines that look like plan items (numbered or dashed).
	        plan_lines = [
	            line
	            for line in plan_text.split('\n')
	            if line.strip() and (line.strip()[0].isdigit() or line.strip().startswith('-'))
	        ]
	        clean_plan = '\n'.join(plan_lines[:5])  # Limit to 5 steps

	        logger.log_phase("PLANNING", clean_plan)
	        yield logger.get_log(), None
	    except Exception as e:
	        logger.log_phase("PLANNING ERROR", str(e))
	        yield logger.get_log(), None
	        return

	    # Phase 2: Tool Setup
	    progress(0.2, desc="🔧 Setting up MCP tools...")
	    logger.log_action("TOOL INITIALIZATION", "Connecting to: PDF Reader, AI Writer, ElevenLabs TTS")
	    yield logger.get_log(), None

	    python_exe = sys.executable

	    client = MultiServerMCPClient({
	        "pdf-reader": {
	            "transport": "stdio",
	            "command": python_exe,
	            "args": [os.path.join(BASE_DIR, "mcp_servers", "mcp_pdf_reader", "src", "server.py")],
	        },
	        "ai-writer": {
	            "transport": "stdio",
	            "command": python_exe,
	            "args": [os.path.join(BASE_DIR, "mcp_servers", "ai_writers_workshop", "mcp_server", "server.py")],
	        },
	        "ElevenLabs": {
	            "transport": "stdio",
	            "command": python_exe,
	            "args": [os.path.join(BASE_DIR, "mcp_servers", "elevenlabs-mcp", "elevenlabs_mcp", "server.py")],
	            "env": {"ELEVENLABS_API_KEY": ELEVENLABS_API_KEY},
	        },
	    })

	    try:
	        # get_tools() with no server name returns the tools of every
	        # configured server, so one call is enough; a per-server loop that
	        # calls it repeatedly only reloads the same tools.
	        all_tools = []
	        seen = set()
	        for tool in await client.get_tools():
	            if tool.name not in seen:
	                seen.add(tool.name)
	                all_tools.append(tool)
	    except Exception as e:
	        logger.log_phase("TOOL INITIALIZATION ERROR", str(e))
	        yield logger.get_log(), None
	        return

	    logger.log_result(f"Loaded {len(all_tools)} tools: {', '.join([t.name for t in all_tools])}")
	    yield logger.get_log(), None

	    # Phase 3: Autonomous Execution
	    progress(0.3, desc="🤖 Agent executing plan autonomously...")

	    system_instruction = f"""
	You are an autonomous teaching agent. Be concise in your responses.

	CONTEXT:
	- Student: {age}-year-old {gender}
	- Topic: "{topic}"
	- PDF Path: {pdf_temp_path}
	- Audio Output Directory: {output_dir}

	YOUR WORKFLOW:
	1. Read PDF and extract relevant content about the topic
	2. Write an age-appropriate story teaching key concepts
	3. Generate audio with output_directory: "{output_dir}"

	Execute autonomously. Provide brief status updates only when starting a new major step.
	"""

	    agent = create_agent(model=llm, tools=all_tools)

	    agent_input = {
	        "messages": [
	            {"role": "system", "content": system_instruction},
	            {
	                "role": "user",
	                "content": "Execute the plan. Give brief updates for each major step.",
	            },
	        ]
	    }

	    logger.log_phase("EXECUTION", "Agent is working autonomously...")
	    yield logger.get_log(), None

	    progress(0.5, desc="📖 Processing content...")

	    try:
	        result = await agent.ainvoke(agent_input)

	        # The stringified result is searched for status phrases and paths.
	        result_text = str(result)

	        if 'output' in result:
	            summary = result.get('output', 'Execution completed')
	        else:
	            summary_match = re.search(
	                r'(Story.*?generated|Audio.*?created|File saved.*?\.mp3)',
	                result_text,
	                re.IGNORECASE | re.DOTALL,
	            )
	            summary = summary_match.group(0) if summary_match else "Task completed successfully"

	        # Keep the summary short whichever branch produced it.
	        summary = str(summary)
	        if len(summary) > 200:
	            summary = summary[:200] + "..."

	        logger.log_phase("EXECUTION COMPLETE", summary)

	        progress(0.9, desc="🎵 Finalizing audio generation...")
	        yield logger.get_log(), None

	        # First choice: an .mp3 written into the directory given to the agent.
	        audio_path = None
	        if output_dir and os.path.exists(output_dir):
	            mp3_files = [f for f in os.listdir(output_dir) if f.endswith('.mp3')]
	            if mp3_files:
	                audio_path = os.path.join(output_dir, mp3_files[0])
	                logger.log_result(f"Audio generated: {mp3_files[0]}")

	        # Fallback: a "File saved as: <path>.mp3" mention in the result text.
	        if not audio_path and "File saved as:" in result_text:
	            match = re.search(r'File saved as:\s*([^\s]+\.mp3)', result_text)
	            if match:
	                file_path = match.group(1)
	                if os.path.exists(file_path):
	                    audio_path = file_path
	                    logger.log_result(f"Audio file: {os.path.basename(file_path)}")

	        if not audio_path:
	            logger.log_result("⚠️ Audio generation completed but file location uncertain")

	        progress(1.0, desc="✅ Complete!")
	        yield logger.get_log(), audio_path

	    except Exception as e:
	        logger.log_phase("ERROR", str(e))
	        yield logger.get_log(), None


	def gradio_handler(age, gender, topic, pdf_file, progress=gr.Progress()):
	    """Synchronous Gradio callback that runs the agent and returns its final state.

	    The uploaded PDF is copied into a private temporary directory, the
	    ``run_agent_with_reasoning`` async generator is consumed to the end,
	    and the last ``(log_text, audio_path)`` pair it produced is returned.
	    The temporary directory is always removed afterwards.

	    Args:
	        age, gender, topic: Forwarded unchanged to ``run_agent_with_reasoning``.
	        pdf_file: Path of the uploaded PDF; a falsy value means no upload.
	        progress: Gradio progress callback, forwarded to the agent runner.

	    Returns:
	        ``(text, audio_path)``. On a missing upload or any exception,
	        ``text`` is an error message and ``audio_path`` is ``None``.
	    """
	    if not pdf_file:
	        return "❌ Please upload a PDF.", None

	    temp_dir = tempfile.mkdtemp()
	    try:
	        # Copy inside the try block so a failed copy still cleans up temp_dir.
	        pdf_path = os.path.join(temp_dir, "lecture.pdf")
	        shutil.copy(pdf_file, pdf_path)

	        async def _drain():
	            """Consume every update and keep only the last one."""
	            final_log, final_audio = None, None
	            async for log, audio in run_agent_with_reasoning(age, gender, topic, pdf_path, progress):
	                final_log, final_audio = log, audio
	            return final_log, final_audio

	        # asyncio.run creates a fresh loop, finalizes async generators and
	        # closes the loop without leaving a closed loop set on the thread.
	        return asyncio.run(_drain())

	    except Exception as e:
	        import traceback
	        return f"❌ Error: {str(e)}\n\n{traceback.format_exc()}", None
	    finally:
	        shutil.rmtree(temp_dir, ignore_errors=True)


	# ---------------------------------------------------------------------------
	# Gradio interface: inputs on the left, agent log on the right, audio below.
	# ---------------------------------------------------------------------------
	with gr.Blocks() as demo:
	    # Page banner, written as raw HTML inside a Markdown component.
	    gr.Markdown(
	        """
	<h1 style='text-align:center;'>LOTUS</h1>

	<p style='text-align:center; font-size:18px;'>
	"Lecture Overwritten To Unique Story"<br>
	</p>
	"""
	    )

	    with gr.Row():
	        # Left column: student details, topic, PDF upload and the start button.
	        with gr.Column(scale=1):
	            gr.Markdown("### Student Configuration")
	            age = gr.Number(
	                value=12,
	                minimum=5,
	                maximum=18,
	                label="Student Age",
	            )
	            gender = gr.Radio(
	                choices=["male", "female"],
	                label="Student Gender",
	                value="female",
	            )
	            topic = gr.Textbox(
	                placeholder="e.g., Introduction to chemical reactions...",
	                label="Topic / Concept",
	            )
	            pdf_input = gr.File(file_types=[".pdf"], label="Upload Lecture PDF")
	            generate_btn = gr.Button(
	                "Start Autonomous Agent",
	                size="lg",
	                variant="primary",
	            )

	        # Right column: the textual log returned by the handler.
	        with gr.Column(scale=2):
	            gr.Markdown("### Agent Reasoning & Execution Log")
	            output_text = gr.Textbox(
	                max_lines=25,
	                lines=20,
	                label="Autonomous Agent Process",
	            )

	    # Separate row for the generated audio.
	    with gr.Row():
	        audio_out = gr.Audio(label="🎵 Generated The Audio Story")

	    # Button click -> handler; results fill the log box and the audio player.
	    generate_btn.click(
	        gradio_handler,
	        [age, gender, topic, pdf_input],
	        [output_text, audio_out],
	    )

	# Launch the app only when this file is executed directly.
	if __name__ == "__main__":
	    demo.launch()