# Lec2Story / app.py
# janashraff
# naming2
# bd8e399
import gradio as gr
import asyncio
import os
import sys
from langchain_mcp_adapters.client import MultiServerMCPClient
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.agents import create_agent
import tempfile
import shutil
from datetime import datetime
import re
# Credentials are read from environment variables (configured as Hugging Face
# Secrets); the app refuses to start unless both are present and non-empty.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY")

if not (GEMINI_API_KEY and ELEVENLABS_API_KEY):
    raise ValueError("API keys must be set in Hugging Face Secrets")

# Write the ElevenLabs key back into the process environment.
os.environ["ELEVENLABS_API_KEY"] = ELEVENLABS_API_KEY

# Directory that contains this file; bundled MCP servers live under it.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# Prepend each bundled MCP server directory to sys.path so its modules can be
# imported. Each insert goes to index 0, so the last entry ends up first.
for _server_dir in ("elevenlabs-mcp", "mcp_pdf_reader", "ai_writers_workshop"):
    sys.path.insert(0, os.path.join(BASE_DIR, "mcp_servers", _server_dir))
class ReasoningLogger:
    """Accumulate human-readable log entries describing an agent run.

    Every ``log_*`` method appends one pre-formatted string to ``self.logs``;
    :meth:`get_log` concatenates them. Free-form content is first passed
    through :meth:`_clean_content`, which removes repr-style noise (for
    example ``ToolMessage(...)`` or ``additional_kwargs={...}``), prettifies
    tool calls and truncates very long output.
    """

    _TIME_FORMAT = "%H:%M:%S"
    _MAX_LINES = 30          # truncate only when output is longer than this
    _KEPT_LINES = 25         # number of leading lines kept when truncating
    _MAX_ARG_CHARS = 100     # longest argument string shown verbatim

    # All patterns are compiled once here instead of on every call.
    _EXCESS_BLANK_LINES = re.compile(r'\n\s*\n\s*\n+')
    _NOISE_PATTERNS = tuple(
        re.compile(pattern, re.DOTALL)
        for pattern in (
            r'messages=\[.*?\]',
            r'content=\'.*?\'(?=\s|$)',
            r'ToolMessage\(.*?\)',
            r'additional_kwargs=\{.*?\}',
            r'response_metadata=\{.*?\}',
            # Anchored at a word boundary and allowed to swallow a
            # ``<prefix>_`` so that ``tool_call_id='x'`` is removed whole
            # (not left as ``tool_call_``) and unrelated keys that merely
            # end in "id" (``valid='yes'``) are left untouched.
            r'\b(?:\w+_)?id=\'.*?\'',
            r'usage_metadata=\{.*?\}',
        )
    )
    _AGENT_OUTPUT = re.compile(r'output[\'"]:\s*[\'"](.+?)[\'"]', re.DOTALL)
    _TOOL_NAME = re.compile(r'name=\'(\w+)\'')
    _TOOL_ARGS = re.compile(r'args=\{([^}]+)\}')

    def __init__(self):
        self.logs = []              # formatted entries, in insertion order
        self.current_phase = None   # name of the most recent log_phase() call

    @classmethod
    def _timestamp(cls):
        """Return the current local time formatted as HH:MM:SS."""
        return datetime.now().strftime(cls._TIME_FORMAT)

    def log_phase(self, phase, content):
        """Append a major-phase entry framed by separator rules.

        Also records ``phase`` as ``self.current_phase``.
        """
        separator = "─" * 80
        cleaned_content = self._clean_content(content)
        self.logs.append(
            f"\n{separator}\n {self._timestamp()} | {phase}\n{separator}\n{cleaned_content}\n"
        )
        self.current_phase = phase

    def log_action(self, action, details):
        """Append an action entry: a timestamped title followed by details."""
        cleaned_details = self._clean_content(details)
        self.logs.append(f"\n {self._timestamp()} | {action}\n{cleaned_details}\n")

    def log_result(self, result):
        """Append a single timestamped result line."""
        cleaned_result = self._clean_content(result)
        self.logs.append(f"\n {self._timestamp()} | {cleaned_result}\n")

    def log_step(self, step_num, description):
        """Append a numbered step line (no timestamp, no cleaning)."""
        self.logs.append(f" └─ Step {step_num}: {description}\n")

    def _clean_content(self, content):
        """Return ``content`` as tidy display text.

        Falsy input yields ``""``. Otherwise the value is stringified, runs
        of blank lines are collapsed, noise patterns are removed, an
        ``AgentFinish`` payload is reduced to its ``output`` value, tool
        name/args fragments are reformatted, and output longer than
        ``_MAX_LINES`` lines is cut to ``_KEPT_LINES`` lines plus a marker.
        The result is stripped of surrounding whitespace.
        """
        if not content:
            return ""

        text = self._EXCESS_BLANK_LINES.sub('\n\n', str(content))

        for noise in self._NOISE_PATTERNS:
            text = noise.sub('', text)

        # Keep only the meaningful output of a finished agent run.
        if 'AgentFinish' in text:
            found = self._AGENT_OUTPUT.search(text)
            if found:
                text = found.group(1)

        # Render tool calls on their own lines.
        text = self._TOOL_NAME.sub(r'\n Tool: \1', text)
        text = self._TOOL_ARGS.sub(
            lambda m: f'\n Parameters: {self._format_args(m.group(1))}', text
        )

        lines = text.split('\n')
        if len(lines) > self._MAX_LINES:
            hidden = len(lines) - self._KEPT_LINES
            text = '\n'.join(lines[:self._KEPT_LINES]) + f'\n\n... ({hidden} more lines) ...\n'

        return text.strip()

    def _format_args(self, args_str):
        """Strip quote characters from ``args_str`` and cap its length.

        Strings longer than ``_MAX_ARG_CHARS`` are cut and suffixed with
        ``...``.
        """
        args_str = args_str.replace('\'', '').replace('"', '')
        if len(args_str) > self._MAX_ARG_CHARS:
            return args_str[:self._MAX_ARG_CHARS] + '...'
        return args_str

    def get_log(self):
        """Return every entry logged so far as one string (leading newline)."""
        header = "\n"
        return header + "".join(self.logs)
def _message_text(message):
    """Return the text of a chat message (object or dict), or "" if none.

    Handles ``content`` given either as a plain string or as a list whose
    items are strings or dicts carrying a ``"text"`` entry.
    """
    content = getattr(message, "content", None)
    if content is None and isinstance(message, dict):
        content = message.get("content")
    if isinstance(content, str):
        return content.strip()
    if isinstance(content, list):
        parts = []
        for part in content:
            if isinstance(part, str):
                parts.append(part)
            elif isinstance(part, dict) and isinstance(part.get("text"), str):
                parts.append(part["text"])
        return "\n".join(parts).strip()
    return ""


def _summarize_agent_result(result, result_text, limit=200):
    """Build a short (at most ``limit`` chars plus "...") run summary.

    Preference order: an explicit ``output`` entry, the text of the last
    entry in ``messages``, a keyword match in the stringified result, and
    finally a generic success message.
    """
    summary = ""
    if isinstance(result, dict):
        if 'output' in result:
            summary = str(result['output'] or 'Execution completed')
        elif result.get("messages"):
            # Searching the full repr would also scan the prompts that were
            # sent in, so prefer the final message's own text.
            summary = _message_text(result["messages"][-1])
    if not summary:
        summary_match = re.search(
            r'(Story.*?generated|Audio.*?created|File saved.*?\.mp3)',
            result_text,
            re.IGNORECASE | re.DOTALL,
        )
        summary = summary_match.group(0) if summary_match else "Task completed successfully"
    if len(summary) > limit:
        summary = summary[:limit] + "..."
    return summary


def _locate_audio_file(output_dir, result_text):
    """Find the generated MP3 and return ``(path_or_None, log_message)``.

    Looks in ``output_dir`` first (newest ``.mp3`` wins, so the choice does
    not depend on directory listing order), then for an existing path
    announced as ``File saved as: <path>.mp3`` in ``result_text``.
    """
    if output_dir and os.path.isdir(output_dir):
        candidates = [
            os.path.join(output_dir, name)
            for name in os.listdir(output_dir)
            if name.endswith('.mp3')
        ]
        if candidates:
            newest = max(candidates, key=os.path.getmtime)
            return newest, f"Audio generated: {os.path.basename(newest)}"

    match = re.search(r'File saved as:\s*([^\s]+\.mp3)', result_text)
    if match and os.path.exists(match.group(1)):
        file_path = match.group(1)
        return file_path, f"Audio file: {os.path.basename(file_path)}"

    return None, "⚠️ Audio generation completed but file location uncertain"


async def run_agent_with_reasoning(age: int, gender: str, topic: str, pdf_temp_path: str, progress=gr.Progress()):
    """Plan, set up tools and run the story agent, streaming progress.

    This is an async generator. Each item is ``(log_text, audio_path)``
    where ``log_text`` is the full log so far and ``audio_path`` is ``None``
    until the final item of a successful run.

    Args:
        age: Student age, interpolated into the prompts.
        gender: Student gender, interpolated into the prompts.
        topic: Topic/concept the story should teach.
        pdf_temp_path: Path of the lecture PDF handed to the agent.
        progress: Gradio progress callback, called as ``progress(frac, desc=...)``.

    Failures in any phase are written to the log and end the generator;
    they are not raised to the caller.
    """
    logger = ReasoningLogger()

    # Phase 1: Planning
    progress(0.1, desc="Agent is analyzing task and creating plan...")
    planning_prompt = f"""
You are an autonomous teaching agent. Analyze this task and create a concise plan:
TASK: Create an engaging audio story for a {age}-year-old {gender} student about "{topic}" based on a lecture PDF.
Provide a brief, numbered plan (4-5 steps maximum) without excessive detail.
"""
    llm = ChatGoogleGenerativeAI(
        model="gemini-2.5-flash",
        google_api_key=GEMINI_API_KEY,
        temperature=0.7
    )
    try:
        planning_response = await llm.ainvoke(planning_prompt)
        plan_text = _message_text(planning_response) or str(planning_response)
        # Keep only numbered or dashed lines, at most five of them.
        plan_lines = [
            line for line in plan_text.split('\n')
            if line.strip() and (line.strip()[0].isdigit() or line.strip().startswith('-'))
        ]
        # Fall back to the raw text when the model used another list style,
        # so the planning phase is never logged empty.
        clean_plan = '\n'.join(plan_lines[:5]) or plan_text.strip()
        logger.log_phase("PLANNING", clean_plan)
        yield logger.get_log(), None
    except Exception as e:
        logger.log_phase("PLANNING ERROR", str(e))
        yield logger.get_log(), None
        return

    # Phase 2: Tool Setup
    progress(0.2, desc="🔧 Setting up MCP tools...")
    logger.log_action("TOOL INITIALIZATION", "Connecting to: PDF Reader, AI Writer, ElevenLabs TTS")
    yield logger.get_log(), None

    python_exe = sys.executable
    client = MultiServerMCPClient({
        "pdf-reader": {
            "transport": "stdio",
            "command": python_exe,
            "args": [os.path.join(BASE_DIR, "mcp_servers", "mcp_pdf_reader", "src", "server.py")]
        },
        "ai-writer": {
            "transport": "stdio",
            "command": python_exe,
            "args": [os.path.join(BASE_DIR, "mcp_servers", "ai_writers_workshop", "mcp_server", "server.py")]
        },
        "ElevenLabs": {
            "transport": "stdio",
            "command": python_exe,
            "args": [os.path.join(BASE_DIR, "mcp_servers", "elevenlabs-mcp", "elevenlabs_mcp", "server.py")],
            "env": {"ELEVENLABS_API_KEY": ELEVENLABS_API_KEY}
        }
    })

    all_tools = []
    seen = set()
    try:
        # get_tools() with no arguments already covers every configured
        # server, so one call suffices; calling it once per server (inside an
        # otherwise unused session) only repeated the same work.
        for tool in await client.get_tools():
            if tool.name not in seen:
                seen.add(tool.name)
                all_tools.append(tool)
    except Exception as e:
        # Report setup failures in the log, like the other phases do.
        logger.log_phase("TOOL SETUP ERROR", str(e))
        yield logger.get_log(), None
        return

    logger.log_result(f"Loaded {len(all_tools)} tools: {', '.join([t.name for t in all_tools])}")
    yield logger.get_log(), None

    # Phase 3: Autonomous Execution
    progress(0.3, desc="🤖 Agent executing plan autonomously...")
    # Created only now so that failures in earlier phases leave nothing behind.
    output_dir = tempfile.mkdtemp()
    system_instruction = f"""
You are an autonomous teaching agent. Be concise in your responses.
CONTEXT:
- Student: {age}-year-old {gender}
- Topic: "{topic}"
- PDF Path: {pdf_temp_path}
- Audio Output Directory: {output_dir}
YOUR WORKFLOW:
1. Read PDF and extract relevant content about the topic
2. Write an age-appropriate story teaching key concepts
3. Generate audio with output_directory: "{output_dir}"
Execute autonomously. Provide brief status updates only when starting a new major step.
"""
    agent_input = {
        "messages": [
            {"role": "system", "content": system_instruction},
            {
                "role": "user",
                "content": "Execute the plan. Give brief updates for each major step."
            }
        ]
    }
    logger.log_phase("EXECUTION", "Agent is working autonomously...")
    yield logger.get_log(), None

    progress(0.5, desc="📖 Processing content...")
    try:
        agent = create_agent(model=llm, tools=all_tools)
        result = await agent.ainvoke(agent_input)
        result_text = str(result)

        logger.log_phase("EXECUTION COMPLETE", _summarize_agent_result(result, result_text))
        progress(0.9, desc="🎵 Finalizing audio generation...")
        yield logger.get_log(), None

        audio_path, audio_message = _locate_audio_file(output_dir, result_text)
        logger.log_result(audio_message)

        progress(1.0, desc="✅ Complete!")
        yield logger.get_log(), audio_path
    except Exception as e:
        # Nothing usable was produced; drop the scratch directory.
        shutil.rmtree(output_dir, ignore_errors=True)
        logger.log_phase("ERROR", str(e))
        yield logger.get_log(), None
def gradio_handler(age, gender, topic, pdf_file, progress=gr.Progress()):
    """Synchronous Gradio callback: run the agent and return its final state.

    Copies the uploaded PDF into a private temporary directory, drives
    ``run_agent_with_reasoning`` to completion and returns the last
    ``(log_text, audio_path)`` pair it produced. The temporary directory is
    always removed afterwards.

    Args:
        age, gender, topic: Forwarded unchanged to the agent.
        pdf_file: Path of the uploaded PDF; falsy when nothing was uploaded.
        progress: Gradio progress callback, forwarded to the agent.

    Returns:
        ``(text, audio_path_or_None)``. On any exception ``text`` is an error
        message including the traceback and the second item is ``None``.
    """
    if not pdf_file:
        return "❌ Please upload a PDF.", None

    async def _last_update(pdf_path):
        """Consume the agent's updates and return the final one."""
        final_log, final_audio = None, None
        async for log, audio in run_agent_with_reasoning(age, gender, topic, pdf_path, progress):
            final_log, final_audio = log, audio
        return final_log, final_audio

    temp_dir = tempfile.mkdtemp()
    try:
        # Copy inside the try so a failed copy is reported like any other
        # error and the temporary directory is still cleaned up.
        pdf_path = os.path.join(temp_dir, "lecture.pdf")
        shutil.copy(pdf_file, pdf_path)
        # asyncio.run creates a fresh loop, finalizes async generators and
        # closes the loop, without leaving a closed loop installed as the
        # thread's current event loop.
        return asyncio.run(_last_update(pdf_path))
    except Exception as e:
        import traceback
        return f"❌ Error: {str(e)}\n\n{traceback.format_exc()}", None
    finally:
        shutil.rmtree(temp_dir, ignore_errors=True)
# Title banner shown at the top of the page.
_BANNER_HTML = """
<h1 style='text-align:center;'>LOTUS</h1>
<p style='text-align:center; font-size:18px;'>
"Lecture Overwritten To Unique Story"<br>
</p>
"""

# Page layout: inputs on the left, the agent log on the right, audio below.
with gr.Blocks() as demo:
    gr.Markdown(_BANNER_HTML)

    with gr.Row():
        # Left column: student settings, PDF upload and the start button.
        with gr.Column(scale=1):
            gr.Markdown("### Student Configuration")
            age = gr.Number(
                label="Student Age",
                value=12,
                minimum=5,
                maximum=18,
            )
            gender = gr.Radio(
                ["male", "female"],
                value="female",
                label="Student Gender",
            )
            topic = gr.Textbox(
                label="Topic / Concept",
                placeholder="e.g., Introduction to chemical reactions...",
            )
            pdf_input = gr.File(label="Upload Lecture PDF", file_types=[".pdf"])
            generate_btn = gr.Button("Start Autonomous Agent", variant="primary", size="lg")

        # Right column: the log text returned by the handler.
        with gr.Column(scale=2):
            gr.Markdown("### Agent Reasoning & Execution Log")
            output_text = gr.Textbox(
                label="Autonomous Agent Process",
                lines=20,
                max_lines=25,
            )

    # NOTE(review): the source's indentation was lost, so whether this row
    # sits directly under the page or inside the right-hand column could not
    # be determined — confirm the intended placement.
    with gr.Row():
        audio_out = gr.Audio(label="🎵 Generated The Audio Story")

    # One click runs the handler and fills both the log and the audio player.
    handler_inputs = [age, gender, topic, pdf_input]
    handler_outputs = [output_text, audio_out]
    generate_btn.click(
        fn=gradio_handler,
        inputs=handler_inputs,
        outputs=handler_outputs,
    )

if __name__ == "__main__":
    demo.launch()