# Source: agentic-language-partner / src/app/flashcards_tools.py
# Commit aa3fdef (yusenthebot): Integrate advanced OCR, FlashcardGenerator,
# DifficultyScorer, and AI Quiz from language project
# -*- coding: utf-8 -*-
"""
Flashcards Tools - Enhanced with FlashcardGenerator and DifficultyScorer
"""
import json
import re
from pathlib import Path
from typing import Dict, List, Optional, Any
from deep_translator import GoogleTranslator
from .config import get_user_dir
# Import advanced generators (with fallback)
try:
from .flashcard_generator import FlashcardGenerator
HAS_FLASHCARD_GENERATOR = True
except ImportError:
HAS_FLASHCARD_GENERATOR = False
try:
from .difficulty_scorer import get_difficulty_scorer
HAS_DIFFICULTY_SCORER = True
except ImportError:
HAS_DIFFICULTY_SCORER = False
def _get_decks_dir(username: str) -> Path:
    """Return the per-user deck directory, creating it if it does not exist."""
    decks_dir = get_user_dir(username) / "decks"
    decks_dir.mkdir(parents=True, exist_ok=True)
    return decks_dir
def list_user_decks(username: str) -> Dict[str, Path]:
    """Returns a mapping of deck name -> deck json path."""
    decks: Dict[str, Path] = {}
    for path in sorted(_get_decks_dir(username).glob("*.json")):
        try:
            payload = json.loads(path.read_text(encoding="utf-8"))
            name = payload.get("name") or path.stem
        except Exception:
            # Unreadable/invalid JSON: fall back to the filename stem.
            name = path.stem
        # Disambiguate decks that share a display name by appending the stem.
        if name in decks and decks[name] != path:
            name = f"{name} ({path.stem})"
        decks[name] = path
    return decks
def _ensure_card_stats(card: Dict) -> None:
"""Ensure that a card has simple spaced-repetition stats."""
if "score" not in card:
card["score"] = 0
if "reviews" not in card:
card["reviews"] = 0
def _add_difficulty_to_card(card: Dict) -> Dict:
    """Return *card* scored for difficulty when the scorer is available.

    Falls back to returning the card unchanged if the scorer is missing
    or raises.
    """
    if not HAS_DIFFICULTY_SCORER:
        return card
    try:
        return get_difficulty_scorer().score_flashcard(card)
    except Exception:
        return card
def load_deck(path: Path) -> Dict:
    """Loads a deck from JSON with stats for spaced repetition."""
    try:
        deck = json.loads(path.read_text(encoding="utf-8"))
    except Exception:
        # Missing or corrupt file: start from an empty deck.
        deck = {}
    # Normalize structure so downstream code can rely on these keys.
    if "cards" not in deck or not isinstance(deck["cards"], list):
        deck["cards"] = []
    if "name" not in deck:
        deck["name"] = path.stem
    if "tags" not in deck or not isinstance(deck["tags"], list):
        deck["tags"] = []
    for card in deck["cards"]:
        _ensure_card_stats(card)
    return deck
def save_deck(path: Path, deck: Dict) -> None:
    """Saves deck to JSON."""
    # Normalize required keys before serializing.
    deck.setdefault("cards", [])
    deck.setdefault("name", path.stem)
    if not isinstance(deck.get("tags"), list):
        deck["tags"] = []
    for card in deck["cards"]:
        _ensure_card_stats(card)
    serialized = json.dumps(deck, indent=2, ensure_ascii=False)
    path.write_text(serialized, encoding="utf-8")
def _extract_candidate_words(text: str) -> List[str]:
"""Simple tokenizer & filter for candidate vocab words."""
tokens = re.findall(r"\b\w+\b", text, flags=re.UNICODE)
out = []
seen = set()
for t in tokens:
t_norm = t.strip()
if len(t_norm) < 2:
continue
if any(ch.isdigit() for ch in t_norm):
continue
lower = t_norm.lower()
if lower in seen:
continue
seen.add(lower)
out.append(t_norm)
return out
def generate_flashcards_from_ocr_results(
    username: str,
    ocr_results: List[Dict],
    deck_name: str = "ocr",
    target_lang: str = "en",
    tags: Optional[List[str]] = None,
    use_advanced_generator: bool = True,
    max_cards: int = 20,
) -> Path:
    """
    Takes OCR results and constructs a vocab deck.

    Args:
        username: User identifier
        ocr_results: List of OCR result dicts with 'text' key
        deck_name: Name for the deck
        target_lang: Target language for translations
        tags: Optional tags for the deck
        use_advanced_generator: Whether to use FlashcardGenerator
        max_cards: Max number of words translated by the fallback path
            (caps network calls; previously hard-coded to 20)

    Returns:
        Path to the saved deck

    Raises:
        ValueError: If no candidate words or no translatable words are found
            by the fallback path.
    """
    # Try advanced generator first; any failure falls through to the
    # simple tokenize-and-translate pipeline below.
    if use_advanced_generator and HAS_FLASHCARD_GENERATOR:
        try:
            generator = FlashcardGenerator()
            flashcard_data = generator.generate_flashcards(ocr_results, target_lang)
            cards = flashcard_data.get('cards', [])
            if cards:
                # Add difficulty scores
                if HAS_DIFFICULTY_SCORER:
                    scorer = get_difficulty_scorer()
                    cards = scorer.score_all_flashcards(cards)
                # Ensure spaced-repetition stats exist on every card
                for card in cards:
                    _ensure_card_stats(card)
                decks_dir = _get_decks_dir(username)
                deck_path = decks_dir / f"{deck_name}.json"
                deck = {
                    "name": deck_name,
                    "cards": cards,
                    "tags": tags or ["ocr"],
                    "metadata": flashcard_data.get('metadata', {})
                }
                save_deck(deck_path, deck)
                return deck_path
        except Exception as e:
            print(f"[flashcards_tools] Advanced generator failed: {e}, using fallback")

    # Fallback: collect raw text from whichever key the OCR backend used.
    all_text = []
    for res in ocr_results:
        t = res.get("text") or res.get("raw_text") or res.get("original_text") or ""
        if t:
            all_text.append(t)
    joined = "\n".join(all_text)
    words = _extract_candidate_words(joined)
    if not words:
        raise ValueError("No candidate words found in OCR results.")

    translator = GoogleTranslator(source="auto", target=target_lang)
    cards = []
    for w in words[:max_cards]:
        try:
            trans = translator.translate(w)
        except Exception:
            # Best-effort: skip words the translator rejects.
            continue
        if not trans:
            continue
        # Skip words whose "translation" is just the same word back.
        if trans.strip().lower() == w.strip().lower():
            continue
        card = {
            "front": w,
            "back": trans,
            "content_type": "ocr_vocab",
            "language": target_lang,
        }
        card = _add_difficulty_to_card(card)
        _ensure_card_stats(card)
        cards.append(card)
    if not cards:
        raise ValueError("No translatable words found to build cards.")

    decks_dir = _get_decks_dir(username)
    deck_path = decks_dir / f"{deck_name}.json"
    deck = {
        "name": deck_name,
        "cards": cards,
        "tags": tags or [],
    }
    save_deck(deck_path, deck)
    return deck_path
def generate_flashcards_from_text(
    username: str,
    text: str,
    deck_name: str = "conversation",
    target_lang: str = "en",
    tags: Optional[List[str]] = None,
    source_lang: Optional[str] = None,
    max_cards: int = 20,
) -> Path:
    """
    Build a vocab deck from raw text.

    Args:
        username: User identifier
        text: Raw text to extract vocabulary from
        deck_name: Name for the deck
        target_lang: Target language for translations
        tags: Optional tags for the deck
        source_lang: Source language (auto-detect if None)
        max_cards: Max number of words translated by the fallback path
            (caps network calls; previously hard-coded to 20)

    Returns:
        Path to the saved deck

    Raises:
        ValueError: If no candidate words or no translatable words are found
            by the fallback path.
    """
    # Try advanced generator first; any failure falls through to the
    # simple tokenize-and-translate pipeline below.
    if HAS_FLASHCARD_GENERATOR:
        try:
            generator = FlashcardGenerator()
            # Wrap the text in an OCR-shaped result so the generator can
            # consume it through its normal interface.
            ocr_result = {
                'original_text': text,
                'text': text,
                'detected_language': source_lang or 'auto',
            }
            flashcard_data = generator.generate_flashcards([ocr_result], target_lang)
            cards = flashcard_data.get('cards', [])
            if cards:
                if HAS_DIFFICULTY_SCORER:
                    scorer = get_difficulty_scorer()
                    cards = scorer.score_all_flashcards(cards)
                for card in cards:
                    card['content_type'] = 'conversation_vocab'
                    _ensure_card_stats(card)
                decks_dir = _get_decks_dir(username)
                deck_path = decks_dir / f"{deck_name}.json"
                deck = {
                    "name": deck_name,
                    "cards": cards,
                    "tags": tags or ["conversation"],
                }
                save_deck(deck_path, deck)
                return deck_path
        except Exception as e:
            print(f"[flashcards_tools] Advanced generator failed: {e}, using fallback")

    # Fallback: tokenize and translate word by word.
    words = _extract_candidate_words(text)
    if not words:
        raise ValueError("No candidate words found in text.")
    translator = GoogleTranslator(source="auto", target=target_lang)
    cards = []
    for w in words[:max_cards]:
        try:
            trans = translator.translate(w)
        except Exception:
            # Best-effort: skip words the translator rejects.
            continue
        if not trans:
            continue
        # Skip words whose "translation" is just the same word back.
        if trans.strip().lower() == w.strip().lower():
            continue
        card = {
            "front": w,
            "back": trans,
            "content_type": "conversation_vocab",
            "language": target_lang,
        }
        card = _add_difficulty_to_card(card)
        _ensure_card_stats(card)
        cards.append(card)
    if not cards:
        raise ValueError("No translatable words found to build cards.")
    decks_dir = _get_decks_dir(username)
    deck_path = decks_dir / f"{deck_name}.json"
    deck = {
        "name": deck_name,
        "cards": cards,
        "tags": tags or ["conversation"],
    }
    save_deck(deck_path, deck)
    return deck_path
def add_difficulty_to_deck(deck: Dict) -> Dict:
    """Add difficulty scores to all cards in a deck."""
    if HAS_DIFFICULTY_SCORER:
        try:
            scorer = get_difficulty_scorer()
            scored = scorer.score_all_flashcards(deck.get("cards", []))
            deck["cards"] = scored
            deck["statistics"] = scorer.get_statistics(scored)
        except Exception as e:
            # Best-effort: scoring failure leaves the deck usable.
            print(f"[flashcards_tools] Difficulty scoring failed: {e}")
    return deck