yusenthebot
Integrate advanced OCR, FlashcardGenerator, DifficultyScorer, and AI Quiz from language project
aa3fdef
| # -*- coding: utf-8 -*- | |
| """ | |
| Flashcards Tools - Enhanced with FlashcardGenerator and DifficultyScorer | |
| """ | |
| import json | |
| import re | |
| from pathlib import Path | |
| from typing import Dict, List, Optional, Any | |
| from deep_translator import GoogleTranslator | |
| from .config import get_user_dir | |
| # Import advanced generators (with fallback) | |
| try: | |
| from .flashcard_generator import FlashcardGenerator | |
| HAS_FLASHCARD_GENERATOR = True | |
| except ImportError: | |
| HAS_FLASHCARD_GENERATOR = False | |
| try: | |
| from .difficulty_scorer import get_difficulty_scorer | |
| HAS_DIFFICULTY_SCORER = True | |
| except ImportError: | |
| HAS_DIFFICULTY_SCORER = False | |
def _get_decks_dir(username: str) -> Path:
    """Return the per-user deck directory, creating it on first use."""
    decks_dir = get_user_dir(username) / "decks"
    decks_dir.mkdir(parents=True, exist_ok=True)
    return decks_dir
def list_user_decks(username: str) -> Dict[str, Path]:
    """Map each deck's display name to its JSON file path.

    The "name" field stored inside the deck JSON is preferred; the file
    stem is used when the file is unreadable or has no name. Colliding
    names are disambiguated by appending the file stem.
    """
    decks: Dict[str, Path] = {}
    for path in sorted(_get_decks_dir(username).glob("*.json")):
        try:
            loaded = json.loads(path.read_text(encoding="utf-8"))
            name = loaded.get("name") or path.stem
        except Exception:
            name = path.stem
        if name in decks and decks[name] != path:
            name = f"{name} ({path.stem})"
        decks[name] = path
    return decks
| def _ensure_card_stats(card: Dict) -> None: | |
| """Ensure that a card has simple spaced-repetition stats.""" | |
| if "score" not in card: | |
| card["score"] = 0 | |
| if "reviews" not in card: | |
| card["reviews"] = 0 | |
def _add_difficulty_to_card(card: Dict) -> Dict:
    """Return *card* with a difficulty score attached when the scorer exists.

    Best-effort: if the scorer is unavailable or raises, the card is
    returned unchanged.
    """
    if not HAS_DIFFICULTY_SCORER:
        return card
    try:
        return get_difficulty_scorer().score_flashcard(card)
    except Exception:
        return card
def load_deck(path: Path) -> Dict:
    """Load a deck JSON file, normalising its structure and per-card stats.

    Unreadable or corrupt files produce an empty deck named after the
    file stem; "cards" and "tags" are coerced to lists when malformed.
    """
    try:
        deck = json.loads(path.read_text(encoding="utf-8"))
    except Exception:
        deck = {}
    if not isinstance(deck.get("cards"), list):
        deck["cards"] = []
    deck.setdefault("name", path.stem)
    if not isinstance(deck.get("tags"), list):
        deck["tags"] = []
    for card in deck["cards"]:
        _ensure_card_stats(card)
    return deck
def save_deck(path: Path, deck: Dict) -> None:
    """Normalise *deck* structure and card stats, then write it as UTF-8 JSON."""
    deck.setdefault("cards", [])
    deck.setdefault("name", path.stem)
    if not isinstance(deck.get("tags"), list):
        deck["tags"] = []
    for card in deck["cards"]:
        _ensure_card_stats(card)
    serialized = json.dumps(deck, indent=2, ensure_ascii=False)
    path.write_text(serialized, encoding="utf-8")
| def _extract_candidate_words(text: str) -> List[str]: | |
| """Simple tokenizer & filter for candidate vocab words.""" | |
| tokens = re.findall(r"\b\w+\b", text, flags=re.UNICODE) | |
| out = [] | |
| seen = set() | |
| for t in tokens: | |
| t_norm = t.strip() | |
| if len(t_norm) < 2: | |
| continue | |
| if any(ch.isdigit() for ch in t_norm): | |
| continue | |
| lower = t_norm.lower() | |
| if lower in seen: | |
| continue | |
| seen.add(lower) | |
| out.append(t_norm) | |
| return out | |
def generate_flashcards_from_ocr_results(
    username: str,
    ocr_results: List[Dict],
    deck_name: str = "ocr",
    target_lang: str = "en",
    tags: Optional[List[str]] = None,
    use_advanced_generator: bool = True,
) -> Path:
    """
    Build a vocabulary deck from OCR results and save it to the user's deck dir.

    Tries the advanced FlashcardGenerator first (when available and enabled);
    if it fails or yields no cards, falls back to extracting candidate words
    from the raw OCR text and translating them individually.

    Args:
        username: User identifier.
        ocr_results: OCR result dicts; text is read from 'text', 'raw_text',
            or 'original_text' (first non-empty wins).
        deck_name: Name for the deck (also the JSON file stem).
        target_lang: Target language code for translations.
        tags: Optional tags for the deck; defaults to ["ocr"].
        use_advanced_generator: Whether to try FlashcardGenerator first.

    Returns:
        Path to the saved deck JSON file.

    Raises:
        ValueError: If no candidate or translatable words are found
            (fallback path only).
    """
    # Try the advanced generator first.
    if use_advanced_generator and HAS_FLASHCARD_GENERATOR:
        try:
            generator = FlashcardGenerator()
            flashcard_data = generator.generate_flashcards(ocr_results, target_lang)
            cards = flashcard_data.get('cards', [])
            if cards:
                # Attach difficulty scores when the scorer is available.
                if HAS_DIFFICULTY_SCORER:
                    scorer = get_difficulty_scorer()
                    cards = scorer.score_all_flashcards(cards)
                for card in cards:
                    _ensure_card_stats(card)
                decks_dir = _get_decks_dir(username)
                deck_path = decks_dir / f"{deck_name}.json"
                deck = {
                    "name": deck_name,
                    "cards": cards,
                    "tags": tags or ["ocr"],
                    "metadata": flashcard_data.get('metadata', {}),
                }
                save_deck(deck_path, deck)
                return deck_path
        except Exception as e:
            # Best-effort: fall through to the simple extraction path below.
            print(f"[flashcards_tools] Advanced generator failed: {e}, using fallback")

    # Fallback: naive word extraction + per-word translation.
    all_text = [
        res.get("text") or res.get("raw_text") or res.get("original_text") or ""
        for res in ocr_results
    ]
    joined = "\n".join(t for t in all_text if t)
    words = _extract_candidate_words(joined)
    if not words:
        raise ValueError("No candidate words found in OCR results.")

    translator = GoogleTranslator(source="auto", target=target_lang)
    cards = []
    for w in words[:20]:  # Cap translation calls at 20 words.
        try:
            trans = translator.translate(w)
        except Exception:
            continue  # Skip words the translator cannot handle.
        if not trans:
            continue
        # Drop words whose translation is identical (already in target language).
        if trans.strip().lower() == w.strip().lower():
            continue
        card = {
            "front": w,
            "back": trans,
            "content_type": "ocr_vocab",
            "language": target_lang,
        }
        card = _add_difficulty_to_card(card)
        _ensure_card_stats(card)
        cards.append(card)
    if not cards:
        raise ValueError("No translatable words found to build cards.")

    decks_dir = _get_decks_dir(username)
    deck_path = decks_dir / f"{deck_name}.json"
    deck = {
        "name": deck_name,
        "cards": cards,
        # Fixed: was `tags or []`, inconsistent with the advanced path's
        # `tags or ["ocr"]` default for the same deck.
        "tags": tags or ["ocr"],
    }
    save_deck(deck_path, deck)
    return deck_path
def generate_flashcards_from_text(
    username: str,
    text: str,
    deck_name: str = "conversation",
    target_lang: str = "en",
    tags: Optional[List[str]] = None,
    source_lang: Optional[str] = None,
) -> Path:
    """
    Build a vocabulary deck from raw text and save it under the user's decks.

    Prefers the advanced FlashcardGenerator, fed a synthetic OCR-style
    result; if it is unavailable, fails, or yields no cards, falls back
    to simple word extraction plus per-word translation.

    Args:
        username: User identifier.
        text: Raw text to mine for vocabulary.
        deck_name: Name for the deck (also the JSON file stem).
        target_lang: Target language code for translations.
        tags: Optional deck tags; defaults to ["conversation"].
        source_lang: Source language code, or None to auto-detect.

    Returns:
        Path to the saved deck JSON file.

    Raises:
        ValueError: When no candidate or translatable words are found.
    """
    if HAS_FLASHCARD_GENERATOR:
        try:
            # Wrap the raw text as a synthetic OCR result for the generator.
            pseudo_ocr = {
                'original_text': text,
                'text': text,
                'detected_language': source_lang or 'auto',
            }
            generated = FlashcardGenerator().generate_flashcards([pseudo_ocr], target_lang)
            new_cards = generated.get('cards', [])
            if new_cards:
                if HAS_DIFFICULTY_SCORER:
                    new_cards = get_difficulty_scorer().score_all_flashcards(new_cards)
                for card in new_cards:
                    card['content_type'] = 'conversation_vocab'
                    _ensure_card_stats(card)
                deck_path = _get_decks_dir(username) / f"{deck_name}.json"
                save_deck(deck_path, {
                    "name": deck_name,
                    "cards": new_cards,
                    "tags": tags or ["conversation"],
                })
                return deck_path
        except Exception as e:
            print(f"[flashcards_tools] Advanced generator failed: {e}, using fallback")

    # Fallback: extract candidate words and translate each individually.
    candidates = _extract_candidate_words(text)
    if not candidates:
        raise ValueError("No candidate words found in text.")

    translator = GoogleTranslator(source="auto", target=target_lang)
    built = []
    for word in candidates[:20]:  # Cap translation calls at 20 words.
        try:
            translated = translator.translate(word)
        except Exception:
            continue  # Skip words the translator cannot handle.
        if not translated or translated.strip().lower() == word.strip().lower():
            continue  # Empty or unchanged translations add no value.
        new_card = _add_difficulty_to_card({
            "front": word,
            "back": translated,
            "content_type": "conversation_vocab",
            "language": target_lang,
        })
        _ensure_card_stats(new_card)
        built.append(new_card)
    if not built:
        raise ValueError("No translatable words found to build cards.")

    deck_path = _get_decks_dir(username) / f"{deck_name}.json"
    save_deck(deck_path, {
        "name": deck_name,
        "cards": built,
        "tags": tags or ["conversation"],
    })
    return deck_path
def add_difficulty_to_deck(deck: Dict) -> Dict:
    """Attach difficulty scores and summary statistics to every card in *deck*.

    No-op when the DifficultyScorer is unavailable; scoring failures are
    printed and the deck is returned as-is.
    """
    if HAS_DIFFICULTY_SCORER:
        try:
            scorer = get_difficulty_scorer()
            scored = scorer.score_all_flashcards(deck.get("cards", []))
            deck["cards"] = scored
            deck["statistics"] = scorer.get_statistics(scored)
        except Exception as e:
            print(f"[flashcards_tools] Difficulty scoring failed: {e}")
    return deck