Spaces:
Sleeping
Sleeping
DishaKushwah
committed on
Commit
·
f6df526
1
Parent(s):
681d41f
Update mcq_generator.py
Browse files- mcq_generator.py +171 -1
mcq_generator.py
CHANGED
|
@@ -1 +1,171 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import random
|
| 3 |
+
import nltk
|
| 4 |
+
import re
|
| 5 |
+
from transformers import (pipeline, AutoModelForQuestionAnswering, AutoTokenizer)
|
| 6 |
+
from sentence_transformers import SentenceTransformer
|
| 7 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 8 |
+
from typing import List, Dict, Any
|
| 9 |
+
# Fetch the NLTK data packages at import time.
# NLTK 3.9+ loads the sentence tokenizer from the 'punkt_tab' package, while
# older releases read 'punkt'; both are requested so nltk.sent_tokenize works
# on either. A package missing from the index is reported by nltk.download
# (it returns False) rather than raised.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')
|
| 11 |
+
|
| 12 |
+
class AdvancedMCQGenerator:
    """Build multiple-choice questions from a passage of text.

    Keywords are picked per sentence with TF-IDF, inserted into templated
    questions, answered by an extractive question-answering pipeline, and
    completed with distractor options drawn from the passage itself.
    """

    # Question phrasings; ``{subject}`` is replaced by a passage keyword.
    _QUESTION_TEMPLATES = (
        "What is the significance of {subject} in this context?",
        "Explain the role of {subject}.",
        "How does {subject} contribute to the overall understanding?",
    )

    # Generic options used when the passage cannot supply enough distractors.
    _FALLBACK_DISTRACTORS = (
        "A related contextual detail",
        "An alternative interpretation",
        "A supplementary concept",
    )

    # Number of highest-weighted TF-IDF terms kept per sentence.
    _KEYWORDS_PER_SENTENCE = 3

    def __init__(self):
        """Load the QA pipeline, sentence embedder and TF-IDF vectorizer."""
        # Question Answering Model
        qa_model_name = "deepset/roberta-base-squad2"
        self.qa_tokenizer = AutoTokenizer.from_pretrained(qa_model_name)
        self.qa_pipeline = pipeline(
            "question-answering",
            model=qa_model_name,
            device=0 if torch.cuda.is_available() else -1,
        )
        self.sentence_embedder = SentenceTransformer('all-mpnet-base-v2')
        self.tfidf_vectorizer = TfidfVectorizer(stop_words='english', ngram_range=(1, 2))
        self.sentence_tokenizer = nltk.sent_tokenize
        # Questions already produced by the current generate_mcq call.
        self.generated_questions = set()

    def _extract_context_features(self, context: str) -> Dict[str, Any]:
        """Split the context into sentences and collect per-sentence keywords.

        Args:
            context: The (already preprocessed) passage.

        Returns:
            A dict with ``'sentences'`` (list of sentences), ``'keywords'``
            (de-duplicated keywords in first-seen order) and
            ``'total_sentences'``. If TF-IDF extraction fails, the keywords
            fall back to the first ten whitespace-separated tokens.
        """
        sentences = self.sentence_tokenizer(context)
        try:
            tfidf_matrix = self.tfidf_vectorizer.fit_transform(sentences).tocsr()
            feature_names = self.tfidf_vectorizer.get_feature_names_out()
            top_keywords = []
            for row in range(int(tfidf_matrix.shape[0])):
                start, end = tfidf_matrix.indptr[row], tfidf_matrix.indptr[row + 1]
                # Rank this sentence's terms by TF-IDF weight (highest first)
                # instead of by column index, which is merely alphabetical.
                scored = sorted(
                    zip(tfidf_matrix.data[start:end], tfidf_matrix.indices[start:end]),
                    key=lambda pair: (-pair[0], pair[1]),
                )
                top_keywords.extend(
                    str(feature_names[idx])
                    for _, idx in scored[:self._KEYWORDS_PER_SENTENCE]
                )
            return {
                'sentences': sentences,
                # dict.fromkeys de-duplicates while keeping a stable order.
                'keywords': list(dict.fromkeys(top_keywords)),
                'total_sentences': len(sentences),
            }
        except Exception as e:
            # Best effort: e.g. the vectorizer rejects an empty vocabulary.
            print(f"Context feature extraction error: {e}")
            return {
                'sentences': sentences,
                'keywords': context.split()[:10],
                'total_sentences': len(sentences),
            }

    def _generate_smart_distractors(self, correct_answer: str, context_features: Dict[str, Any], num_distractors: int = 3) -> List[str]:
        """Produce distinct wrong options for a question.

        Sources are tried in order: passage sentences whose similarity to the
        answer is closest to 0.5, keyword-based phrases, generic fallbacks,
        and finally numbered placeholders. Options are unique ignoring case
        and never equal the correct answer.

        Args:
            correct_answer: The answer the distractors must differ from.
            context_features: Output of ``_extract_context_features``.
            num_distractors: How many distractors to return.

        Returns:
            A list of exactly ``num_distractors`` strings (empty when
            ``num_distractors`` is not positive).
        """
        if num_distractors <= 0:
            return []

        sentences = context_features['sentences']
        keywords = context_features['keywords']
        distractors = []
        used_options = {correct_answer.lower()}
        answer_key = correct_answer.strip().lower()

        def _accept(option):
            # Add the option unless an equal one (ignoring case) is present.
            key = option.lower()
            if key in used_options:
                return False
            used_options.add(key)
            distractors.append(option)
            return True

        def _is_full():
            return len(distractors) >= num_distractors

        # Semantic similarity-based distractor generation
        try:
            candidates = [
                sent for sent in dict.fromkeys(sentences)
                if len(sent.split()) > 3
                and sent.lower() not in used_options
                # A sentence that contains the answer would also be correct.
                and not (answer_key and answer_key in sent.lower())
            ]
            # Similarities are computed once per candidate, not once per slot.
            scored = [
                (sent, self._calculate_semantic_similarity(correct_answer, sent))
                for sent in candidates
            ]
            # Prefer plausible-but-wrong options: similarity nearest to 0.5.
            scored.sort(key=lambda pair: abs(0.5 - pair[1]))
        except Exception as e:
            print(f"Distractor generation error: {e}")
            scored = []

        for sent, _ in scored:
            if _is_full():
                break
            _accept(sent)

        # Keyword phrases, each keyword used at most once, in random order.
        if not _is_full() and keywords:
            unique_keywords = list(dict.fromkeys(keywords))
            for keyword in random.sample(unique_keywords, len(unique_keywords)):
                if _is_full():
                    break
                _accept(f"A key aspect related to {keyword}")

        # Generic fallbacks, again without repeats.
        if not _is_full():
            fallbacks = list(self._FALLBACK_DISTRACTORS)
            for option in random.sample(fallbacks, len(fallbacks)):
                if _is_full():
                    break
                _accept(option)

        # Last resort so the caller always receives the requested count.
        counter = 1
        while not _is_full():
            suffix = "" if counter == 1 else f" ({counter})"
            _accept(f"A contextual detail{suffix}")
            counter += 1

        return distractors

    def _calculate_semantic_similarity(self, text1: str, text2: str) -> float:
        """Return the absolute cosine similarity of the two texts' embeddings.

        Returns 0.0 (after printing the error) if embedding or comparison
        fails.
        """
        try:
            # Embed texts
            embedding1 = torch.as_tensor(
                self.sentence_embedder.encode(text1), dtype=torch.float32
            ).flatten()
            embedding2 = torch.as_tensor(
                self.sentence_embedder.encode(text2), dtype=torch.float32
            ).flatten()

            # Calculate cosine similarity. The vectors are 1-D, so the
            # reduction must run over dim 0; the default dim=1 raises.
            similarity = torch.nn.functional.cosine_similarity(
                embedding1, embedding2, dim=0
            ).item()
            return abs(similarity)
        except Exception as e:
            print(f"Semantic similarity error: {e}")
            return 0.0

    def generate_mcq(self, context: str, num_questions: int = 5, difficulty: str = "medium") -> List[Dict[str, Any]]:
        """Generate multiple-choice questions for a passage.

        Args:
            context: The source passage.
            num_questions: Maximum number of questions to produce.
            difficulty: Accepted for interface compatibility; it does not
                currently influence generation.

        Returns:
            A list of dicts with keys ``'question'``, ``'options'``,
            ``'correct_answer'`` (index into ``'options'``) and
            ``'explanation'``. Questions are never repeated, so fewer than
            ``num_questions`` are returned when the keyword/template
            combinations run out or a question cannot be answered.
        """
        context = self._preprocess_context(context)  # Preprocess context
        context_features = self._extract_context_features(context)  # Extract context features
        self.generated_questions.clear()  # Reset generated questions
        mcq_questions = []
        if num_questions <= 0:
            return mcq_questions

        # Every keyword/template pairing, visited in random order.
        subjects = list(dict.fromkeys(context_features['keywords']))
        question_pool = [
            template.format(subject=subject)
            for subject in subjects
            for template in self._QUESTION_TEMPLATES
        ]
        random.shuffle(question_pool)

        for question in question_pool:
            if len(mcq_questions) >= num_questions:
                break
            if question in self.generated_questions:
                continue
            self.generated_questions.add(question)
            try:
                # Extract answer using QA pipeline
                answer_result = self.qa_pipeline(question=question, context=context)
                correct_answer = answer_result['answer'].strip()
                if not correct_answer:
                    # An empty answer cannot be shown as an option.
                    continue
                # Generate contextually relevant distractors
                distractors = self._generate_smart_distractors(correct_answer, context_features)
                all_options = [correct_answer] + distractors  # Combine options
                random.shuffle(all_options)
                correct_index = all_options.index(correct_answer)  # Determine correct option index
                mcq_questions.append({
                    "question": question,
                    "options": all_options,
                    "correct_answer": correct_index,
                    "explanation": f"Correct answer based on the context: {correct_answer}",
                })
            except Exception as e:
                # Best effort: skip this question and try the next one.
                print(f"MCQ generation error: {e}")
        return mcq_questions

    def _preprocess_context(self, context: str) -> str:
        """Normalize whitespace, drop non-printable characters, pad short text.

        Contexts shorter than 20 words get a fixed filler sentence appended.
        """
        context = re.sub(r'\s+', ' ', context).strip()  # Collapse whitespace runs
        context = ''.join(char for char in context if char.isprintable())  # Remove non-printable characters
        if len(context.split()) < 20:  # Append filler if too short
            context += " Additional context to enhance question generation."
        return context
|
| 121 |
+
|
| 122 |
+
def main():
    """Run an interactive console quiz generated from user-supplied text.

    Prompts for the context, the number of questions and a difficulty level,
    generates the questions, then asks each one and prints the final score.
    """
    generator = AdvancedMCQGenerator()
    print(" -------------Multiple Choice Question Generator-------------")
    context = input("\n>> Enter context text: ")

    # Re-prompt until the reply parses as an integer.
    num_questions = None
    while num_questions is None:
        try:
            num_questions = int(input("\n>> How many questions do you want to generate? "))
        except ValueError:
            print("Please enter a valid number.")

    # Re-prompt until one of the three known levels is given.
    allowed_levels = ('easy', 'medium', 'hard')
    difficulty = input("\n>> Enter difficulty level (easy/medium/hard): ").lower()
    while difficulty not in allowed_levels:
        print("Invalid difficulty level. Please choose easy, medium, or hard.")
        difficulty = input("\n>> Enter difficulty level (easy/medium/hard): ").lower()

    questions = generator.generate_mcq(context, num_questions, difficulty)
    if not questions:
        print("\nNo multiple choice questions were generated.")
        return

    print("\n--- Multiple Choice Quiz ---")
    score = 0
    allowed_letters = ('A', 'B', 'C', 'D')

    for number, item in enumerate(questions, 1):
        print(f"\nQuestion {number}: {item['question']}")
        print("Options:")
        for position, option in enumerate(item['options']):
            print(f"{chr(65 + position)}. {option}")

        # Re-prompt until the reply is exactly one of the option letters.
        reply = input("\nYour Answer (A/B/C/D): ").upper()
        while reply not in allowed_letters:
            print("Invalid input. Please enter A, B, C, or D.")
            reply = input("\nYour Answer (A/B/C/D): ").upper()

        # Letters map to option indexes: 'A' -> 0, 'B' -> 1, ...
        expected_index = item['correct_answer']
        if ord(reply) - 65 == expected_index:
            print("✅ Correct!")
            score += 1
        else:
            print(f"❌ Incorrect. Correct Answer: {chr(65 + expected_index)}")

    print(f"\n-----Score: {score}/{len(questions)}-----")
|
| 168 |
+
|
| 169 |
+
# Start the interactive quiz only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
| 171 |
+
# SAMPLE CONTEXT- The French Revolution began in 1789 and marked a significant turning point in European history. It was fueled by widespread social inequality, financial crisis, and the rise of Enlightenment ideas. The French monarchy was overthrown, and King Louis XVI was executed. The revolution introduced the ideals of liberty, equality, and fraternity. It led to the rise of Napoleon Bonaparte and had a lasting impact on modern democracy and human rights movements around the world.
|