Spaces:
Sleeping
Sleeping
| import openai | |
| import os | |
| from openpyxl import load_workbook, Workbook | |
| from src.transcribe_image import transcribe_image | |
| from src.assess_text import assess_essay_with_gpt | |
def process_essays(images, question, guidelines, workbook):
    """Transcribe and grade the essays listed in a workbook's active sheet.

    The active sheet is read with row 1 as the header row and one essay per
    row from row 2 onwards, using these columns: 1 = student ID,
    2 = number of pages, 3 = transcribed text, 4 = mark, 5 = reason.
    Processing stops at the first row whose student ID cell is empty (or
    after the last row if none is empty). No values are written to the
    input workbook; all results go to a freshly created workbook.

    Rows without a transcription are transcribed from ``images``; rows
    without a mark and reason are graded with ``assess_essay_with_gpt``.
    Rows that already carry a mark and reason are used as graded examples,
    and every newly graded essay is added to the examples passed to later
    assessments. Progress messages are printed to stdout.

    Args:
        images: Indexable sequence of items accepted by ``transcribe_image``.
            Items are consumed in order, ``num_pages`` at a time, and only
            for rows whose transcription is missing.
        question: Passed through unchanged to ``assess_essay_with_gpt``.
        guidelines: Passed through unchanged to ``assess_essay_with_gpt``.
        workbook: Workbook whose ``active`` sheet holds the essay rows.

    Returns:
        A new ``Workbook`` whose active sheet contains the copied header
        row plus, for each processed row, the student ID, page count,
        transcription, mark and reason.

    Raises:
        AssertionError: If a row has a mark without a reason, or a reason
            without a mark.
        IndexError: If ``images`` runs out before all missing
            transcriptions have been produced.
    """
    sheet = workbook.active

    # Results are written to a separate workbook.
    new_workbook = Workbook()
    new_sheet = new_workbook.active

    # Copy the header row.
    for col in range(1, sheet.max_column + 1):
        new_sheet.cell(row=1, column=col).value = sheet.cell(row=1, column=col).value

    # The data ends at the first row with an empty student ID; when every
    # row has one, it ends just past the last row so all rows are processed.
    last_row = sheet.max_row
    first_empty_row = next(
        (
            row
            for row in range(2, last_row + 1)
            if sheet.cell(row=row, column=1).value is None
        ),
        last_row + 1,
    )
    data_rows = range(2, first_empty_row)

    # First pass: copy IDs and page counts, transcribing any missing texts.
    img_index = 0
    for row in data_rows:
        student_id = sheet.cell(row=row, column=1).value
        num_pages = sheet.cell(row=row, column=2).value
        transcribed_text = sheet.cell(row=row, column=3).value

        new_sheet.cell(row=row, column=1).value = student_id
        new_sheet.cell(row=row, column=2).value = num_pages

        if transcribed_text is None:
            print(f"Transcribing essay for student {student_id}...")
            # Images are only consumed here, so the index advances solely
            # for rows that actually needed a transcription.
            pages = [
                transcribe_image(images[img_index + offset])
                for offset in range(num_pages)
            ]
            img_index += len(pages)
            transcribed_text = "\n".join(pages).strip()

        new_sheet.cell(row=row, column=3).value = transcribed_text

    # Collect the rows that are already graded as examples for the prompt.
    examples = []
    for row in data_rows:
        mark = sheet.cell(row=row, column=4).value
        reason = sheet.cell(row=row, column=5).value
        if mark is None and reason is None:
            continue
        if mark is None or reason is None:
            student_id = sheet.cell(row=row, column=1).value
            # Raised explicitly rather than via `assert` so the check is
            # not stripped when Python runs with -O.
            raise AssertionError(f"Mark or reason missing for student {student_id}.")
        # Read the essay from the new sheet: it holds the text produced by
        # the first pass when the original row had no transcription, and an
        # identical copy of the original text otherwise.
        essay = new_sheet.cell(row=row, column=3).value
        examples.append({"essay": essay, "mark": mark, "reason": reason})

    # Second pass: grade rows without a mark/reason, copy the rest through.
    for row in data_rows:
        mark = sheet.cell(row=row, column=4).value
        reason = sheet.cell(row=row, column=5).value

        if mark is None and reason is None:
            student_id = sheet.cell(row=row, column=1).value
            essay = new_sheet.cell(row=row, column=3).value
            print(f"Assessing essay for student {student_id}...")
            assessment = assess_essay_with_gpt(essay, question, guidelines, examples)
            mark = assessment['mark']
            reason = assessment['reason']
            # Each freshly graded essay becomes an example for later rows.
            examples.append({"essay": essay, "mark": mark, "reason": reason})

        new_sheet.cell(row=row, column=4).value = mark
        new_sheet.cell(row=row, column=5).value = reason

    return new_workbook
if __name__ == "__main__":
    # Input locations; replace these with the actual paths for a real run.
    folder_path = "data/images"
    question_file = "data/question.txt"
    guidelines_file = "data/assessment_guidelines.txt"
    excel_file = "data/essays.xlsx"

    # Build the full path of every entry in the image folder, then order
    # them by modification time (oldest first).
    # NOTE(review): os.listdir returns every directory entry, not only
    # image files - confirm the folder contains nothing else.
    images = [os.path.join(folder_path, img) for img in os.listdir(folder_path)]
    images.sort(key=os.path.getmtime)

    # The question and the assessment guidelines are plain-text files,
    # read whole and stripped of surrounding whitespace.
    with open(question_file, 'r') as file:
        question = file.read().strip()
    with open(guidelines_file, 'r') as file:
        guidelines = file.read().strip()

    workbook = load_workbook(excel_file)
    new_workbook = process_essays(images, question, guidelines, workbook)

    # Write the results next to the input, with "_assessed" in the name.
    output_file = excel_file.replace(".xlsx", "_assessed.xlsx")
    new_workbook.save(output_file)
    print("Assessment complete. Results saved in assessed version of the Excel file.")