Spaces:

TensorFlo
/

AutoAssess

Sleeping

App Files Files Community

AutoAssess / src /main.py

TensorFlo

bugfix

8c0a387 about 1 year ago

raw

history blame

4.81 kB

	import openai
	import os
	from openpyxl import load_workbook, Workbook

	from src.transcribe_image import transcribe_image
	from src.assess_text import assess_essay_with_gpt


	def process_essays(images, question, guidelines, workbook):
	    """Transcribe and grade the essays listed in ``workbook``'s active sheet.

	    The active sheet is read with row 1 as headers and, from row 2 onwards,
	    column 1 = student ID, column 2 = number of image pages, column 3 =
	    transcribed text, column 4 = mark, column 5 = reason. Rows are processed
	    up to (but not including) the first row whose student ID cell is empty.

	    Args:
	        images: Indexable sequence of page images, consumed in order. Each
	            row that lacks a transcription takes the next ``num_pages``
	            entries; each entry is passed to ``transcribe_image``.
	        question: Essay question, forwarded to ``assess_essay_with_gpt``.
	        guidelines: Assessment guidelines, forwarded to
	            ``assess_essay_with_gpt``.
	        workbook: Workbook whose active sheet holds the essay table.

	    Returns:
	        A new ``Workbook`` whose active sheet contains the copied header row
	        and, for every processed row, columns 1-5 with missing
	        transcriptions, marks and reasons filled in. The workbook is NOT
	        saved here; that is left to the caller.

	    Raises:
	        AssertionError: If a row has a mark without a reason or vice versa.
	    """
	    sheet = workbook.active

	    # Results go into a fresh workbook rather than back into the input one.
	    new_workbook = Workbook()
	    new_sheet = new_workbook.active

	    # Copy the whole header row.
	    for col in range(1, sheet.max_column + 1):
	        new_sheet.cell(row=1, column=col).value = sheet.cell(row=1, column=col).value

	    # The first row without a student ID ends the data; if every row has one,
	    # process through the last row of the sheet.
	    last_row = sheet.max_row
	    first_empty_row = next(
	        (
	            row
	            for row in range(2, last_row + 1)
	            if sheet.cell(row=row, column=1).value is None
	        ),
	        last_row + 1,
	    )
	    data_rows = range(2, first_empty_row)

	    # First pass: copy IDs/page counts and transcribe missing texts.
	    img_index = 0
	    for row in data_rows:
	        student_id = sheet.cell(row=row, column=1).value
	        num_pages = sheet.cell(row=row, column=2).value
	        transcribed_text = sheet.cell(row=row, column=3).value

	        new_sheet.cell(row=row, column=1).value = student_id
	        new_sheet.cell(row=row, column=2).value = num_pages

	        if transcribed_text is None:
	            print(f"Transcribing essay for student {student_id}...")
	            # Only rows needing transcription advance the image cursor.
	            pages = [
	                transcribe_image(images[img_index + offset])
	                for offset in range(num_pages)
	            ]
	            img_index += len(pages)
	            transcribed_text = "\n".join(pages).strip()

	        new_sheet.cell(row=row, column=3).value = transcribed_text

	    # Collect the already-graded rows as examples for the grading prompt.
	    examples = []
	    for row in data_rows:
	        mark = sheet.cell(row=row, column=4).value
	        reason = sheet.cell(row=row, column=5).value
	        if mark is None and reason is None:
	            continue
	        if mark is None or reason is None:
	            student_id = sheet.cell(row=row, column=1).value
	            # Raised explicitly instead of via `assert` so the check is not
	            # stripped under `python -O`; the exception type is unchanged.
	            raise AssertionError(f"Mark or reason missing for student {student_id}.")
	        # Read the essay from new_sheet: a graded row may have been
	        # transcribed only in the first pass, in which case the source sheet
	        # still holds None for its text.
	        essay = new_sheet.cell(row=row, column=3).value
	        examples.append({"essay": essay, "mark": mark, "reason": reason})

	    # Second pass: grade rows that have neither mark nor reason.
	    for row in data_rows:
	        student_id = sheet.cell(row=row, column=1).value
	        transcribed_text = new_sheet.cell(row=row, column=3).value
	        mark = sheet.cell(row=row, column=4).value
	        reason = sheet.cell(row=row, column=5).value

	        if mark is None and reason is None:
	            print(f"Assessing essay for student {student_id}...")
	            assessment = assess_essay_with_gpt(transcribed_text, question, guidelines, examples)
	            mark = assessment['mark']
	            reason = assessment['reason']
	            # Each newly assessed essay becomes an example for later rows.
	            examples.append({"essay": transcribed_text, "mark": mark, "reason": reason})

	        new_sheet.cell(row=row, column=4).value = mark
	        new_sheet.cell(row=row, column=5).value = reason

	    return new_workbook


	if __name__ == "__main__":
	    # Script entry point: load the inputs, run the assessment, and save the
	    # result next to the input spreadsheet with an "_assessed" suffix.

	    folder_path = "data/images"  # Replace with actual folder path
	    question_file = "data/question.txt"  # Replace with actual file path
	    guidelines_file = "data/assessment_guidelines.txt"  # Replace with actual file path
	    excel_file = "data/essays.xlsx"

	    # Page images, ordered oldest-first by modification time. Only regular
	    # files are kept so a subdirectory is never handed to the transcriber.
	    candidate_paths = (os.path.join(folder_path, name) for name in os.listdir(folder_path))
	    images = sorted(
	        (path for path in candidate_paths if os.path.isfile(path)),
	        key=os.path.getmtime,
	    )

	    # Explicit encoding so decoding does not depend on the platform default.
	    with open(question_file, 'r', encoding="utf-8") as file:
	        question = file.read().strip()
	    with open(guidelines_file, 'r', encoding="utf-8") as file:
	        guidelines = file.read().strip()
	    workbook = load_workbook(excel_file)

	    new_workbook = process_essays(images, question, guidelines, workbook)

	    # Insert the suffix before the extension only, leaving any other ".xlsx"
	    # occurrence in the path untouched.
	    output_root, output_ext = os.path.splitext(excel_file)
	    new_workbook.save(f"{output_root}_assessed{output_ext}")
	    print("Assessment complete. Results saved in assessed version of the Excel file.")