import base64
import io
import os

from openai import OpenAI
from PIL import Image
def resize_image(image, max_size=800):
    """Resize the image so its longer side is `max_size` pixels while maintaining aspect ratio."""
    # Open the image using Pillow
    img = Image.open(image)
    print("Original size:", img.size)
    # Get the current width and height of the image
    width, height = img.size
    # Scale so the longer side equals max_size
    if width > height:
        new_width = max_size
        new_height = int((new_width / width) * height)
    else:
        new_height = max_size
        new_width = int((new_height / height) * width)
    # Resize the image using the LANCZOS filter for high-quality rescaling
    img = img.resize((new_width, new_height), Image.Resampling.LANCZOS)
    print("After resizing:", img.size)
    # Save the resized image to a BytesIO object to later encode to base64
    img_byte_arr = io.BytesIO()
    img.save(img_byte_arr, format="PNG")
    img_byte_arr.seek(0)  # Rewind the BytesIO object to the beginning
    return img_byte_arr
# Function to encode an image file on disk to base64
def encode_image(image_path):
    assert os.path.exists(image_path), "The image file does not exist."
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")


def encode_image_from_uploaded_file(image):
    # Resize the uploaded image, then base64-encode the resulting PNG bytes
    assert image is not None, "No image uploaded."
    resized_image = resize_image(image)
    image_bytes = resized_image.read()
    return base64.b64encode(image_bytes).decode("utf-8")
def transcribe_image(image_file):
    """Transcribe handwritten text from an image using the OpenAI vision model."""
    # Initialize the OpenAI client (reads OPENAI_API_KEY from the environment)
    client = OpenAI()
    # Encode the image as base64
    base64_image = encode_image_from_uploaded_file(image_file)
    # Send the image and the transcription instructions to the model
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Please transcribe the handwritten text in this image. Return only the text content.",
                    },
                    {
                        # The resized image is saved as PNG, so declare a PNG data URL
                        "type": "image_url",
                        "image_url": {"url": f"data:image/png;base64,{base64_image}"},
                    },
                ],
            }
        ],
        max_tokens=300,
    )
    transcribed_text = response.choices[0].message.content
    return transcribed_text
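

# --- Usage sketch (not part of the original app) ---
# A minimal way to exercise transcribe_image() as a standalone script, assuming
# OPENAI_API_KEY is set in the environment. The file name below is a hypothetical
# example; replace it with a real image of handwritten text.
if __name__ == "__main__":
    sample_path = "handwriting_sample.png"  # hypothetical example file
    if os.path.exists(sample_path):
        print(transcribe_image(sample_path))
    else:
        print(f"Sample image not found: {sample_path}")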