Spaces:
Sleeping
Sleeping
| from fastapi import FastAPI, UploadFile, File, Form | |
| from fastapi.middleware.cors import CORSMiddleware | |
| import torch | |
| from transformers import pipeline | |
| import PyPDF2 | |
| from docx import Document | |
| import io | |
# The ASGI application object; the title and description are passed to FastAPI
# as the API's metadata.
app = FastAPI(
    title="Text Summarization API",
    description="API for summarizing text and documents using Falcon's text summarization model",
)
# Configure CORS: every origin, HTTP method and request header is accepted.
# NOTE(review): wildcard origins combined with allow_credentials=True is a very
# permissive pairing that the FastAPI CORS documentation cautions about —
# confirm that credentialed cross-origin requests are actually required.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Build the summarization pipeline once, when the module is loaded, so that
# every request reuses the same model instance. CUDA is used when torch
# reports it as available; otherwise the model runs on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
summarization_pipe = pipeline(
    "summarization",
    model="Falconsai/text_summarization",
    device=device,
)
def extract_text_from_pdf(file_bytes):
    """
    Extract the text of every page of an in-memory PDF.

    Args:
        file_bytes: Raw bytes of the PDF document.

    Returns:
        The text of all pages that yielded any text, joined with newlines.
        An empty string is returned when no page yields text, so callers can
        keep using a simple truthiness check.
    """
    pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_bytes))
    # A page may yield no text at all; ``or ""`` keeps a missing result from
    # raising TypeError during concatenation.
    page_texts = (page.extract_text() or "" for page in pdf_reader.pages)
    # Join with a newline so the last word of one page is not glued to the
    # first word of the next; skip empty pages so a text-less PDF still
    # produces "" rather than a string made only of separators.
    return "\n".join(chunk for chunk in page_texts if chunk)
def extract_text_from_docx(file_bytes):
    """
    Extract the paragraph text of an in-memory DOCX document.

    Args:
        file_bytes: Raw bytes of the DOCX document.

    Returns:
        The text of each paragraph in ``doc.paragraphs`` order, with a
        newline appended after every paragraph (including the last one).
    """
    document = Document(io.BytesIO(file_bytes))
    return "".join(f"{para.text}\n" for para in document.paragraphs)
async def summarize_text(text: str = Form(...)):
    """
    Summarize text submitted as a form field.

    Args:
        text: Required form field holding the text to summarize.

    Returns:
        ``{"summary": <str>}`` with the first summary produced by the
        pipeline, or ``{"error": <str>}`` when the submitted text is empty
        or consists only of whitespace.
    """
    # NOTE(review): no route decorator (e.g. ``@app.post(...)``) is visible on
    # this handler in this view — confirm it is registered with ``app``.
    # A whitespace-only value is truthy but gives the model nothing to
    # summarize, so it is rejected the same way as an empty value.
    if not text or not text.strip():
        return {"error": "Please provide text to summarize"}
    summary = summarization_pipe(text)
    return {"summary": summary[0]['summary_text']}
async def summarize_file(file: UploadFile = File(...)):
    """
    Summarize the text contained in an uploaded PDF or DOCX file.

    Args:
        file: The uploaded file; its type is decided from the filename
            extension (case-insensitive).

    Returns:
        ``{"summary": <str>}`` on success, otherwise ``{"error": <str>}``
        when the extension is unsupported, no text could be extracted, or
        extraction/summarization raised an exception.
    """
    # NOTE(review): no route decorator (e.g. ``@app.post(...)``) is visible on
    # this handler in this view — confirm it is registered with ``app``.
    # ``filename`` is typed as optional; fall back to "" so a nameless upload
    # gets the "unsupported format" reply instead of an AttributeError.
    file_name = (file.filename or "").lower()

    # Pick the extractor before reading, so unsupported uploads are rejected
    # without loading their contents into memory.
    if file_name.endswith('.pdf'):
        extract = extract_text_from_pdf
    elif file_name.endswith('.docx'):
        extract = extract_text_from_docx
    else:
        return {"error": "Unsupported file format. Please upload a PDF or DOCX file."}

    contents = await file.read()
    try:
        text = extract(contents)
        # Whitespace-only output (e.g. a document of empty paragraphs) is
        # truthy but holds nothing to summarize.
        if not text or not text.strip():
            return {"error": "Could not extract text from the file"}
        summary = summarization_pipe(text)
        return {"summary": summary[0]['summary_text']}
    except Exception as e:
        # Boundary handler: report the failure to the client in the same
        # ``{"error": ...}`` shape used by the other failure paths.
        return {"error": f"Error processing file: {str(e)}"}
if __name__ == "__main__":
    # Imported here because uvicorn is only needed when this file is run
    # directly as a script.
    import uvicorn

    # Serve the app on all interfaces, port 7860.
    uvicorn.run(app, host="0.0.0.0", port=7860)