Spaces:

ajibs75
/

Text_Summarization_API

Sleeping

App Files Files Community

Text_Summarization_API / app.py

ajibs75

Update app.py

24ccd6f verified 12 months ago

raw

history blame contribute delete

2.31 kB

	from fastapi import FastAPI, UploadFile, File, Form
	from fastapi.middleware.cors import CORSMiddleware
	import torch
	from transformers import pipeline
	import PyPDF2
	from docx import Document
	import io

	# ASGI application object; title and description are the metadata passed to FastAPI.
	app = FastAPI(
	    description="API for summarizing text and documents using Falcon's text summarization model",
	    title="Text Summarization API",
	)

	# Register the CORS middleware: every origin, HTTP method and header is
	# accepted, and credentialed requests are enabled.
	# NOTE(review): a wildcard origin together with allow_credentials=True is a
	# very permissive policy — confirm it is intended before exposing this API.
	app.add_middleware(
	    CORSMiddleware,
	    allow_headers=["*"],
	    allow_methods=["*"],
	    allow_credentials=True,
	    allow_origins=["*"],
	)

	# Build the shared summarization pipeline once, at import time.
	# The model runs on the GPU when torch reports CUDA as available, else on the CPU.
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"
	summarization_pipe = pipeline(
	    "summarization",
	    model="Falconsai/text_summarization",
	    device=device,
	)

	def extract_text_from_pdf(file_bytes: bytes) -> str:
	    """Return the text of all pages of a PDF document.

	    Args:
	        file_bytes: Raw bytes of the PDF file.

	    Returns:
	        The text of every non-empty page, joined with newlines; an empty
	        string when no page yields any text.
	    """
	    pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_bytes))
	    # `or ""` guards against a page for which extraction yields nothing.
	    page_texts = (page.extract_text() or "" for page in pdf_reader.pages)
	    # Pages are separated by a newline so the last word of one page does not
	    # fuse with the first word of the next. Empty pages are skipped so a PDF
	    # without extractable text still returns "" (callers test for emptiness).
	    return "\n".join(chunk for chunk in page_texts if chunk)

	def extract_text_from_docx(file_bytes):
	    """Return the text of a DOCX document, one paragraph per line.

	    Args:
	        file_bytes: Raw bytes of the DOCX file.

	    Returns:
	        Every paragraph's text, each followed by a newline character.
	    """
	    document = Document(io.BytesIO(file_bytes))
	    return "".join(para.text + "\n" for para in document.paragraphs)

	@app.post("/summarize/text")
	async def summarize_text(text: str = Form(...)):
	    """Summarize text submitted as a form field.

	    Args:
	        text: The text to summarize (required form field).

	    Returns:
	        ``{"summary": <str>}`` on success, or ``{"error": <str>}`` when the
	        text is empty or consists only of whitespace.
	    """
	    # Whitespace-only input carries nothing to summarize, so it is rejected
	    # the same way as an empty string instead of being sent to the model.
	    if not text or not text.strip():
	        return {"error": "Please provide text to summarize"}

	    # NOTE(review): this call is synchronous inside an async handler, so the
	    # handler does not yield while the model runs. Consider offloading it if
	    # concurrent requests matter — first verify the pipeline may be called
	    # from several threads at once.
	    summary = summarization_pipe(text)
	    return {"summary": summary[0]['summary_text']}

	@app.post("/summarize/file")
	async def summarize_file(file: UploadFile = File(...)):
	    """Summarize the text extracted from an uploaded PDF or DOCX file.

	    Args:
	        file: The uploaded document; its format is chosen from the filename
	            extension (``.pdf`` or ``.docx``, case-insensitive).

	    Returns:
	        ``{"summary": <str>}`` on success, or ``{"error": <str>}`` when the
	        format is unsupported, no text could be extracted, or extraction or
	        summarization raised an exception.
	    """
	    contents = await file.read()
	    # An upload can arrive without a filename; fall back to "" so it is
	    # reported as an unsupported format instead of raising on None.lower().
	    file_name = (file.filename or "").lower()

	    # Pick the extractor up front so the try block only wraps the calls
	    # that can actually fail.
	    if file_name.endswith('.pdf'):
	        extractor = extract_text_from_pdf
	    elif file_name.endswith('.docx'):
	        extractor = extract_text_from_docx
	    else:
	        return {"error": "Unsupported file format. Please upload a PDF or DOCX file."}

	    try:
	        text = extractor(contents)

	        # Whitespace-only output (e.g. a document made of empty paragraphs)
	        # counts as "no text" rather than being passed to the model.
	        if not text or not text.strip():
	            return {"error": "Could not extract text from the file"}

	        summary = summarization_pipe(text)
	        return {"summary": summary[0]['summary_text']}

	    except Exception as e:
	        # Request boundary: any parsing or inference failure is reported in
	        # the response body, matching the error shape used above.
	        return {"error": f"Error processing file: {str(e)}"}

	# Script entry point: serve the app with uvicorn on all interfaces, port 7860.
	if __name__ == "__main__":
	    from uvicorn import run as serve

	    serve(app, port=7860, host="0.0.0.0")