Spaces:
Sleeping
Sleeping
| from fastapi import FastAPI, UploadFile, HTTPException | |
| import asyncio | |
| from utils.PdfUtils import ProcessPdf | |
| from utils.HelperFunctions import ( | |
| generate_file_id, | |
| save_to_database, | |
| retrieve_from_database, | |
| ) | |
| from utils.VectorDatabase import AdvancedClient | |
| from utils.ModelCallingFunctions import ( | |
| industry_finder, | |
| other_info, | |
| business_information, | |
| ) | |
# ASGI application object for this module.
| app = FastAPI() | |
# Project vector-store client, created once at import time and shared by
# get_analysis for collection lookups and collection creation.
# NOTE(review): "VectorDB" is presumably a storage path or database name --
# confirm against utils.VectorDatabase.AdvancedClient.
| client = AdvancedClient("VectorDB") | |
async def _build_analysis(pdf_id, file_name):
    """Run the extraction pipeline for an already-indexed deck and persist it.

    Args:
        pdf_id: Identifier of the vector collection holding the deck's chunks;
            also used as the key under which the result is saved.
        file_name: Original upload file name, recorded under the
            ``"pitch-deck"`` key of the industry information.

    Returns:
        A dict with the keys ``"industry"``, ``"other_info"`` and
        ``"business_info"``.
    """
    # Starting of pitch deck information extraction and structuring
    industry_info = industry_finder(collection_id=pdf_id)
    industry_info["pitch-deck"] = file_name
    # other_info consumes the industry result, so it has to run after it.
    other_info_results = await other_info(company_data=industry_info)
    business_info = await business_information(collection_id=pdf_id)
    analysis = {
        "industry": industry_info,
        "other_info": other_info_results,
        "business_info": business_info,
    }
    save_to_database(_id=pdf_id, data=analysis)
    return analysis


async def get_analysis(pdf_file: UploadFile):
    """Analyse an uploaded pitch-deck PDF, reusing a stored result if present.

    The file bytes are turned into an identifier by ``generate_file_id``. That
    identifier names the vector collection and keys the saved analysis, so a
    file whose collection already exists is served from the database instead
    of being processed again.

    Args:
        pdf_file: The uploaded pitch-deck PDF.

    Returns:
        The analysis for the file: either freshly built (a dict with the keys
        ``"industry"``, ``"other_info"`` and ``"business_info"``) or whatever
        ``retrieve_from_database`` returns for a previously analysed file.

    Raises:
        HTTPException: With status 400 when no file is provided or the
            uploaded file has no content.
    """
    if not pdf_file:
        raise HTTPException(status_code=400, detail="Pitch PDF file not provided")

    pdf_content = await pdf_file.read()
    if not pdf_content:
        # A zero-byte upload cannot be a PDF; reject it before it is hashed
        # and indexed into a useless collection.
        raise HTTPException(status_code=400, detail="Pitch PDF file is empty")

    pdf_id = generate_file_id(pdf_content)
    file_name = pdf_file.filename

    already_indexed = any(
        collection.name == pdf_id
        for collection in client.client.list_collections()
    )
    if not already_indexed:
        pdf_chunks = ProcessPdf(pdf_content=pdf_content)
        client.create_collection(collection_id=pdf_id, file_datas=pdf_chunks)
        return await _build_analysis(pdf_id, file_name)

    stored_analysis = retrieve_from_database(_id=pdf_id)
    if stored_analysis:
        return stored_analysis

    # The collection exists but no analysis was stored, e.g. an earlier request
    # failed after indexing and before saving. Rebuild instead of returning an
    # empty result forever.
    # NOTE(review): assumes retrieve_from_database returns a falsy value on a
    # miss -- confirm; if it raises instead, this path is never reached.
    return await _build_analysis(pdf_id, file_name)