Spaces:
Sleeping
Sleeping
Update api.py
Browse files
api.py
CHANGED
|
@@ -80,7 +80,7 @@ def get_content(number: str, node_type: str) -> str:
|
|
| 80 |
logger.error(f"An unexpected error occurred in get_content for {number}: {e}")
|
| 81 |
return ""
|
| 82 |
|
| 83 |
-
def
|
| 84 |
"""Extracts information from an Arxiv research paper and generates a summary."""
|
| 85 |
|
| 86 |
rp_data = {
|
|
@@ -154,7 +154,7 @@ def extract_research_paper_arxiv(rp_number: str, node_type: str = "ResearchPaper
|
|
| 154 |
rp_data["summary"] = "Summary not generated (Abstract unavailable or problematic)"
|
| 155 |
return rp_data
|
| 156 |
|
| 157 |
-
def
|
| 158 |
"""
|
| 159 |
Extracts information from a Google Patents page with robust error handling.
|
| 160 |
"""
|
|
@@ -223,19 +223,19 @@ def extract_patent_data(patent_number: str, node_type: str = "Patent"):
|
|
| 223 |
|
| 224 |
prompt = f"""You are a 3GPP standardization expert. Summarize the key information in the provided document in simple technical English relevant to identifying potential Key Issues.
|
| 225 |
Focus on challenges, gaps, or novel aspects.
|
| 226 |
-
Here is the document: <document>{
|
| 227 |
|
| 228 |
try:
|
| 229 |
model = genai.GenerativeModel("gemini-2.5-flash-preview-05-20")
|
| 230 |
response = model.generate_content(prompt)
|
| 231 |
|
| 232 |
-
|
| 233 |
logger.info(f"Summary generated for Patent ID: {patent_number}")
|
| 234 |
except Exception as e:
|
| 235 |
logger.error(f"Error generating summary with Gemini for Patent ID {patent_number}: {e}")
|
| 236 |
-
|
| 237 |
else:
|
| 238 |
-
rp_data["summary"] = "Summary not generated (
|
| 239 |
return patent_data
|
| 240 |
|
| 241 |
def add_nodes_to_neo4j(driver, data_list: list, node_type: str):
|
|
@@ -291,7 +291,7 @@ async def add_single_research_paper(arxiv_id: str):
|
|
| 291 |
raise HTTPException(status_code=500, detail="Neo4j database connection details are not configured on the server.")
|
| 292 |
|
| 293 |
# Step 1: Extract paper data
|
| 294 |
-
paper_data =
|
| 295 |
|
| 296 |
if paper_data["title"].startswith("Error fetching content") or paper_data["title"] == "Title not found on page":
|
| 297 |
logger.warning(f"Could not fetch or parse content for Arxiv ID {arxiv_id}. Title: {paper_data['title']}")
|
|
@@ -325,6 +325,60 @@ async def add_single_research_paper(arxiv_id: str):
|
|
| 325 |
except Exception as e:
|
| 326 |
logger.error(f"An unexpected error occurred during Neo4j operation for {arxiv_id}: {e}", exc_info=True)
|
| 327 |
raise HTTPException(status_code=500, detail=f"An unexpected server error occurred: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 328 |
finally:
|
| 329 |
if driver_instance:
|
| 330 |
driver_instance.close()
|
|
|
|
| 80 |
logger.error(f"An unexpected error occurred in get_content for {number}: {e}")
|
| 81 |
return ""
|
| 82 |
|
| 83 |
+
def extract_arxiv(rp_number: str, node_type: str = "ResearchPaper") -> dict:
|
| 84 |
"""Extracts information from an Arxiv research paper and generates a summary."""
|
| 85 |
|
| 86 |
rp_data = {
|
|
|
|
| 154 |
rp_data["summary"] = "Summary not generated (Abstract unavailable or problematic)"
|
| 155 |
return rp_data
|
| 156 |
|
| 157 |
+
def extract_google_patents(patent_number: str, node_type: str = "Patent"):
|
| 158 |
"""
|
| 159 |
Extracts information from a Google Patents page with robust error handling.
|
| 160 |
"""
|
|
|
|
| 223 |
|
| 224 |
prompt = f"""You are a 3GPP standardization expert. Summarize the key information in the provided document in simple technical English relevant to identifying potential Key Issues.
|
| 225 |
Focus on challenges, gaps, or novel aspects.
|
| 226 |
+
Here is the document: <document>{patent_data['description']}<document>"""
|
| 227 |
|
| 228 |
try:
|
| 229 |
model = genai.GenerativeModel("gemini-2.5-flash-preview-05-20")
|
| 230 |
response = model.generate_content(prompt)
|
| 231 |
|
| 232 |
+
patent_data["summary"] = response.text
|
| 233 |
logger.info(f"Summary generated for Patent ID: {patent_number}")
|
| 234 |
except Exception as e:
|
| 235 |
logger.error(f"Error generating summary with Gemini for Patent ID {patent_number}: {e}")
|
| 236 |
+
patent_data["summary"] = "Error generating summary (API failure)"
|
| 237 |
else:
|
| 238 |
+
patent_data["summary"] = "Summary not generated (Description unavailable or problematic)"
|
| 239 |
return patent_data
|
| 240 |
|
| 241 |
def add_nodes_to_neo4j(driver, data_list: list, node_type: str):
|
|
|
|
| 291 |
raise HTTPException(status_code=500, detail="Neo4j database connection details are not configured on the server.")
|
| 292 |
|
| 293 |
# Step 1: Extract paper data
|
| 294 |
+
paper_data = extract_arxiv(arxiv_id, node_type)
|
| 295 |
|
| 296 |
if paper_data["title"].startswith("Error fetching content") or paper_data["title"] == "Title not found on page":
|
| 297 |
logger.warning(f"Could not fetch or parse content for Arxiv ID {arxiv_id}. Title: {paper_data['title']}")
|
|
|
|
| 325 |
except Exception as e:
|
| 326 |
logger.error(f"An unexpected error occurred during Neo4j operation for {arxiv_id}: {e}", exc_info=True)
|
| 327 |
raise HTTPException(status_code=500, detail=f"An unexpected server error occurred: {e}")
|
| 328 |
+
finally:
|
| 329 |
+
if driver_instance:
|
| 330 |
+
driver_instance.close()
|
| 331 |
+
logger.info("Neo4j connection closed.")
|
| 332 |
+
|
| 333 |
+
|
| 334 |
+
@app.post("/add_patent/{patent_id}", status_code=201) # 201 Created for successful creation
|
| 335 |
+
async def add_single_patent(patent_id: str):
|
| 336 |
+
"""
|
| 337 |
+
Fetches a patent from Google Patents by its ID, extracts information,
|
| 338 |
+
generates a summary, and adds/updates it as a 'Patent' node in Neo4j.
|
| 339 |
+
"""
|
| 340 |
+
node_type = "Patent"
|
| 341 |
+
logger.info(f"Processing request for Patent ID: {patent_id}")
|
| 342 |
+
|
| 343 |
+
if not NEO4J_URI or not NEO4J_USER or not NEO4J_PASSWORD:
|
| 344 |
+
logger.error("Neo4j database connection details are not configured on the server.")
|
| 345 |
+
raise HTTPException(status_code=500, detail="Neo4j database connection details are not configured on the server.")
|
| 346 |
+
|
| 347 |
+
# Step 1: Extract patent data
|
| 348 |
+
patent_data = extract_google_patents(patent_id, node_type)
|
| 349 |
+
|
| 350 |
+
if patent_data["title"].startswith("Error fetching content") or patent_data["title"] == "Title not found on page":
|
| 351 |
+
logger.warning(f"Could not fetch or parse content for Patent ID {patent_id}. Title: {patent_data['title']}")
|
| 352 |
+
raise HTTPException(status_code=404, detail=f"Could not fetch or parse content for Patent ID {patent_id}. Title: {patent_data['title']}")
|
| 353 |
+
|
| 354 |
+
# Step 2: Add/Update in Neo4j
|
| 355 |
+
driver_instance = None # Initialize for the finally block
|
| 356 |
+
try:
|
| 357 |
+
auth_token = basic_auth(NEO4J_USER, NEO4J_PASSWORD)
|
| 358 |
+
driver_instance = GraphDatabase.driver(NEO4J_URI, auth=auth_token)
|
| 359 |
+
driver_instance.verify_connectivity()
|
| 360 |
+
logger.info("Successfully connected to Neo4j.")
|
| 361 |
+
|
| 362 |
+
nodes_created_count = add_nodes_to_neo4j(driver_instance, [patent_data], node_type)
|
| 363 |
+
|
| 364 |
+
if nodes_created_count > 0 :
|
| 365 |
+
logger.info(f"Patent {patent_id} was successfully added to Neo4j.")
|
| 366 |
+
status_code_response = 201 # Created
|
| 367 |
+
|
| 368 |
+
# Note: FastAPI uses the status_code from the decorator or HTTPException.
|
| 369 |
+
# This custom status_code_response is for the JSON body if needed, but the actual HTTP response status
|
| 370 |
+
# will be 201 (from decorator) unless an HTTPException overrides it or we change the decorator based on logic.
|
| 371 |
+
# For simplicity here, we'll return it in the body and let the decorator's 201 stand if no error.
|
| 372 |
+
# A more advanced setup might change the response status dynamically.
|
| 373 |
+
|
| 374 |
+
return {"data": patent_data}
|
| 375 |
+
|
| 376 |
+
except HTTPException as e: # Re-raise HTTPExceptions
|
| 377 |
+
logger.error(f"HTTPException during Neo4j operation for {patent_id}: {e.detail}")
|
| 378 |
+
raise e
|
| 379 |
+
except Exception as e:
|
| 380 |
+
logger.error(f"An unexpected error occurred during Neo4j operation for {patent_id}: {e}", exc_info=True)
|
| 381 |
+
raise HTTPException(status_code=500, detail=f"An unexpected server error occurred: {e}")
|
| 382 |
finally:
|
| 383 |
if driver_instance:
|
| 384 |
driver_instance.close()
|