Spaces:
Running
Running
Update pipeline.py
Browse files- pipeline.py +24 -14
pipeline.py
CHANGED
|
@@ -459,22 +459,32 @@ def pipeline_with_gemini(accessions):
|
|
| 459 |
chunks_filename = "document_chunks.json"
|
| 460 |
lookup_filename = "structured_lookup.json"
|
| 461 |
print("name of faiss: ", faiss_filename)
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
structured_lookup_path = os.path.join(tmp_dir, lookup_filename)
|
| 467 |
print("name if faiss path: ", faiss_index_path)
|
| 468 |
# 🔥 Remove the local file first if it exists
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 478 |
print("move to load rag")
|
| 479 |
master_structured_lookup, faiss_index, document_chunks = model.load_rag_assets(
|
| 480 |
faiss_index_path, document_chunks_path, structured_lookup_path
|
|
|
|
| 459 |
chunks_filename = "document_chunks.json"
|
| 460 |
lookup_filename = "structured_lookup.json"
|
| 461 |
print("name of faiss: ", faiss_filename)
|
| 462 |
+
|
| 463 |
+
faiss_index_path = os.path.join(LOCAL_TEMP_DIR, faiss_filename)
|
| 464 |
+
document_chunks_path = os.path.join(LOCAL_TEMP_DIR, chunks_filename)
|
| 465 |
+
structured_lookup_path = os.path.join(LOCAL_TEMP_DIR, lookup_filename)
|
|
|
|
| 466 |
print("name if faiss path: ", faiss_index_path)
|
| 467 |
# 🔥 Remove the local file first if it exists
|
| 468 |
+
faiss_id = find_drive_file(faiss_filename, sample_folder_id)
|
| 469 |
+
document_id = find_drive_file(chunks_filename, sample_folder_id)
|
| 470 |
+
structure_id = find_drive_file(lookup_filename, sample_folder_id)
|
| 471 |
+
if faiss_id and document_id and structure_id:
|
| 472 |
+
print("✅ 3 Files already exist in Google Drive. Downloading them...")
|
| 473 |
+
download_file_from_drive(faiss_filename, sample_folder_id, faiss_index_path)
|
| 474 |
+
download_file_from_drive(chunks_filename, sample_folder_id, document_chunks_path)
|
| 475 |
+
download_file_from_drive(lookup_filename, sample_folder_id, structured_lookup_path)
|
| 476 |
+
# Read and parse these into `chunk` and `all_output`
|
| 477 |
+
else:
|
| 478 |
+
if os.path.exists(faiss_index_path):
|
| 479 |
+
os.remove(faiss_index_path)
|
| 480 |
+
if os.path.exists(document_chunks_path):
|
| 481 |
+
os.remove(document_chunks_path)
|
| 482 |
+
if os.path.exists(structured_lookup_path):
|
| 483 |
+
os.remove(structured_lookup_path)
|
| 484 |
+
download_file_from_drive(faiss_filename, sample_folder_id, faiss_index_path)
|
| 485 |
+
download_file_from_drive(chunks_filename, sample_folder_id, document_chunks_path)
|
| 486 |
+
download_file_from_drive(lookup_filename, sample_folder_id, structured_lookup_path)
|
| 487 |
+
|
| 488 |
print("move to load rag")
|
| 489 |
master_structured_lookup, faiss_index, document_chunks = model.load_rag_assets(
|
| 490 |
faiss_index_path, document_chunks_path, structured_lookup_path
|