Spaces:
Running
Running
Update pipeline.py
Browse files- pipeline.py +6 -4
pipeline.py
CHANGED
|
@@ -296,7 +296,7 @@ def pipeline_with_gemini(accessions):
|
|
| 296 |
if iso != "unknown": query_kw = iso
|
| 297 |
else: query_kw = acc
|
| 298 |
#text_link, tables_link, final_input_link = data_preprocess.preprocess_document(link,saveLinkFolder, isolate=query_kw)
|
| 299 |
-
success_process, output_process = run_with_timeout(data_preprocess.preprocess_document,args=(link,sample_folder_id),kwargs={"isolate":query_kw},timeout=
|
| 300 |
if success_process:
|
| 301 |
text_link, tables_link, final_input_link = output_process[0], output_process[1], output_process[2]
|
| 302 |
print("yes succeed for process document")
|
|
@@ -377,16 +377,18 @@ def pipeline_with_gemini(accessions):
|
|
| 377 |
faiss_filename = "faiss_index.bin"
|
| 378 |
chunks_filename = "document_chunks.json"
|
| 379 |
lookup_filename = "structured_lookup.json"
|
| 380 |
-
|
|
|
|
| 381 |
# Save in temporary local directory
|
| 382 |
faiss_index_path = os.path.join(tmp_dir, faiss_filename)
|
| 383 |
document_chunks_path = os.path.join(tmp_dir, chunks_filename)
|
| 384 |
structured_lookup_path = os.path.join(tmp_dir, lookup_filename)
|
| 385 |
-
|
|
|
|
| 386 |
download_file_from_drive(faiss_filename, sample_folder_id, faiss_index_path)
|
| 387 |
download_file_from_drive(chunks_filename, sample_folder_id, document_chunks_path)
|
| 388 |
download_file_from_drive(lookup_filename, sample_folder_id, structured_lookup_path)
|
| 389 |
-
|
| 390 |
master_structured_lookup, faiss_index, document_chunks = model.load_rag_assets(
|
| 391 |
faiss_index_path, document_chunks_path, structured_lookup_path
|
| 392 |
)
|
|
|
|
| 296 |
if iso != "unknown": query_kw = iso
|
| 297 |
else: query_kw = acc
|
| 298 |
#text_link, tables_link, final_input_link = data_preprocess.preprocess_document(link,saveLinkFolder, isolate=query_kw)
|
| 299 |
+
success_process, output_process = run_with_timeout(data_preprocess.preprocess_document,args=(link,sample_folder_id),kwargs={"isolate":query_kw},timeout=100)
|
| 300 |
if success_process:
|
| 301 |
text_link, tables_link, final_input_link = output_process[0], output_process[1], output_process[2]
|
| 302 |
print("yes succeed for process document")
|
|
|
|
| 377 |
faiss_filename = "faiss_index.bin"
|
| 378 |
chunks_filename = "document_chunks.json"
|
| 379 |
lookup_filename = "structured_lookup.json"
|
| 380 |
+
print("name of faiss: ", faiss_filename)
|
| 381 |
+
|
| 382 |
# Save in temporary local directory
|
| 383 |
faiss_index_path = os.path.join(tmp_dir, faiss_filename)
|
| 384 |
document_chunks_path = os.path.join(tmp_dir, chunks_filename)
|
| 385 |
structured_lookup_path = os.path.join(tmp_dir, lookup_filename)
|
| 386 |
+
print("name if faiss path: ", faiss_index_path)
|
| 387 |
+
|
| 388 |
download_file_from_drive(faiss_filename, sample_folder_id, faiss_index_path)
|
| 389 |
download_file_from_drive(chunks_filename, sample_folder_id, document_chunks_path)
|
| 390 |
download_file_from_drive(lookup_filename, sample_folder_id, structured_lookup_path)
|
| 391 |
+
print("move to load rag")
|
| 392 |
master_structured_lookup, faiss_index, document_chunks = model.load_rag_assets(
|
| 393 |
faiss_index_path, document_chunks_path, structured_lookup_path
|
| 394 |
)
|