Spaces:
Running
Running
Update data_preprocess.py
Browse files- data_preprocess.py +31 -24
data_preprocess.py
CHANGED
|
@@ -532,32 +532,39 @@ from pipeline import upload_file_to_drive
|
|
| 532 |
# print(f"Text successfully saved to '{file_path}'")
|
| 533 |
# except Exception as e:
|
| 534 |
# print(f"Error saving text to docx file: {e}")
|
| 535 |
-
def save_text_to_docx(text_content: str, filename: str, drive_folder_id: str):
|
| 536 |
-
|
| 537 |
-
|
| 538 |
|
| 539 |
-
|
| 540 |
-
|
| 541 |
-
|
| 542 |
-
|
| 543 |
-
|
| 544 |
-
|
| 545 |
-
|
| 546 |
-
|
| 547 |
-
|
| 548 |
-
|
| 549 |
-
|
| 550 |
-
|
| 551 |
-
|
| 552 |
-
|
| 553 |
-
|
| 554 |
-
|
| 555 |
-
|
| 556 |
-
|
| 557 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 558 |
|
| 559 |
-
except Exception as e:
|
| 560 |
-
print(f"❌ Error saving or uploading DOCX: {e}")
|
| 561 |
|
| 562 |
|
| 563 |
'''2 scenerios:
|
|
|
|
| 532 |
# print(f"Text successfully saved to '{file_path}'")
|
| 533 |
# except Exception as e:
|
| 534 |
# print(f"Error saving text to docx file: {e}")
|
| 535 |
+
# def save_text_to_docx(text_content: str, filename: str, drive_folder_id: str):
|
| 536 |
+
# """
|
| 537 |
+
# Saves a given text string into a .docx file locally, then uploads to Google Drive.
|
| 538 |
|
| 539 |
+
# Args:
|
| 540 |
+
# text_content (str): The text string to save.
|
| 541 |
+
# filename (str): The target .docx file name, e.g. 'BRU18_merged_document.docx'.
|
| 542 |
+
# drive_folder_id (str): Google Drive folder ID where to upload the file.
|
| 543 |
+
# """
|
| 544 |
+
# try:
|
| 545 |
+
# # ✅ Save to temporary local path first
|
| 546 |
+
# print("file name: ", filename)
|
| 547 |
+
# print("length text content: ", len(text_content))
|
| 548 |
+
# local_path = os.path.join(tempfile.gettempdir(), filename)
|
| 549 |
+
# document = Document()
|
| 550 |
+
# for paragraph_text in text_content.split('\n'):
|
| 551 |
+
# document.add_paragraph(paragraph_text)
|
| 552 |
+
# document.save(local_path)
|
| 553 |
+
# print(f"✅ Text saved locally to: {local_path}")
|
| 554 |
+
|
| 555 |
+
# # ✅ Upload to Drive
|
| 556 |
+
# pipeline.upload_file_to_drive(local_path, filename, drive_folder_id)
|
| 557 |
+
# print(f"✅ Uploaded '{filename}' to Google Drive folder ID: {drive_folder_id}")
|
| 558 |
+
|
| 559 |
+
# except Exception as e:
|
| 560 |
+
# print(f"❌ Error saving or uploading DOCX: {e}")
|
| 561 |
+
def save_text_to_docx(text_content: str, full_local_path: str):
|
| 562 |
+
document = Document()
|
| 563 |
+
for paragraph_text in text_content.split('\n'):
|
| 564 |
+
document.add_paragraph(paragraph_text)
|
| 565 |
+
document.save(full_local_path)
|
| 566 |
+
print(f"✅ Saved DOCX locally: {full_local_path}")
|
| 567 |
|
|
|
|
|
|
|
| 568 |
|
| 569 |
|
| 570 |
'''2 scenerios:
|