Spaces:
Sleeping
Sleeping
Update alignment/data.py
Browse files
- alignment/data.py +9 -0
alignment/data.py
CHANGED
@@ -18,6 +18,7 @@ import re
 from typing import List, Literal, Optional
 
 from datasets import DatasetDict, concatenate_datasets, load_dataset
+from googletrans import Translator
 
 from .configs import DataArguments
 
@@ -34,6 +35,14 @@ def apply_chat_template(
 
     if task in ["sft", "generation"]:
         messages = example["messages"]
+        print("Message Length: ", len(messages))
+        for i in range(len(messages)):
+            try:
+                translator = Translator()
+                messages[i]["content"] = translator.translate(messages[i]["content"], dest='hi').text
+            except Exception as e:
+                print(e, messages[i]["content"])
+                messages[i]["content"] = ""
         # We add an empty system message if there is none
         if messages[0]["role"] != "system":
             messages.insert(0, {"role": "system", "content": ""})