Commit dcbf01b

Merge pull request hiyouga#7244 from hiyouga/hiyouga/token
[data] avoid exit after saving preprocessed data
2 parents ef7af45 + 1a800f9 commit dcbf01b

File tree

1 file changed (+2, -5 lines)

src/llamafactory/data/loader.py

Lines changed: 2 additions & 5 deletions
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 import os
-import sys
 from typing import TYPE_CHECKING, Dict, Literal, Optional, Sequence, Union
 
 import numpy as np
@@ -325,12 +324,10 @@ def get_dataset(
         )
 
         dataset_dict = split_dataset(dataset, eval_dataset, data_args, seed=training_args.seed)
-        if data_args.tokenized_path is not None:  # save tokenized dataset to disk and exit
+        if data_args.tokenized_path is not None:  # save tokenized dataset to disk
             if training_args.should_save:
                 dataset_dict.save_to_disk(data_args.tokenized_path)
                 logger.info_rank0(f"Tokenized dataset is saved at {data_args.tokenized_path}.")
-                logger.info_rank0(f"Please restart the training with `tokenized_path: {data_args.tokenized_path}`.")
-
-            sys.exit(0)
+                logger.info_rank0(f"Please launch the training with `tokenized_path: {data_args.tokenized_path}`.")
 
         return get_dataset_module(dataset_dict)
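
Previously, when `tokenized_path` was set, `get_dataset()` saved the tokenized dataset and then called `sys.exit(0)`, so the run stopped and training had to be relaunched against the saved path. After this change the function saves the dataset and simply continues, returning the dataset module so training proceeds in the same run. Below is a minimal, hypothetical sketch of that save-then-continue caching pattern using only the public Hugging Face `datasets` API; the `get_tokenized_dataset` helper and `tokenize_fn` argument are illustrative and not part of LLaMA-Factory.

# Hypothetical sketch of the save-then-continue pattern (not LLaMA-Factory code).
import os

from datasets import Dataset, DatasetDict, load_from_disk


def get_tokenized_dataset(tokenized_path: str, raw_dataset: Dataset, tokenize_fn) -> DatasetDict:
    if os.path.isdir(tokenized_path):  # a previous run already saved the cache here
        return load_from_disk(tokenized_path)

    dataset_dict = DatasetDict({"train": raw_dataset.map(tokenize_fn, batched=True)})
    dataset_dict.save_to_disk(tokenized_path)  # persist for later launches ...
    return dataset_dict  # ... but keep going instead of calling sys.exit(0)

In LLaMA-Factory itself the same effect comes from setting `tokenized_path` in the training config: the first launch now tokenizes, saves, and trains in one go, and later launches can reuse the data saved at that path.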

0 commit comments
