def main():
    """Run the two-task pre-processing pipeline.

    Parses the options with ``parse_args``, calls ``init_logger`` with
    ``opt.log_file``, then builds and saves the training and validation
    datasets for ``'task'`` and ``'task2'`` via ``build_save_dataset``.
    Finally ``build_save_vocab`` is called with the training outputs of both
    tasks concatenated.

    Raises:
        AssertionError: If ``opt.shuffle`` is greater than 0; shuffling is
            not implemented by this script.
    """
    opt = parse_args()

    if opt.shuffle > 0:
        # Adjacent string literals are used instead of a backslash
        # continuation inside the literal, which leaked a stray backslash /
        # source indentation into the message shown to the user.
        raise AssertionError(
            "-shuffle is not implemented, please make sure "
            "you shuffle your data before pre-processing.")

    init_logger(opt.log_file)
    logger.info("Input args: %r", opt)
    logger.info("Extracting features...")

    logger.info("Building `Fields` object...")
    fields = get_fields()
    # NOTE(review): the results of these two calls were bound to locals that
    # were never read. The calls are kept in case they have side effects;
    # confirm and remove them if they are pure.
    get_task_fields()
    get_task2_fields()

    logger.info("Building & saving task training data...")
    train_dataset_files = build_save_dataset('train', 'task', fields, opt)

    logger.info("Building & saving task2 training data...")
    train_dataset_files2 = build_save_dataset('train', 'task2', fields, opt)

    logger.info("Building & saving task validation data...")
    build_save_dataset('valid', 'task', fields, opt)

    logger.info("Building & saving task2 validation data...")
    build_save_dataset('valid', 'task2', fields, opt)

    # Only the training outputs (both tasks) feed the vocabulary; the
    # validation outputs are not passed on.
    logger.info("Building & saving vocabulary...")
    build_save_vocab(train_dataset_files + train_dataset_files2, fields, opt)
def main():
    """Run the pre-processing pipeline.

    Parses the options with ``parse_args``, calls ``init_logger`` with
    ``opt.log_file``, builds and saves the training and validation datasets
    via ``build_save_dataset``, and finally calls ``build_save_vocab`` with
    the training output only.

    Raises:
        AssertionError: If ``opt.shuffle`` is greater than 0; shuffling is
            not implemented by this script.
    """
    opt = parse_args()

    if opt.shuffle > 0:
        # Adjacent string literals are used instead of a backslash
        # continuation inside the literal, which leaked a stray backslash /
        # source indentation into the message shown to the user.
        raise AssertionError(
            "-shuffle is not implemented, please make sure "
            "you shuffle your data before pre-processing.")

    init_logger(opt.log_file)
    logger.info("Input args: %r", opt)
    logger.info("Extracting features...")

    logger.info("Building 'Fields' object...")
    fields = get_fields()

    logger.info("Building & saving training data...")
    # Per the original note, this returns the list of generated files.
    train_dataset_files = build_save_dataset('train', fields, opt)

    logger.info("Building & saving validation data...")
    build_save_dataset('valid', fields, opt)

    # Only the training set is used to create the vocabulary.
    logger.info("Building & saving vocabulary...")
    build_save_vocab(train_dataset_files, fields, opt)
def main():
    """Run the pre-processing pipeline.

    Parses the options with ``parse_args``, calls ``init_logger`` with
    ``opt.log_file``, builds and saves the training and validation datasets
    via ``build_save_dataset``, and finally calls ``build_save_vocab`` with
    the training output only.

    Raises:
        AssertionError: If ``opt.shuffle`` is greater than 0; shuffling is
            not implemented by this script.
    """
    # Parsed command-line options.
    opt = parse_args()

    if opt.shuffle > 0:
        # Adjacent string literals are used instead of a backslash
        # continuation inside the literal, which leaked a stray backslash /
        # source indentation into the message shown to the user.
        raise AssertionError(
            "-shuffle is not implemented, please make sure "
            "you shuffle your data before pre-processing.")

    init_logger(opt.log_file)
    logger.info("Input args: %r", opt)
    logger.info("Extracting features...")

    logger.info("Building `Fields` object...")
    # Per the original note: a dict of torchtext fields (src, target,
    # indices) that holds no data yet -- TODO confirm against get_fields.
    fields = get_fields()

    logger.info("Building & saving training data...")
    # Per the original note: shards the source and returns the shard file
    # paths -- TODO confirm against build_save_dataset.
    train_dataset_files = build_save_dataset('train', fields, opt)

    logger.info("Building & saving validation data...")
    # Return value is intentionally unused: only the training output is
    # passed to build_save_vocab below.
    build_save_dataset('valid', fields, opt)

    logger.info("Building & saving vocabulary...")
    build_save_vocab(train_dataset_files, fields, opt)
def main():
    """Run the pre-processing pipeline.

    Parses the options with ``parse_args``, prints them, calls
    ``init_logger`` with ``opt.log_file``, builds and saves the training and
    validation datasets via ``build_save_dataset``, and finally calls
    ``build_save_vocab`` with the training output only.

    Raises:
        AssertionError: If ``opt.shuffle`` is greater than 0; shuffling is
            not implemented by this script.
    """
    opt = parse_args()

    if opt.shuffle > 0:
        # Adjacent string literals are used instead of a backslash
        # continuation inside the literal, which leaked a stray backslash /
        # source indentation into the message shown to the user.
        raise AssertionError(
            "-shuffle is not implemented, please make sure "
            "you shuffle your data before pre-processing.")

    # The options are echoed with print() before the logger is initialised.
    print(opt)

    # Per the original note: all log output is written to both the log file
    # and the console -- TODO confirm against init_logger.
    init_logger(opt.log_file)

    logger.info("Extracting features...")

    logger.info("Building `Fields` object...")
    fields = get_fields()

    logger.info("Building & saving training data...")
    train_dataset_files = build_save_dataset('train', fields, opt)

    logger.info("Building & saving validation data...")
    build_save_dataset('valid', fields, opt)

    # Only the training output is passed on to build the vocabulary.
    logger.info("Building & saving vocabulary...")
    build_save_vocab(train_dataset_files, fields, opt)