def main():
    """Preprocess the 'task' and 'task2' corpora and save a joint vocabulary.

    Reads the options from ``parse_args()``, sets up logging, then builds and
    saves the 'train' and 'valid' datasets for both corpus names. The results
    returned for the two training builds are concatenated and handed to
    ``build_save_vocab``; the validation builds are not used for the
    vocabulary.

    Raises:
        AssertionError: if ``opt.shuffle`` is greater than 0, because
            shuffling is not implemented by this script.
    """
    opt = parse_args()
    if opt.shuffle > 0:
        # Adjacent string literals are used instead of a backslash
        # continuation inside the literal, which would embed the next line's
        # indentation in the message.
        raise AssertionError(
            "-shuffle is not implemented, please make sure "
            "you shuffle your data before pre-processing.")
    init_logger(opt.log_file)
    logger.info("Input args: %r", opt)
    logger.info("Extracting features...")

    logger.info("Building `Fields` object...")
    fields = get_fields()
    # NOTE(review): the two results below are never used in this function;
    # both dataset builds receive `fields`. The calls are kept in case the
    # helpers have side effects -- confirm and remove if they do not.
    task1_fields = get_task_fields()
    task2_fields = get_task2_fields()

    logger.info("Building & saving task training data...")
    train_dataset_files = build_save_dataset('train', 'task', fields, opt)
    logger.info("Building & saving task2 training data...")
    train_dataset_files2 = build_save_dataset('train', 'task2', fields, opt)

    logger.info("Building & saving task validation data...")
    build_save_dataset('valid', 'task', fields, opt)
    logger.info("Building & saving task2 validation data...")
    build_save_dataset('valid', 'task2', fields, opt)

    logger.info("Building & saving vocabulary...")

    # Only the training results (both corpora) feed the vocabulary.
    build_save_vocab(train_dataset_files + train_dataset_files2, fields, opt)
def main():
    """Preprocess the training and validation data and save the vocabulary.

    Reads the options from ``parse_args()``, sets up logging, builds and
    saves the 'train' and 'valid' datasets, and passes the result of the
    training build to ``build_save_vocab``.

    Raises:
        AssertionError: if ``opt.shuffle`` is greater than 0, because
            shuffling is not implemented by this script.
    """
    opt = parse_args()
    if opt.shuffle > 0:
        # Adjacent string literals are used instead of a backslash
        # continuation inside the literal, which would embed the next line's
        # indentation in the message.
        raise AssertionError(
            "-shuffle is not implemented, please make sure "
            "you shuffle your data before pre-processing.")
    init_logger(opt.log_file)
    logger.info("Input args: %r", opt)
    logger.info("Extracting features...")

    logger.info("Building 'Fields' object...")
    fields = get_fields()

    logger.info("Building & saving training data...")
    # Per the original note, this returns the list of generated files.
    train_dataset_files = build_save_dataset('train', fields, opt)

    logger.info("Building & saving validation data...")
    build_save_dataset('valid', fields, opt)

    logger.info("Building & saving vocabulary...")
    # The vocabulary is built from the training set only.
    build_save_vocab(train_dataset_files, fields, opt)
# Example #3
# 0
def main():
    """Preprocess the training and validation data and save the vocabulary.

    Reads the options from ``parse_args()``, sets up logging, builds and
    saves the 'train' and 'valid' datasets, and passes the result of the
    training build to ``build_save_vocab``.

    Raises:
        AssertionError: if ``opt.shuffle`` is greater than 0, because
            shuffling is not implemented by this script.
    """
    opt = parse_args()  # parsed option arguments
    if opt.shuffle > 0:
        # Adjacent string literals are used instead of a backslash
        # continuation inside the literal, which would embed the next line's
        # indentation in the message.
        raise AssertionError(
            "-shuffle is not implemented, please make sure "
            "you shuffle your data before pre-processing.")
    init_logger(opt.log_file)
    logger.info("Input args: %r", opt)
    logger.info("Extracting features...")

    logger.info("Building `Fields` object...")
    # Per the original note: a dict holding the torchtext.data src, target
    # and indices field definitions, without any data attached yet.
    fields = get_fields()

    logger.info("Building & saving training data...")
    # Per the original note: shards the source and returns the shard file
    # paths.
    train_dataset_files = build_save_dataset('train', fields, opt)

    logger.info("Building & saving validation data...")
    # Same call as for training; the return value is not needed here.
    build_save_dataset('valid', fields, opt)

    logger.info("Building & saving vocabulary...")
    build_save_vocab(train_dataset_files, fields, opt)
# Example #4
# 0
def main():
    """Preprocess the training and validation data and save the vocabulary.

    Reads the options from ``parse_args()``, prints them, sets up logging,
    builds and saves the 'train' and 'valid' datasets, and passes the result
    of the training build to ``build_save_vocab``.

    Raises:
        AssertionError: if ``opt.shuffle`` is greater than 0, because
            shuffling is not implemented by this script.
    """
    opt = parse_args()

    if opt.shuffle > 0:
        # Adjacent string literals are used instead of a backslash
        # continuation inside the literal, which would embed the next line's
        # indentation in the message.
        raise AssertionError(
            "-shuffle is not implemented, please make sure "
            "you shuffle your data before pre-processing.")
    # Printed before the logger exists, so this goes to stdout only.
    print(opt)
    # Per the original note: all logs are written to the file as well as the
    # console (behaviour of init_logger is not visible here -- confirm).
    init_logger(opt.log_file)
    logger.info("Extracting features...")

    logger.info("Building `Fields` object...")
    fields = get_fields()

    logger.info("Building & saving training data...")
    train_dataset_files = build_save_dataset('train', fields, opt)

    logger.info("Building & saving validation data...")
    build_save_dataset('valid', fields, opt)

    logger.info("Building & saving vocabulary...")

    # Only the training result is used to build the vocabulary.
    build_save_vocab(train_dataset_files, fields, opt)