Exemplo n.º 1
0
def nmt_construction(
        config_file="/home/user_data/baoy/projects/seq2seq_parser/configs/data_configs/nmt.yaml"):
    """Build the NMT datasets described by a YAML data config.

    ``make_dataset`` is invoked twice on the same source directory and
    split files: first with ``tgt_dir`` set to the source directory itself
    and both length limits set to -1, then with ``tgt_dir`` set to
    ``sample_tgts`` and the length limits taken from the config.

    The config must provide ``origin_tgts``, ``sample_tgts``,
    ``max_src_vocab``, ``max_tgt_vocab``, ``cut_off``, ``max_src_length``
    and ``max_tgt_length``.

    Args:
        config_file: Path to the YAML data config. The default is the
            location that used to be hard-coded in the body, so callers
            that pass no argument behave exactly as before.
    """
    args = dict_to_args(yaml_load_dict(config_file))
    data_dir = args.origin_tgts
    data_dict = {
        "train": "train.s2b",
        "dev": "dev.s2b",
        "test": "test.s2b",
    }
    # Keyword arguments shared verbatim by both make_dataset calls.
    shared_kwargs = {
        "data_dir": data_dir,
        "data_dict": data_dict,
        "max_src_vocab": args.max_src_vocab,
        "max_tgt_vocab": args.max_tgt_vocab,
        "vocab_freq_cutoff": args.cut_off,
    }
    # First pass: target directory is the origin directory, length limits
    # are -1 (presumably "no length filtering" -- confirm in make_dataset).
    make_dataset(
        tgt_dir=data_dir,
        max_src_length=-1,
        max_tgt_length=-1,
        **shared_kwargs
    )
    # Second pass: separate target directory, length limits from the config.
    make_dataset(
        tgt_dir=args.sample_tgts,
        max_src_length=args.max_src_length,
        max_tgt_length=args.max_tgt_length,
        **shared_kwargs
    )
Exemplo n.º 2
0
def snli_sample_construction(
        is_write=True,
        config_file="/home/user_data/baoy/projects/seq2seq_parser/configs/data_configs/snli-sample.yaml"):
    """Build the SNLI sample dataset described by a YAML data config.

    Loads the config, then calls ``make_dataset`` once with the source
    directory ``origin_tgts``, the target directory ``target_tgts`` and
    both length limits set to -1.

    The config must provide ``origin_tgts``, ``target_tgts``,
    ``max_src_vocab``, ``max_tgt_vocab`` and ``cut_off``.

    Args:
        is_write: Forwarded to ``make_dataset`` as ``write_down``.
        config_file: Path to the YAML data config. The default is the
            location that used to be hard-coded in the body; it is placed
            after ``is_write`` so existing positional callers keep working.
    """
    args = dict_to_args(yaml_load_dict(config_file))
    data_dir = args.origin_tgts
    data_dict = {
        "train": "train.s2b",
        "dev": "dev.s2b",
        "test": "test.s2b",
    }
    make_dataset(
        data_dir=data_dir,
        data_dict=data_dict,
        tgt_dir=args.target_tgts,
        max_src_vocab=args.max_src_vocab,
        max_tgt_vocab=args.max_tgt_vocab,
        vocab_freq_cutoff=args.cut_off,
        # -1 for both limits: presumably "no length filtering" -- confirm
        # against make_dataset.
        max_src_length=-1,
        max_tgt_length=-1,
        write_down=is_write,
    )
Exemplo n.º 3
0
def quora_construction(
        config_file="/home/user_data/baoy/projects/seq2seq_parser/configs/data_configs/quora-50k.yaml",
        is_write=True):
    """Build the Quora dataset described by a YAML data config.

    Loads ``config_file``, then calls ``make_dataset`` once, reading the
    source directory, target directory, vocabulary limits, frequency
    cutoff, length limits and training-set size from the loaded config.

    Args:
        config_file: Path to the YAML data config.
        is_write: Forwarded to ``make_dataset`` as ``write_down``.
    """
    cfg = dict_to_args(yaml_load_dict(config_file))
    source_dir = cfg.origin_tgts
    split_files = dict(train="train.s2b", dev="dev.s2b", test="test.s2b")

    # Collect the keyword arguments first, then make a single call.
    dataset_kwargs = {
        "data_dir": source_dir,
        "data_dict": split_files,
        "tgt_dir": cfg.data_tgts,
        "max_src_vocab": cfg.max_src_vocab,
        "max_tgt_vocab": cfg.max_tgt_vocab,
        "vocab_freq_cutoff": cfg.cut_off,
        "max_src_length": cfg.max_src_length,
        "max_tgt_length": cfg.max_tgt_length,
        "train_size": cfg.train_size,
        "write_down": is_write,
    }
    make_dataset(**dataset_kwargs)