Exemplo n.º 1
0
def build_batchers(word2id, cuda, debug):
    """Create the training and validation batch generators.

    Both generators share one preprocessing function, one bucket sort key
    and one batchify pipeline; they differ in the dataset split, in whether
    the loader shuffles, and in the ``single_run`` flag.

    Note: ``args.max_art`` and ``args.max_abs`` are read from the enclosing
    module scope rather than being passed in.

    Args:
        word2id: vocabulary mapping forwarded to ``convert_batch_copy_rl``.
        cuda: forwarded to ``batchify_fn_copy_rl``; loader worker processes
            are only used when this is truthy and ``debug`` is falsy.
        debug: when truthy, turns off training-set shuffling, loader
            workers, and the ``fork`` option of the generators.

    Returns:
        A ``(train_batcher, val_batcher)`` tuple of ``BucketedGenerater``
        instances.
    """
    prepro = prepro_fn(args.max_art, args.max_abs)

    def sort_key(sample):
        # Order samples by target length first, then by source length.
        source, tgt = sample
        return len(tgt), len(source)

    to_tensors = batchify_fn_copy_rl(PAD, START, END, cuda=cuda)
    to_ids = convert_batch_copy_rl(UNK, word2id)
    batchify = compose(to_tensors, to_ids)

    # Flags shared by both splits, computed once.
    live_run = not debug
    worker_count = 4 if cuda and live_run else 0

    def make_batcher(split, shuffle, single_run):
        # Dataset -> DataLoader -> BucketedGenerater for one split.
        loader = DataLoader(
            MatchDataset_all2all(split),
            batch_size=BUCKET_SIZE,
            shuffle=shuffle,
            num_workers=worker_count,
            collate_fn=coll_fn,
        )
        return BucketedGenerater(
            loader, prepro, sort_key, batchify,
            single_run=single_run, fork=live_run,
        )

    train_batcher = make_batcher('train', shuffle=live_run, single_run=False)
    val_batcher = make_batcher('val', shuffle=False, single_run=True)
    return train_batcher, val_batcher
Exemplo n.º 2
0
def build_batchers(word2id, cuda, debug):
    """Build the bucketed batch generators for the train and val splits.

    The same preprocessing function, sort key and batchify pipeline are
    used for both splits. Only the training loader may shuffle (and only
    when ``debug`` is falsy); only the validation generator is created with
    ``single_run=True``.

    Note: ``args.max_art`` and ``args.max_abs`` come from the enclosing
    module scope.

    Args:
        word2id: vocabulary mapping handed to ``convert_batch_copy``.
        cuda: handed to ``batchify_fn_copy``; also gates the use of loader
            worker processes together with ``debug``.
        debug: when truthy, disables shuffling, loader workers and the
            generators' ``fork`` option.

    Returns:
        A ``(train_batcher, val_batcher)`` tuple.
    """
    prepro = prepro_fn(args.max_art, args.max_abs)

    def sort_key(sample):
        # Sort on (target length, source length).
        source, tgt = sample
        return len(tgt), len(source)

    batchify = compose(batchify_fn_copy(PAD, START, END, cuda=cuda),
                       convert_batch_copy(UNK, word2id))

    use_fork = not debug
    n_workers = 4 if (cuda and use_fork) else 0

    # (split name, is this the training split?) -- train is built first.
    split_specs = (('train', True), ('val', False))

    batchers = []
    for split_name, is_train in split_specs:
        loader = DataLoader(MatchDataset(split_name),
                            batch_size=BUCKET_SIZE,
                            # Only the training split is ever shuffled.
                            shuffle=is_train and use_fork,
                            num_workers=n_workers,
                            collate_fn=coll_fn)
        batchers.append(
            BucketedGenerater(loader, prepro, sort_key, batchify,
                              single_run=not is_train, fork=use_fork)
        )

    train_batcher, val_batcher = batchers
    return train_batcher, val_batcher