# NOTE(review): this definition is immediately shadowed by the extended
# build_batchers below (same name, superset signature) — confirm whether it
# can be removed.
def build_batchers(net_type, word2id, cuda, debug):
    """Build bucketed train/val batch generators for the extractor.

    Args:
        net_type: extractor architecture, one of 'ff' or 'rnn'.
        word2id:  vocabulary mapping used to convert tokens to indices.
        cuda:     move batches to GPU and enable loader workers.
        debug:    disable shuffling, worker processes, and forking.

    Returns:
        (train_batcher, val_batcher) pair of BucketedGenerater objects.
    """
    assert net_type in ['ff', 'rnn']
    prepro = prepro_fn_extract(args.max_word, args.max_sent)

    def sort_key(sample):
        # Bucket samples by the number of source sentences.
        src_sents, _ = sample
        return len(src_sents)

    # Pick the batchify/convert pair matching the architecture.
    if net_type == 'ff':
        batchify_fn = batchify_fn_extract_ff
        convert_batch = convert_batch_extract_ff
    else:
        batchify_fn = batchify_fn_extract_ptr
        convert_batch = convert_batch_extract_ptr
    batchify = compose(batchify_fn(PAD, cuda=cuda),
                       convert_batch(UNK, word2id))

    # Worker processes only help when batches are pushed to GPU, and they
    # get in the way of debugging.
    num_workers = 4 if cuda and not debug else 0

    def _batcher(split, shuffle, single_run):
        # One loader + bucketed generator for a dataset split.
        loader = DataLoader(
            ExtractDataset(split), batch_size=BUCKET_SIZE,
            shuffle=shuffle, num_workers=num_workers,
            collate_fn=coll_fn_extract
        )
        return BucketedGenerater(loader, prepro, sort_key, batchify,
                                 single_run=single_run, fork=not debug)

    train_batcher = _batcher('train', shuffle=not debug, single_run=False)
    val_batcher = _batcher('val', shuffle=False, single_run=True)
    return train_batcher, val_batcher
def build_batchers(net_type, word2id, cuda, debug,
                   if_neusum=False, stop=False, combine=False):
    """Build bucketed train/val batch generators for the extractor.

    Args:
        net_type:  extractor architecture, one of 'ff', 'rnn', or 'nnse'.
        word2id:   vocabulary mapping used to convert tokens to indices.
        cuda:      move batches to GPU and enable loader workers.
        debug:     disable shuffling, worker processes, and forking.
        if_neusum: use the NeuSum-style dataset construction.
        stop:      use the stop-token pointer converter (rnn nets only).
        combine:   use the combined dataset construction
                   (mutually exclusive with if_neusum).

    Returns:
        (train_batcher, val_batcher) pair of BucketedGenerater objects.
    """
    assert net_type in ['ff', 'rnn', 'nnse']
    assert not (combine and if_neusum)
    prepro = prepro_fn_extract(args.max_word, args.max_sent)

    def sort_key(sample):
        # Bucket samples by the number of source sentences.
        src_sents, _ = sample
        return len(src_sents)

    if stop:
        print('add stop')
    # The original duplicated this whole selection in both `stop` branches;
    # only the pointer-net converter actually differs with `stop`.
    if net_type == 'nnse':
        batchify_fn = batchify_fn_extract_nnse
    elif net_type == 'ff':
        batchify_fn = batchify_fn_extract_ff
    else:
        batchify_fn = batchify_fn_extract_ptr
    if net_type in ['ff', 'nnse']:
        convert_batch = convert_batch_extract_ff
    else:
        convert_batch = (convert_batch_extract_ptr_stop if stop
                         else convert_batch_extract_ptr)
    batchify = compose(batchify_fn(PAD, cuda=cuda),
                       convert_batch(UNK, word2id))

    # Only the dataset class varies between the three construction modes;
    # the loader/batcher wiring is identical.
    if if_neusum:
        print('Use neusum constrcution')
        dataset_cls = ExtractDataset_neusum
    elif combine:
        print('Use combine constrcution')
        dataset_cls = ExtractDataset_combine
    else:
        dataset_cls = ExtractDataset

    def _make_batcher(split, shuffle, single_run):
        # One loader + bucketed generator for a dataset split.
        loader = DataLoader(
            dataset_cls(split), batch_size=BUCKET_SIZE,
            shuffle=shuffle,
            num_workers=4 if cuda and not debug else 0,
            collate_fn=coll_fn_extract
        )
        return BucketedGenerater(loader, prepro, sort_key, batchify,
                                 single_run=single_run, fork=not debug)

    train_batcher = _make_batcher('train', shuffle=not debug,
                                  single_run=False)
    val_batcher = _make_batcher('val', shuffle=False, single_run=True)
    return train_batcher, val_batcher