Example #1
    def __init__(self, abs_dir, max_len=30, cuda=True):
        abs_meta = json.load(open(join(abs_dir, 'meta.json')))
        assert abs_meta['net'] == 'base_abstractor'
        abs_args = abs_meta['net_args']
        abs_ckpt = load_best_ckpt(abs_dir)
        word2id = pkl.load(open(join(abs_dir, 'vocab.pkl'), 'rb'))
        elmo = None
        if 'elmo' in abs_args:
            elmo_args = abs_args['elmo']
            vocab_to_cache = [
                w for w, i in sorted(list(word2id.items()), key=itemgetter(1))
            ]
            elmo = get_elmo(dropout=elmo_args.get('dropout', 0),
                            vocab_to_cache=vocab_to_cache,
                            cuda=cuda,
                            projection_dim=elmo_args.get(
                                'projection_dim', None))
            del abs_args['elmo']

        abstractor = CopySumm(**abs_args)
        if elmo is not None:
            abstractor.set_elmo_embedding(elmo)
        abstractor.load_state_dict(abs_ckpt)
        self._device = torch.device('cuda' if cuda else 'cpu')
        self._net = abstractor.to(self._device)
        self._word2id = word2id
        self._id2word = {i: w for w, i in word2id.items()}
        self._max_len = max_len
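
Every example on this page builds vocab_to_cache with the same idiom: sorting the word2id mapping by id yields a list in which position i holds the word whose id is i, which is the order an id-indexed embedding cache expects. A self-contained illustration (the toy word2id dict is made up for demonstration):

from operator import itemgetter

word2id = {'<pad>': 0, '<unk>': 1, 'the': 2, 'cat': 3}
# Sort (word, id) pairs by id so that the list index equals the word id.
vocab_to_cache = [w for w, i in sorted(word2id.items(), key=itemgetter(1))]
print(vocab_to_cache)  # ['<pad>', '<unk>', 'the', 'cat']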
Example #2
    def __init__(self, ext_dir, cuda=True):
        ext_meta = json.load(open(join(ext_dir, 'meta.json')))
        assert ext_meta['net'] == 'rnn-ext_abs_rl'
        ext_args = ext_meta['net_args']['extractor']['net_args']
        word2id = pkl.load(open(join(ext_dir, 'agent_vocab.pkl'), 'rb'))

        elmo = None
        if 'elmo' in ext_args:
            elmo_args = ext_args['elmo']
            vocab_to_cache = [
                w for w, i in sorted(list(word2id.items()), key=itemgetter(1))
            ]
            elmo = get_elmo(dropout=elmo_args.get('dropout', 0),
                            vocab_to_cache=vocab_to_cache,
                            cuda=cuda,
                            projection_dim=elmo_args.get(
                                'projection_dim', None))
            del ext_args['elmo']
        extractor = PtrExtractSumm(**ext_args)
        if elmo is not None:
            extractor.set_elmo_embedding(elmo)

        agent = ActorCritic(extractor._sent_enc,
                            extractor._art_enc, extractor._extractor,
                            ArticleBatcher(word2id, cuda))
        ext_ckpt = load_best_ckpt(ext_dir, reverse=True)
        agent.load_state_dict(ext_ckpt)
        self._device = torch.device('cuda' if cuda else 'cpu')
        self._net = agent.to(self._device)
        self._word2id = word2id
        self._id2word = {i: w for w, i in word2id.items()}
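
Both constructors above, and Example #3 below, rely on load_best_ckpt, which is not shown on this page. A minimal sketch of what such a helper could look like, under the assumptions that checkpoints sit in a ckpt/ subfolder, are named ckpt-<score>-<step>, and store the model weights under a 'state_dict' key; the reverse flag would flip the sort for metrics where a higher score is better:

import os
import re
from os.path import join

import torch

def load_best_ckpt(model_dir, reverse=False):
    """Return the state_dict of the best-scoring checkpoint in model_dir/ckpt.

    Assumed naming convention: 'ckpt-<score>-<step>'. By default the lowest
    score (e.g. validation loss) wins; pass reverse=True when higher is better.
    """
    pattern = re.compile(r'^ckpt-.*-[0-9]+$')
    ckpts = [c for c in os.listdir(join(model_dir, 'ckpt')) if pattern.match(c)]
    ckpts = sorted(ckpts, key=lambda c: float(c.split('-')[1]), reverse=reverse)
    ckpt = torch.load(join(model_dir, 'ckpt', ckpts[0]), map_location='cpu')
    return ckpt['state_dict']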
Example #3
def load_ext_net(ext_dir):
    ext_meta = json.load(open(join(ext_dir, 'meta.json')))
    assert ext_meta['net'] == 'ml_rnn_extractor'
    ext_ckpt = load_best_ckpt(ext_dir)
    ext_args = ext_meta['net_args']
    vocab = pkl.load(open(join(ext_dir, 'vocab.pkl'), 'rb'))
    elmo = None
    if 'elmo' in ext_args:
        elmo_args = ext_args['elmo']
        vocab_to_cache = [
            w for w, i in sorted(list(vocab.items()), key=itemgetter(1))
        ]
        elmo = get_elmo(dropout=elmo_args.get('dropout', 0),
                        vocab_to_cache=vocab_to_cache)
        del ext_args['elmo']
    ext = PtrExtractSumm(**ext_args)
    if elmo is not None:
        ext.set_elmo_embedding(elmo)
    ext.load_state_dict(ext_ckpt)
    return ext, vocab
Example #4
def main(args):
    abstractor = get_abstractor(args.abs_dir, args.beam_search, args.cuda)
    for split in ('train', 'val'):
        decode(args, split)

    embedding = abstractor._net._decoder._embedding
    word2id = abstractor._word2id
    id2words = {i: w for w, i in word2id.items()}

    elmo = None
    if args.elmo:
        elmo = get_elmo(dropout=args.elmo_dropout,
                        vocab_to_cache=[id2words[i] for i in range(len(id2words))],
                        cuda=args.cuda,
                        projection_dim=args.elmo_projection)
        args.emb_dim = elmo.get_output_dim()

    meta = {
        'net': '{}_discriminator'.format('cnn'),
        'net_args': {
            'vocab_size': len(abstractor._word2id),
            'emb_dim': embedding.embedding_dim,
            'kernel_num': args.kernel_num,
            'kernel_sizes': args.kernel_sizes,
            'class_num': 2,
            'dropout': args.dropout,
            'max_norm': args.max_norm,
            'static': args.static,
        },
        'training_params': {
            'optimizer': ('adam', {'lr': args.lr}),
            'batch_size': args.batch,
            'clip_grad_norm': args.clip,
            'lr_decay': args.decay,
        }
    }

    net = ConvNet(**meta['net_args'])

    if elmo:
        meta['net_args']['elmo'] = {
            'dropout': args.elmo_dropout,
            'projection': args.elmo_projection,
        }
        net.set_elmo_embedding(elmo)
    else:
        net.set_embedding(embedding.weight)

    train_batcher, val_batcher = build_batchers(args, word2id)

    def criterion(logit, target):
        return F.cross_entropy(logit, target, reduce=False)

    val_fn = basic_validate(net, criterion)
    grad_fn = get_basic_grad_fn(net, args.clip)
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, net.parameters()), lr=args.lr)
    scheduler = ReduceLROnPlateau(optimizer, 'min', verbose=True,
                                  factor=args.decay, min_lr=0,
                                  patience=args.lr_p)

    if args.cuda:
        net = net.cuda()
    pipeline = BasicPipeline('discriminator', net,
                             train_batcher, val_batcher, args.batch, val_fn,
                             criterion, optimizer, grad_fn)
    trainer = BasicTrainer(pipeline, args.path,
                           args.ckpt_freq, args.patience, scheduler)

    print('start training with the following hyper-parameters:')
    print(meta)
    trainer.train()
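
One detail worth flagging in Example #4: F.cross_entropy(..., reduce=False) uses an argument that recent PyTorch releases have deprecated in favor of reduction='none'. An equivalent per-example criterion for current versions would be:

import torch.nn.functional as F

def criterion(logit, target):
    # Per-example cross-entropy, left unreduced (replaces the deprecated reduce=False).
    return F.cross_entropy(logit, target, reduction='none')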
Example #5
def main(args):
    assert args.net_type in ['ff', 'rnn']
    # create data batcher, vocabulary
    # batcher
    with open(join(DATA_DIR, 'vocab_cnt.pkl'), 'rb') as f:
        wc = pkl.load(f)
    word2id = make_vocab(wc, args.vsize)
    id2words = {i: w for w, i in word2id.items()}

    elmo = None
    if args.elmo:
        elmo = get_elmo(
            dropout=args.elmo_dropout,
            vocab_to_cache=[id2words[i] for i in range(len(id2words))],
            cuda=args.cuda,
            projection_dim=args.elmo_projection)
        args.emb_dim = elmo.get_output_dim()

    train_batcher, val_batcher = build_batchers(args.net_type, word2id,
                                                args.cuda, args.debug)

    # make net
    net, net_args = configure_net(args.net_type, len(word2id), args.emb_dim,
                                  args.conv_hidden, args.lstm_hidden,
                                  args.lstm_layer, args.bi)

    if elmo:
        net_args['elmo'] = {
            'dropout': args.elmo_dropout,
            'projection': args.elmo_projection,
        }
        net.set_elmo_embedding(elmo)
    elif args.w2v:
        # NOTE: the pretrained word2vec embedding must already be trained
        #       with the same dimension as args.emb_dim
        embedding, _ = make_embedding(id2words, args.w2v)
        net.set_embedding(embedding)

    # configure training setting
    criterion, train_params = configure_training(args.net_type, 'adam',
                                                 args.lr, args.clip,
                                                 args.decay, args.batch)

    # save experiment setting
    if not exists(args.path):
        os.makedirs(args.path)
    with open(join(args.path, 'vocab.pkl'), 'wb') as f:
        pkl.dump(word2id, f, pkl.HIGHEST_PROTOCOL)
    meta = {
        'net': 'ml_{}_extractor'.format(args.net_type),
        'net_args': net_args,
        'training_params': train_params
    }
    with open(join(args.path, 'meta.json'), 'w') as f:
        json.dump(meta, f, indent=4)

    # prepare trainer
    val_fn = basic_validate(net, criterion)
    grad_fn = get_basic_grad_fn(net, args.clip)
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, net.parameters()),
                           **train_params['optimizer'][1])
    scheduler = ReduceLROnPlateau(optimizer,
                                  'min',
                                  verbose=True,
                                  factor=args.decay,
                                  min_lr=0,
                                  patience=args.lr_p)

    if args.cuda:
        net = net.cuda()
    pipeline = BasicPipeline(meta['net'], net, train_batcher, val_batcher,
                             args.batch, val_fn, criterion, optimizer, grad_fn)
    trainer = BasicTrainer(pipeline, args.path, args.ckpt_freq, args.patience,
                           scheduler)

    print('start training with the following hyper-parameters:')
    print(meta)
    trainer.train()
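
Example #5 depends on make_vocab to turn the pickled word counts into word2id. A rough sketch of such a helper, assuming wc behaves like a collections.Counter and that the lowest ids are reserved for special tokens (the particular special tokens, their ids, and the handling of vocab_size are assumptions, not taken from the example):

from collections import Counter

def make_vocab(wc, vocab_size):
    # Reserve low ids for special tokens, then assign increasing ids
    # to the vocab_size most frequent words from the counter.
    word2id = {'<pad>': 0, '<unk>': 1, '<start>': 2, '<end>': 3}
    for i, (w, _) in enumerate(wc.most_common(vocab_size), start=len(word2id)):
        word2id[w] = i
    return word2id

wc = Counter({'the': 10, 'cat': 4, 'sat': 3})
print(make_vocab(wc, vocab_size=2))
# {'<pad>': 0, '<unk>': 1, '<start>': 2, '<end>': 3, 'the': 4, 'cat': 5}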