def init_model(word2id, opt):
    model = Seq2SeqLSTMAttention(
        emb_dim=opt.word_vec_size,
        vocab_size=opt.vocab_size,
        src_hidden_dim=opt.rnn_size,
        trg_hidden_dim=opt.rnn_size,
        ctx_hidden_dim=opt.rnn_size,
        attention_mode='dot',
        batch_size=opt.batch_size,
        bidirectional=opt.bidirectional,
        pad_token_src=word2id[pykp.io.PAD_WORD],
        pad_token_trg=word2id[pykp.io.PAD_WORD],
        nlayers_src=opt.enc_layers,
        nlayers_trg=opt.dec_layers,
        dropout=opt.dropout,
        teacher_forcing_ratio=opt.teacher_forcing_ratio,
        scheduled_sampling=opt.scheduled_sampling,
        scheduled_sampling_batches=opt.scheduled_sampling_batches
    )

    logging.info('======================  Model Parameters  =========================')
    if opt.train_from:
        logging.info("loading previous checkpoint from %s" % opt.train_from)
        if torch.cuda.is_available():
            model.load_state_dict(torch.load(open(opt.train_from, 'rb')))
        else:
            model.load_state_dict(torch.load(
                open(opt.train_from, 'rb'), map_location=lambda storage, loc: storage
            ))
    utils.tally_parameters(model)

    return model
Example #2
def init_model(word2id, opt):
    model = Seq2SeqLSTMAttention(
        emb_dim=opt.word_vec_size,
        vocab_size=opt.vocab_size,
        src_hidden_dim=opt.rnn_size,
        trg_hidden_dim=opt.rnn_size,
        ctx_hidden_dim=opt.rnn_size,
        attention_mode='dot',
        batch_size=opt.batch_size,
        bidirectional=opt.bidirectional,
        pad_token_src=word2id[pykp.io.PAD_WORD],
        pad_token_trg=word2id[pykp.io.PAD_WORD],
        nlayers_src=opt.enc_layers,
        nlayers_trg=opt.dec_layers,
        dropout=opt.dropout,
        teacher_forcing_ratio=opt.teacher_forcing_ratio,
        scheduled_sampling=opt.scheduled_sampling,
        scheduled_sampling_batches=opt.scheduled_sampling_batches)

    logging.info(
        '======================  Model Parameters  =========================')
    if opt.train_from:
        logging.info("loading previous checkpoint from %s" % opt.train_from)
        if torch.cuda.is_available():
            model.load_state_dict(torch.load(open(opt.train_from, 'rb')))
        else:
            model.load_state_dict(
                torch.load(open(opt.train_from, 'rb'),
                           map_location=lambda storage, loc: storage))
    utils.tally_parameters(model)

    return model
Example #3
def init_model(opt):
    logging.info(
        '======================  Model Parameters  =========================')

    if opt.cascading_model:
        model = Seq2SeqLSTMAttentionCascading(opt)
    else:
        if opt.copy_attention:
            logging.info('Train a Seq2Seq model with Copy Mechanism')
        else:
            logging.info('Train a normal Seq2Seq model')

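        # select the encoder/decoder architecture requested via opt.encoder_type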
        if opt.encoder_type == 'bert':
            model = Seq2SeqBERT(opt)
        elif opt.encoder_type == 'bert_low':
            model = Seq2SeqBERTLow(opt)
        elif opt.encoder_type == 'transformer':
            model = Seq2SeqTransformer(opt)
        else:
            model = Seq2SeqLSTMAttention(opt)

    if opt.train_from:
        logging.info("loading previous checkpoint from %s" % opt.train_from)
        # train_from_model_dir = opt.train_from[:opt.train_from.rfind('model/') + 6]
        # load the saved meta-model and override the current one
        # model = torch.load(
        #     open(os.path.join(opt.model_path, opt.exp + '.initial.model'), 'rb')
        # )

        if torch.cuda.is_available() and opt.useGpu:
            checkpoint = torch.load(open(opt.train_from, 'rb'))
        else:
            checkpoint = torch.load(open(opt.train_from, 'rb'),
                                    map_location=lambda storage, loc: storage)
        # compatibility: checkpoints saved with nn.DataParallel prefix keys with 'module.'
        # checkpoint = dict([(k[7:], v) if k.startswith('module.') else (k, v) for k, v in checkpoint.items()])
        model.load_state_dict(checkpoint)
    else:
        # dump the initial (meta) model; opt.train_from is empty in this branch, so
        # the path is built from opt.model_path and opt.exp (presumed intent,
        # mirroring the commented-out load above) rather than by slicing opt.train_from
        torch.save(
            model.state_dict(),
            open(os.path.join(opt.model_path, opt.exp + '.initial.model'), 'wb'))

    utils.tally_parameters(model)

    return model
Example #4
def main():
    # Load checkpoint if we resume from a previous training.
    if opt.train_from:
        print('Loading checkpoint from %s' % opt.train_from)
        checkpoint = torch.load(opt.train_from,
                                map_location=lambda storage, loc: storage)
        model_opt = checkpoint['opt']
        # I don't like reassigning attributes of opt: it's not clear.
        opt.start_epoch = checkpoint['epoch'] + 1
    elif opt.init_with:
        print('Loading checkpoint from %s' % opt.init_with)
        checkpoint = torch.load(opt.init_with,
                                map_location=lambda storage, loc: storage)
        model_opt = opt
    elif opt.eval_with:
        print('Loading checkpoint from %s' % opt.eval_with)
        checkpoint = torch.load(opt.eval_with,
                                map_location=lambda storage, loc: storage)
        model_opt = checkpoint["opt"]
        model_opt.eval_only = 1
    else:
        checkpoint = None
        model_opt = opt

    for k, v in vars(model_opt).items():
        print("{}: {}".format(k, v))


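    # peek at the first training shard to determine the data type and build the vocabulary fields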
    first_dataset = next(lazily_load_dataset("train"))
    data_type = first_dataset.data_type

    fields = load_fields(first_dataset, data_type, checkpoint)

    collect_report_features(fields)

    model = build_model(model_opt, opt, fields, checkpoint)

    tally_parameters(model)
    check_save_model_path()

    optim = build_optim(model, checkpoint)

    train_model(model, fields, optim, data_type, model_opt)

    if opt.tensorboard:
        writer.close()
Example #5
def init_model(opt):
    logging.info(
        '======================  Model Parameters  =========================')

    if opt.cascading_model:
        model = Seq2SeqLSTMAttentionCascading(opt)
    else:
        model = Seq2SeqLSTMAttention(opt)

    if opt.train_from:
        logging.info("loading previous checkpoint from %s" % opt.train_from)

        # load the saved meta-model and override the current one
        # model = torch.load(
        #     open(os.path.join(opt.model_path, opt.exp, '.initial.model'), 'wb')
        # )

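        # load onto the GPU when one is in use; otherwise remap GPU-saved tensors to CPU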
        if torch.cuda.is_available() and opt.use_gpu:
            checkpoint = torch.load(open(opt.train_from, 'rb'))
        else:
            checkpoint = torch.load(open(opt.train_from, 'rb'),
                                    map_location=lambda storage, loc: storage)
        # compatibility: checkpoints saved with nn.DataParallel prefix keys with 'module.'
        checkpoint = dict([(k[7:], v) if k.startswith('module.') else (k, v)
                           for k, v in checkpoint.items()])
        model.load_state_dict(checkpoint)
    else:
        # dump the initial (meta) model; opt.train_from is empty in this branch, so
        # the path is built from opt.model_path and opt.exp (presumed intent,
        # mirroring the commented-out load above) rather than by slicing opt.train_from
        meta_model_path = os.path.join(opt.model_path, opt.exp, 'initial.model')

        torch.save(model.state_dict(), open(meta_model_path, 'wb'))

    if torch.cuda.is_available() and opt.use_gpu:
        model = model.cuda()

    utils.tally_parameters(model)
    # embedding = torch.load('embedding40004.pt')
    # model.init_embedding(embedding,requires_grad=False)

    return model
Example #6
def init_model(opt):
    logging.info(
        '======================  Model Parameters  =========================')

    if opt.cascading_model:
        model = Seq2SeqLSTMAttentionCascading(opt)
    else:
        model = Seq2SeqLSTMAttention(opt)
    # opt.train_from ="/home/wangxingpeng/en/keyphrase/model/kp20k.ml.copy.bi-directional.test_sgd_0.01/kp20k.ml.copy.bi-directional.epoch=1.batch=3700.total_batch=3700.model"
    if opt.train_from:
        logging.info("loading previous checkpoint from %s" % opt.train_from)

        # load the saved meta-model and override the current one
        # model = torch.load(
        #     open(os.path.join(opt.model_path, opt.exp, '.initial.model'), 'wb')
        # )

        if torch.cuda.is_available() and opt.use_gpu:
            checkpoint = torch.load(open(opt.train_from, 'rb'))
        else:
            checkpoint = torch.load(open(opt.train_from, 'rb'),
                                    map_location=lambda storage, loc: storage)
        # compatibility: checkpoints saved with nn.DataParallel prefix keys with 'module.'
        checkpoint = dict([(k[7:], v) if k.startswith('module.') else (k, v)
                           for k, v in checkpoint.items()])
        model.load_state_dict(checkpoint)
    else:
        # dump the initial (meta) model; opt.train_from is empty in this branch, so
        # the path is built from opt.model_path and opt.exp (presumed intent,
        # mirroring the commented-out load above) rather than by slicing opt.train_from
        meta_model_path = os.path.join(opt.model_path, opt.exp, 'initial.model')

        torch.save(model.state_dict(), open(meta_model_path, 'wb'))

    if torch.cuda.is_available() and opt.use_gpu:
        model = model.cuda()

    utils.tally_parameters(model)

    return model
Example #7
def init_model(opt):
    logging.info('======================  Model Parameters  =========================')

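    # choose between the plain seq2seq model and the copy-mechanism variant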
    if not opt.copy_model:
        logging.info('Train a normal seq2seq model')
        model = Seq2SeqLSTMAttention(
            emb_dim=opt.word_vec_size,
            vocab_size=opt.vocab_size,
            src_hidden_dim=opt.rnn_size,
            trg_hidden_dim=opt.rnn_size,
            ctx_hidden_dim=opt.rnn_size,
            attention_mode='dot',
            batch_size=opt.batch_size,
            bidirectional=opt.bidirectional,
            pad_token_src=opt.word2id[pykp.io.PAD_WORD],
            pad_token_trg=opt.word2id[pykp.io.PAD_WORD],
            nlayers_src=opt.enc_layers,
            nlayers_trg=opt.dec_layers,
            dropout=opt.dropout,
            must_teacher_forcing=opt.must_teacher_forcing,
            teacher_forcing_ratio=opt.teacher_forcing_ratio,
            scheduled_sampling=opt.scheduled_sampling,
            scheduled_sampling_batches=opt.scheduled_sampling_batches,
        )
    else:
        logging.info('Train a seq2seq model with copy mechanism')
        model = Seq2SeqLSTMAttentionCopy(
            emb_dim=opt.word_vec_size,
            vocab_size=opt.vocab_size,
            src_hidden_dim=opt.rnn_size,
            trg_hidden_dim=opt.rnn_size,
            ctx_hidden_dim=opt.rnn_size,
            attention_mode='dot',
            batch_size=opt.batch_size,
            bidirectional=opt.bidirectional,
            pad_token_src=opt.word2id[pykp.io.PAD_WORD],
            pad_token_trg=opt.word2id[pykp.io.PAD_WORD],
            nlayers_src=opt.enc_layers,
            nlayers_trg=opt.dec_layers,
            dropout=opt.dropout,
            must_teacher_forcing=opt.must_teacher_forcing,
            teacher_forcing_ratio=opt.teacher_forcing_ratio,
            scheduled_sampling=opt.scheduled_sampling,
            scheduled_sampling_batches=opt.scheduled_sampling_batches,
            unk_word=opt.word2id[pykp.io.UNK_WORD],
        )

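    # move the model to the GPU (if available) before restoring weights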
    if torch.cuda.is_available():
        model = model.cuda()

    if opt.train_from:
        logging.info("loading previous checkpoint from %s" % opt.train_from)
        if torch.cuda.is_available():
            checkpoint = torch.load(open(opt.train_from, 'rb'))
        else:
            checkpoint = torch.load(
                open(opt.train_from, 'rb'), map_location=lambda storage, loc: storage
            )
        print(checkpoint.keys())
        # compatibility: checkpoints saved with nn.DataParallel prefix keys with 'module.'
        checkpoint = dict([(k[7:], v) if k.startswith('module.') else (k, v)
                           for k, v in checkpoint.items()])
        model.load_state_dict(checkpoint)

    utils.tally_parameters(model)

    return model
Example #8
def init_model(opt):
    logging.info(
        '======================  Model Parameters  =========================')

    if not opt.copy_model:
        logging.info('Train a normal seq2seq model')
        model = Seq2SeqLSTMAttention(
            emb_dim=opt.word_vec_size,
            vocab_size=opt.vocab_size,
            src_hidden_dim=opt.rnn_size,
            trg_hidden_dim=opt.rnn_size,
            ctx_hidden_dim=opt.rnn_size,
            attention_mode='dot',
            batch_size=opt.batch_size,
            bidirectional=opt.bidirectional,
            pad_token_src=opt.word2id[pykp.io.PAD_WORD],
            pad_token_trg=opt.word2id[pykp.io.PAD_WORD],
            nlayers_src=opt.enc_layers,
            nlayers_trg=opt.dec_layers,
            dropout=opt.dropout,
            must_teacher_forcing=opt.must_teacher_forcing,
            teacher_forcing_ratio=opt.teacher_forcing_ratio,
            scheduled_sampling=opt.scheduled_sampling,
            scheduled_sampling_batches=opt.scheduled_sampling_batches,
        )
    else:
        logging.info('Train a seq2seq model with copy mechanism')
        model = Seq2SeqLSTMAttentionCopy(
            emb_dim=opt.word_vec_size,
            vocab_size=opt.vocab_size,
            src_hidden_dim=opt.rnn_size,
            trg_hidden_dim=opt.rnn_size,
            ctx_hidden_dim=opt.rnn_size,
            attention_mode='dot',
            batch_size=opt.batch_size,
            bidirectional=opt.bidirectional,
            pad_token_src=opt.word2id[pykp.io.PAD_WORD],
            pad_token_trg=opt.word2id[pykp.io.PAD_WORD],
            nlayers_src=opt.enc_layers,
            nlayers_trg=opt.dec_layers,
            dropout=opt.dropout,
            must_teacher_forcing=opt.must_teacher_forcing,
            teacher_forcing_ratio=opt.teacher_forcing_ratio,
            scheduled_sampling=opt.scheduled_sampling,
            scheduled_sampling_batches=opt.scheduled_sampling_batches,
            unk_word=opt.word2id[pykp.io.UNK_WORD],
        )

    if torch.cuda.is_available():
        model = model.cuda()

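    # optionally warm-start from a previous checkpoint; DataParallel 'module.' prefixes are stripped below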
    if opt.train_from:
        logging.info("loading previous checkpoint from %s" % opt.train_from)
        if torch.cuda.is_available():
            checkpoint = torch.load(open(opt.train_from, 'rb'))
        else:
            checkpoint = torch.load(open(opt.train_from, 'rb'),
                                    map_location=lambda storage, loc: storage)
        print(checkpoint.keys())
        # compatibility: checkpoints saved with nn.DataParallel prefix keys with 'module.'
        checkpoint = dict([(k[7:], v) if k.startswith('module.') else (k, v)
                           for k, v in checkpoint.items()])
        model.load_state_dict(checkpoint)

    utils.tally_parameters(model)

    return model