def init_model(word2id, opt):
    model = Seq2SeqLSTMAttention(
        emb_dim=opt.word_vec_size,
        vocab_size=opt.vocab_size,
        src_hidden_dim=opt.rnn_size,
        trg_hidden_dim=opt.rnn_size,
        ctx_hidden_dim=opt.rnn_size,
        attention_mode='dot',
        batch_size=opt.batch_size,
        bidirectional=opt.bidirectional,
        pad_token_src=word2id[pykp.io.PAD_WORD],
        pad_token_trg=word2id[pykp.io.PAD_WORD],
        nlayers_src=opt.enc_layers,
        nlayers_trg=opt.dec_layers,
        dropout=opt.dropout,
        teacher_forcing_ratio=opt.teacher_forcing_ratio,
        scheduled_sampling=opt.scheduled_sampling,
        scheduled_sampling_batches=opt.scheduled_sampling_batches
    )

    logging.info('====================== Model Parameters =========================')

    if opt.train_from:
        logging.info("loading previous checkpoint from %s" % opt.train_from)
        if torch.cuda.is_available():
            model.load_state_dict(torch.load(open(opt.train_from, 'rb')))
        else:
            model.load_state_dict(torch.load(
                open(opt.train_from, 'rb'),
                map_location=lambda storage, loc: storage
            ))

    utils.tally_parameters(model)

    return model
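# --- Illustrative sketch, not part of the original training scripts --------------------------
# The init_model() variants in this file are driven entirely by an option namespace `opt`
# (plus a word-to-id vocabulary). A minimal way to build such a namespace with argparse is
# sketched below; the flag names mirror the attributes read above, but the default values and
# the helper name build_demo_opt are placeholders, not the original configuration.
import argparse


def build_demo_opt():
    parser = argparse.ArgumentParser()
    parser.add_argument('-word_vec_size', type=int, default=150)
    parser.add_argument('-vocab_size', type=int, default=50000)
    parser.add_argument('-rnn_size', type=int, default=300)
    parser.add_argument('-enc_layers', type=int, default=1)
    parser.add_argument('-dec_layers', type=int, default=1)
    parser.add_argument('-dropout', type=float, default=0.5)
    parser.add_argument('-bidirectional', action='store_true')
    parser.add_argument('-train_from', type=str, default='')
    return parser.parse_args([])  # parse an empty argv so the defaults above are used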
def init_model(opt):
    logging.info('====================== Model Parameters =========================')

    if opt.cascading_model:
        model = Seq2SeqLSTMAttentionCascading(opt)
    else:
        if opt.copy_attention:
            logging.info('Train a Seq2Seq model with Copy Mechanism')
        else:
            logging.info('Train a normal Seq2Seq model')

        if opt.encoder_type == 'bert':
            model = Seq2SeqBERT(opt)
        elif opt.encoder_type == 'bert_low':
            model = Seq2SeqBERTLow(opt)
        elif opt.encoder_type == 'transformer':
            model = Seq2SeqTransformer(opt)
        else:
            model = Seq2SeqLSTMAttention(opt)

    if opt.train_from:
        logging.info("loading previous checkpoint from %s" % opt.train_from)
        # train_from_model_dir = opt.train_from[:opt.train_from.rfind('model/') + 6]
        # load the saved meta-model and override the current one
        # model = torch.load(
        #     open(os.path.join(opt.model_path, opt.exp + '.initial.model'), 'rb')
        # )
        if torch.cuda.is_available() and opt.useGpu:
            checkpoint = torch.load(open(opt.train_from, 'rb'))
        else:
            checkpoint = torch.load(open(opt.train_from, 'rb'),
                                    map_location=lambda storage, loc: storage)
        # compatibility: checkpoints saved from a DataParallel-wrapped model prefix keys with 'module.'
        # checkpoint = dict([(k[7:], v) if k.startswith('module.') else (k, v) for k, v in checkpoint.items()])
        model.load_state_dict(checkpoint)
    else:
        # dump the freshly initialised meta-model so later runs can start from it
        # (opt.train_from is empty on this branch, so build the path from the model directory)
        torch.save(model.state_dict(),
                   open(os.path.join(opt.model_path, opt.exp + '.initial.model'), 'wb'))

    utils.tally_parameters(model)

    return model
def main():
    # Load checkpoint if we resume from a previous training.
    if opt.train_from:
        print('Loading checkpoint from %s' % opt.train_from)
        checkpoint = torch.load(opt.train_from,
                                map_location=lambda storage, loc: storage)
        model_opt = checkpoint['opt']
        # I don't like reassigning attributes of opt: it's not clear.
        opt.start_epoch = checkpoint['epoch'] + 1
    elif opt.init_with:
        print('Loading checkpoint from %s' % opt.init_with)
        checkpoint = torch.load(opt.init_with,
                                map_location=lambda storage, loc: storage)
        model_opt = opt
    elif opt.eval_with:
        print('Loading checkpoint from %s' % opt.eval_with)
        checkpoint = torch.load(opt.eval_with,
                                map_location=lambda storage, loc: storage)
        model_opt = checkpoint["opt"]
        model_opt.eval_only = 1
    else:
        checkpoint = None
        model_opt = opt

    for k, v in vars(model_opt).items():
        print("{}: {}".format(k, v))

    # Peek at the first dataset to determine the data_type.
    first_dataset = next(lazily_load_dataset("train"))
    data_type = first_dataset.data_type

    # Load fields generated during preprocessing.
    fields = load_fields(first_dataset, data_type, checkpoint)

    # Report src/tgt features.
    collect_report_features(fields)

    # Build model.
    model = build_model(model_opt, opt, fields, checkpoint)
    tally_parameters(model)
    check_save_model_path()

    # Build optimizer.
    optim = build_optim(model, checkpoint)

    # Do training.
    train_model(model, fields, optim, data_type, model_opt)

    if opt.tensorboard:
        writer.close()
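# --- Illustrative sketch, not part of the original training script ---------------------------
# main() above resumes from a checkpoint dict that bundles the model weights with bookkeeping
# such as the saved options ('opt') and the last completed epoch, and it loads the file via
# map_location so a GPU-trained checkpoint can be opened on a CPU-only machine. The toy model
# and the file name below are assumptions for demonstration only.
import torch
import torch.nn as nn


def _demo_checkpoint_roundtrip(path='demo.checkpoint.pt'):
    model = nn.Linear(4, 2)  # stand-in for the real sequence-to-sequence model
    torch.save({'model': model.state_dict(), 'epoch': 3}, path)

    # map_location keeps every storage on CPU, mirroring the lambda used in main()
    checkpoint = torch.load(path, map_location=lambda storage, loc: storage)
    model.load_state_dict(checkpoint['model'])
    start_epoch = checkpoint['epoch'] + 1  # resume from the epoch after the saved one, as main() does
    return model, start_epoch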
def init_model(opt):
    logging.info('====================== Model Parameters =========================')

    if opt.cascading_model:
        model = Seq2SeqLSTMAttentionCascading(opt)
    else:
        model = Seq2SeqLSTMAttention(opt)

    if opt.train_from:
        logging.info("loading previous checkpoint from %s" % opt.train_from)
        # load the saved meta-model and override the current one
        # model = torch.load(
        #     open(os.path.join(opt.model_path, opt.exp, '.initial.model'), 'wb')
        # )
        if torch.cuda.is_available() and opt.use_gpu:
            checkpoint = torch.load(open(opt.train_from, 'rb'))
        else:
            checkpoint = torch.load(open(opt.train_from, 'rb'),
                                    map_location=lambda storage, loc: storage)
        # compatibility: checkpoints saved from a DataParallel-wrapped model prefix keys with 'module.'
        checkpoint = dict([(k[7:], v) if k.startswith('module.') else (k, v)
                           for k, v in checkpoint.items()])
        model.load_state_dict(checkpoint)
    else:
        # dump the freshly initialised meta-model so later runs can start from it
        # (opt.train_from is empty on this branch, so build the path from the model directory)
        meta_model_path = os.path.join(opt.model_path, opt.exp + '.initial.model')
        torch.save(model.state_dict(), open(meta_model_path, 'wb'))

    if torch.cuda.is_available() and opt.use_gpu:
        model = model.cuda()

    utils.tally_parameters(model)

    # embedding = torch.load('embedding40004.pt')
    # model.init_embedding(embedding, requires_grad=False)

    return model
def init_model(opt):
    logging.info('====================== Model Parameters =========================')

    if opt.cascading_model:
        model = Seq2SeqLSTMAttentionCascading(opt)
    else:
        model = Seq2SeqLSTMAttention(opt)

    # opt.train_from = "/home/wangxingpeng/en/keyphrase/model/kp20k.ml.copy.bi-directional.test_sgd_0.01/kp20k.ml.copy.bi-directional.epoch=1.batch=3700.total_batch=3700.model"
    if opt.train_from:
        logging.info("loading previous checkpoint from %s" % opt.train_from)
        # load the saved meta-model and override the current one
        # model = torch.load(
        #     open(os.path.join(opt.model_path, opt.exp, '.initial.model'), 'wb')
        # )
        if torch.cuda.is_available() and opt.use_gpu:
            checkpoint = torch.load(open(opt.train_from, 'rb'))
        else:
            checkpoint = torch.load(open(opt.train_from, 'rb'),
                                    map_location=lambda storage, loc: storage)
        # compatibility: checkpoints saved from a DataParallel-wrapped model prefix keys with 'module.'
        checkpoint = dict([(k[7:], v) if k.startswith('module.') else (k, v)
                           for k, v in checkpoint.items()])
        model.load_state_dict(checkpoint)
    else:
        # dump the freshly initialised meta-model so later runs can start from it
        # (opt.train_from is empty on this branch, so build the path from the model directory)
        meta_model_path = os.path.join(opt.model_path, opt.exp + '.initial.model')
        torch.save(model.state_dict(), open(meta_model_path, 'wb'))

    if torch.cuda.is_available() and opt.use_gpu:
        model = model.cuda()

    utils.tally_parameters(model)

    return model
def init_model(opt):
    logging.info('====================== Model Parameters =========================')

    if not opt.copy_model:
        logging.info('Train a normal seq2seq model')
        model = Seq2SeqLSTMAttention(
            emb_dim=opt.word_vec_size,
            vocab_size=opt.vocab_size,
            src_hidden_dim=opt.rnn_size,
            trg_hidden_dim=opt.rnn_size,
            ctx_hidden_dim=opt.rnn_size,
            attention_mode='dot',
            batch_size=opt.batch_size,
            bidirectional=opt.bidirectional,
            pad_token_src=opt.word2id[pykp.io.PAD_WORD],
            pad_token_trg=opt.word2id[pykp.io.PAD_WORD],
            nlayers_src=opt.enc_layers,
            nlayers_trg=opt.dec_layers,
            dropout=opt.dropout,
            must_teacher_forcing=opt.must_teacher_forcing,
            teacher_forcing_ratio=opt.teacher_forcing_ratio,
            scheduled_sampling=opt.scheduled_sampling,
            scheduled_sampling_batches=opt.scheduled_sampling_batches,
        )
    else:
        logging.info('Train a seq2seq model with copy mechanism')
        model = Seq2SeqLSTMAttentionCopy(
            emb_dim=opt.word_vec_size,
            vocab_size=opt.vocab_size,
            src_hidden_dim=opt.rnn_size,
            trg_hidden_dim=opt.rnn_size,
            ctx_hidden_dim=opt.rnn_size,
            attention_mode='dot',
            batch_size=opt.batch_size,
            bidirectional=opt.bidirectional,
            pad_token_src=opt.word2id[pykp.io.PAD_WORD],
            pad_token_trg=opt.word2id[pykp.io.PAD_WORD],
            nlayers_src=opt.enc_layers,
            nlayers_trg=opt.dec_layers,
            dropout=opt.dropout,
            must_teacher_forcing=opt.must_teacher_forcing,
            teacher_forcing_ratio=opt.teacher_forcing_ratio,
            scheduled_sampling=opt.scheduled_sampling,
            scheduled_sampling_batches=opt.scheduled_sampling_batches,
            unk_word=opt.word2id[pykp.io.UNK_WORD],
        )

    if torch.cuda.is_available():
        model = model.cuda()

    if opt.train_from:
        logging.info("loading previous checkpoint from %s" % opt.train_from)
        if torch.cuda.is_available():
            checkpoint = torch.load(open(opt.train_from, 'rb'))
        else:
            checkpoint = torch.load(open(opt.train_from, 'rb'),
                                    map_location=lambda storage, loc: storage)
        print(checkpoint.keys())
        # compatibility: checkpoints saved from a DataParallel-wrapped model prefix keys with 'module.'
        checkpoint = dict([(k[7:], v) if k.startswith('module.') else (k, v)
                           for k, v in checkpoint.items()])
        model.load_state_dict(checkpoint)

    utils.tally_parameters(model)

    return model
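# --- Illustrative sketch, not part of the original training scripts --------------------------
# Several init_model() variants above rewrite checkpoint keys before load_state_dict():
# torch.nn.DataParallel wraps the model in a submodule called 'module', so its parameters are
# saved under keys such as 'module.encoder.weight'. The helper below reproduces that fix-up;
# the toy nn.Linear model is an assumption for demonstration only.
import torch.nn as nn


def strip_dataparallel_prefix(state_dict):
    # drop the leading 'module.' (7 characters) that DataParallel prepends to every key
    return {k[7:] if k.startswith('module.') else k: v for k, v in state_dict.items()}


def _demo_strip_prefix():
    model = nn.Linear(4, 2)
    wrapped = nn.DataParallel(model)  # state_dict keys become 'module.weight', 'module.bias'
    checkpoint = wrapped.state_dict()
    model.load_state_dict(strip_dataparallel_prefix(checkpoint))  # loads back into the bare model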