def main(): logging.info("Build dataset...") prob_and_idx = get_prob_idx() dataset = build_dataset(opt, [opt.input, opt.truth or opt.input], opt.vocab, device, prob_and_idx, train=False) fields = dataset.fields pad_ids = {"src": fields["src"].pad_id, "tgt": fields["tgt"].pad_id} vocab_sizes = { "src": len(fields["src"].vocab), "tgt": len(fields["tgt"].vocab) } # load checkpoint from model_path logging.info("Load checkpoint from %s." % opt.model_path) checkpoint = torch.load(opt.model_path, map_location=lambda storage, loc: storage) logging.info("Build model...") model = NMTModel.load_model(checkpoint["opt"], pad_ids, vocab_sizes, checkpoint["model"]).to(device).eval() logging.info("Start translation...") with torch.set_grad_enabled(False): translate(dataset, fields, model)
def main(): logging.info("Build dataset...") prob_and_idx = get_prob_idx(opt.strategy) train_dataset = build_dataset(opt, opt.train, opt.vocab, device, prob_and_idx, train=True) valid_dataset = build_dataset(opt, opt.valid, opt.vocab, device, prob_and_idx, train=False) fields = valid_dataset.fields = train_dataset.fields logging.info("Build model...") pad_ids = {"src": fields["src"].pad_id, "tgt": fields["tgt"].pad_id} vocab_sizes = { "src": len(fields["src"].vocab), "tgt": len(fields["tgt"].vocab) } model = NMTModel.load_model(opt, pad_ids, vocab_sizes).to(device) criterion = LabelSmoothingLoss(opt.label_smoothing, vocab_sizes["tgt"], pad_ids["tgt"]).to(device) n_step = int(opt.train_from.split("-")[-1]) if opt.train_from else 1 optimizer = WarmAdam(model.parameters(), opt.lr, opt.hidden_size, opt.warm_up, n_step) logging.info("start training...") train(model, criterion, optimizer, train_dataset, valid_dataset)
def main(): logging.info("Build dataset...") valid_dataset = build_dataset(opt, opt.valid, opt.vocab, device, train=False) logging.info("Built dataset valid ...") train_dataset = build_dataset(opt, opt.train, opt.vocab, device, train=True) logging.info("Built dataset train ...") fields = valid_dataset.fields = train_dataset.fields logging.info("Build model...") pad_ids = {"source": fields["source"].pad_id, "summary_cn": fields["summary_cn"].pad_id, "summary_en": fields["summary_en"].pad_id} vocab_sizes = {"source": len(fields["source"].vocab), "summary_cn": len(fields["summary_cn"].vocab), "summary_en": len(fields["summary_en"].vocab)} print(vocab_sizes) model = NMTModel.load_model(opt, pad_ids, vocab_sizes).to(device) criterion_cn = LabelSmoothingLoss(opt.label_smoothing, vocab_sizes["summary_cn"], pad_ids["summary_cn"]).to(device) criterion_en = LabelSmoothingLoss(opt.label_smoothing, vocab_sizes["summary_en"], pad_ids["summary_en"]).to(device) n_step = int(opt.train_from.split("-")[-1]) if opt.train_from else 1 optimizer = WarmAdam(model.parameters(), opt.lr, opt.hidden_size, opt.warm_up, n_step) logging.info("start training...") train(model, pad_ids, vocab_sizes, criterion_cn, criterion_en, optimizer, train_dataset, valid_dataset)
def main():
    sim_model = load_sim_model(opt.sim_model_file)

    logging.info("Build dataset...")
    train_dataset = build_dataset(opt, opt.train, opt.vocab, device, train=True)
    valid_dataset = build_dataset(opt, opt.valid, opt.vocab, device, train=False)
    fields = valid_dataset.fields = train_dataset.fields

    logging.info("Build model...")
    pad_ids = {"src": fields["src"].pad_id,
               "task1_tgt": fields["task1_tgt"].pad_id,
               "task2_tgt": fields["task2_tgt"].pad_id}
    vocab_sizes = {"src": len(fields["src"].vocab),
                   "task1_tgt": len(fields["task1_tgt"].vocab),
                   "task2_tgt": len(fields["task2_tgt"].vocab)}

    model = NMTModel.load_model(opt, pad_ids, vocab_sizes).to(device)
    criterion_task1 = MyLabelSmoothingLoss(opt.label_smoothing, vocab_sizes["task1_tgt"],
                                           pad_ids["task1_tgt"]).to(device)
    criterion_task2 = MyLabelSmoothingLoss(opt.label_smoothing, vocab_sizes["task2_tgt"],
                                           pad_ids["task2_tgt"]).to(device)

    n_step = 1
    optimizer = WarmAdam(model.parameters(), opt.lr, opt.hidden_size, opt.warm_up, n_step)

    logging.info("start training...")
    train(sim_model, model, criterion_task1, criterion_task2, optimizer,
          train_dataset, valid_dataset, fields, opt.alpha, opt.beta)
def main(): logger.info("Build dataset...") dataset = build_dataset(opt, [opt.input, opt.input], opt.vocab, device, train=False) logger.info("Build model...") model = NMTModel.load_model(loader, dataset.fields).to(device).eval() logger.info("Start translation...") with torch.set_grad_enabled(False): translate(dataset, dataset.fields, model)
def main(): logging.info("Build dataset...") train_dataset = build_dataset(opt, opt.train, opt.vocab, device, train=True) valid_dataset = build_dataset(opt, opt.valid, opt.vocab, device, train=False) fields = valid_dataset.fields = train_dataset.fields logging.info("Build model...") pad_ids = { "src": fields["src"].pad_id, "task1_tgt": fields["task1_tgt"].pad_id, "task2_tgt": fields["task2_tgt"].pad_id, "task3_tgt": fields["task3_tgt"].pad_id } vocab_sizes = { "src": len(fields["src"].vocab), "task1_tgt": len(fields["task1_tgt"].vocab), "task2_tgt": len(fields["task2_tgt"].vocab), "task3_tgt": len(fields["task3_tgt"].vocab) } print(vocab_sizes) model = NMTModel.load_model(opt, pad_ids, vocab_sizes).to(device) # if torch.cuda.device_count() > 1: # print("Let's use", torch.cuda.device_count(), "GPUs!") # model = nn.DataParallel(model, device_ids=[0,1]) # for MT criterion_task1 = LabelSmoothingLoss(opt.label_smoothing, vocab_sizes["task1_tgt"], pad_ids["task1_tgt"]).to(device) # for MS criterion_task2 = LabelSmoothingLoss(opt.label_smoothing, vocab_sizes["task2_tgt"], pad_ids["task2_tgt"]).to(device) # for CLS criterion_task3 = LabelSmoothingLoss(opt.label_smoothing, vocab_sizes["task3_tgt"], pad_ids["task3_tgt"]).to(device) n_step = int(opt.train_from.split("-")[-1]) if opt.train_from else 1 optimizer = WarmAdam(model.parameters(), opt.lr, opt.hidden_size, opt.warm_up, n_step) logging.info("start training...") train(model, pad_ids, vocab_sizes, criterion_task1, criterion_task2, criterion_task3, optimizer, train_dataset, valid_dataset)
def main(): logger.info("Build dataset...") train_dataset = build_dataset(opt, opt.train, opt.vocab, device, train=True) valid_dataset = build_dataset(opt, opt.valid, opt.vocab, device, train=False) fields = valid_dataset.fields = train_dataset.fields logger.info("Build model...") model = NMTModel.load_model(loader, fields) criterion = LabelSmoothingLoss(opt.label_smoothing, len(fields["tgt"].vocab), fields["tgt"].pad_id) model = nn.DataParallel(FullModel(model, criterion)).to(device) optimizer = WarmAdam(model.module.model.parameters(), opt.lr, opt.hidden_size, opt.warm_up, loader.step) logger.info("start training...") train(model, optimizer, train_dataset, valid_dataset)
def main(): logging.info("Build dataset...") valid_dataset = build_dataset(opt, opt.valid, opt.vocab, device, train=False) fields = valid_dataset.fields pad_ids = { "source": fields["source"].pad_id, "summary_cn": fields["summary_cn"].pad_id, "summary_en": fields["summary_en"].pad_id } vocab_sizes = { "source": len(fields["source"].vocab), "summary_cn": len(fields["summary_cn"].vocab), "summary_en": len(fields["summary_en"].vocab) } # load checkpoint from model_path for ckpt in range(1100000, 1345001, 5000): model_path = opt.model_path + '/checkpoint-step-' + str(ckpt) logging.info("Load checkpoint from %s." % model_path) checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage) logging.info("Build model...") model = NMTModel.load_model(checkpoint["opt"], pad_ids, vocab_sizes, checkpoint["model"]).to(device).eval() criterion_cn = LabelSmoothingLoss(opt.label_smoothing, vocab_sizes["summary_cn"], pad_ids["summary_cn"]).to(device) criterion_en = LabelSmoothingLoss(opt.label_smoothing, vocab_sizes["summary_en"], pad_ids["summary_en"]).to(device) n_step = ckpt logging.info("Start translation...") with torch.set_grad_enabled(False): valid(model, criterion_cn, criterion_en, valid_dataset, n_step)
        if 'en_decoder' in layer_tensor_name:
            task2 += torch.numel(tensor)
    print('encoder: {}, task1: {}, task2: {}, Total parameters: {}'.format(
        encoder, task1, task2, total))
    pytorch_total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(pytorch_total_params)


if __name__ == '__main__':
    train_dataset = build_dataset(opt, opt.train, opt.vocab, device, train=True)
    fields = train_dataset.fields

    logging.info("Build model...")
    pad_ids = {
        "source": fields["source"].pad_id,
        "summary_cn": fields["summary_cn"].pad_id,
        "summary_en": fields["summary_en"].pad_id
    }
    vocab_sizes = {
        "source": len(fields["source"].vocab),
        "summary_cn": len(fields["summary_cn"].vocab),
        "summary_en": len(fields["summary_en"].vocab)
    }
    model = NMTModel.load_model(opt, pad_ids, vocab_sizes).to(device)
    calc_parameters_torch(model)
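# The opening of calc_parameters_torch is missing above. A plausible reconstruction,
# assuming it walks model.state_dict() and buckets parameter counts by submodule name;
# the 'encoder' and 'cn_decoder' substrings are assumptions made to mirror the
# surviving 'en_decoder' branch, and the sketch name is hypothetical.
import torch

def calc_parameters_torch_sketch(model):
    encoder = task1 = task2 = total = 0
    for layer_tensor_name, tensor in model.state_dict().items():
        total += torch.numel(tensor)
        if 'encoder' in layer_tensor_name:
            encoder += torch.numel(tensor)
        if 'cn_decoder' in layer_tensor_name:
            task1 += torch.numel(tensor)
        if 'en_decoder' in layer_tensor_name:
            task2 += torch.numel(tensor)
    print('encoder: {}, task1: {}, task2: {}, Total parameters: {}'.format(
        encoder, task1, task2, total))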