# --- Ensemble / model setup -------------------------------------------------
# NOTE(review): this chunk was whitespace-mangled (newlines lost); the line
# structure and indentation below are reconstructed from the code's own
# control-flow guards. It appears to splice two setup phases together
# (an ensemble-decoding phase and a training phase) — verify against the
# original scripts.

# Build one NMT model per checkpoint and load its weights on CPU.
# NOTE(review): `modelf` is presumably the loop variable of an enclosing
# `for modelf in ...:` over checkpoint files — the loop header is outside
# this view; confirm and restore its indentation if so.
tmp = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cache_len_default, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes)
tmp = load_model_cpu(modelf, tmp)
tmp.apply(load_fixing)
models.append(tmp)

# Wrap the loaded checkpoints as a single ensemble in inference mode.
mymodel = Ensemble(models)
mymodel.eval()

# Per-token loss (reduction='none') for scoring; forbidden indexes are
# excluded from the smoothed target distribution.
lossf = LabelSmoothingLoss(nwordt, cnfg.label_smoothing, ignore_index=pad_id, reduction='none', forbidden_index=cnfg.forbidden_indexes)

use_cuda, cuda_device, cuda_devices, multi_gpu = parse_cuda(cnfg.use_cuda, cnfg.gpuid)
use_amp = cnfg.use_amp and use_cuda

# Important to make cudnn methods deterministic
set_random_seed(cnfg.seed, use_cuda)

if use_cuda:
	mymodel.to(cuda_device)
	lossf.to(cuda_device)
	if multi_gpu:
		# NOTE(review): these DataParallelMT arguments look spliced from an
		# NMT(...) constructor call — confirm against DataParallelMT's
		# actual signature (device_ids/output_device are the usual args).
		mymodel = DataParallelMT(mymodel, cnfg.bindDecoderEmb, cnfg.forbidden_indexes, ntask=ntask)

# --- Training-side initialization ------------------------------------------
fine_tune_m = cnfg.fine_tune_m

# Fresh parameter initialization, optionally overwritten by a pre-trained
# checkpoint when fine-tuning.
mymodel = init_model_params(mymodel)
mymodel.apply(init_fixing)
if fine_tune_m is not None:
	logger.info("Load pre-trained model from: " + fine_tune_m)
	mymodel = load_model_cpu(fine_tune_m, mymodel)
	mymodel.apply(load_fixing)

# Training loss uses summed reduction (normalized later by the caller,
# presumably by token count — confirm).
lossf = LabelSmoothingLoss(nwordt, cnfg.label_smoothing, ignore_index=pad_id, reduction='sum', forbidden_index=cnfg.forbidden_indexes)

# Optionally initialize encoder/decoder embedding tables from pre-trained
# vectors; scaling and freezing are controlled by the configuration.
if cnfg.src_emb is not None:
	logger.info("Load source embedding from: " + cnfg.src_emb)
	load_emb(cnfg.src_emb, mymodel.enc.wemb.weight, nwordi, cnfg.scale_down_emb, cnfg.freeze_srcemb)
if cnfg.tgt_emb is not None:
	logger.info("Load target embedding from: " + cnfg.tgt_emb)
	load_emb(cnfg.tgt_emb, mymodel.dec.wemb.weight, nwordt, cnfg.scale_down_emb, cnfg.freeze_tgtemb)

if cuda_device:
	mymodel.to(cuda_device)
	lossf.to(cuda_device)