def handle(common, src, tgt, srcm, rsm, minfreq=False, vsize=False):

	# load the common (merged) vocabulary the checkpoint was trained with
	vcbc, nwordf = ldvocab(common, minf=minfreq, omit_vsize=vsize, vanilla=False)
	if src == common:
		src_indices = None
	else:
		vcbw, nword = ldvocab(src, minf=minfreq, omit_vsize=vsize, vanilla=False)
		vcbw = reverse_dict(vcbw)
		# src_indices[i]: index in the common vocabulary of token i of the
		# source vocabulary, 0 for tokens the common vocabulary lacks
		src_indices = torch.tensor([vcbc.get(vcbw[i], 0) for i in range(nword)], dtype=torch.long)
	if tgt == common:
		tgt_indices = None
	else:
		vcbw, nword = ldvocab(tgt, minf=minfreq, omit_vsize=vsize, vanilla=False)
		vcbw = reverse_dict(vcbw)
		tgt_indices = torch.tensor([vcbc.get(vcbw[i], 0) for i in range(nword)], dtype=torch.long)

	mymodel = NMT(cnfg.isize, nwordf, nwordf, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cache_len_default, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes)
	mymodel = load_model_cpu(srcm, mymodel)
	mymodel.update_vocab(src_indices=src_indices, tgt_indices=tgt_indices)

	save_model(mymodel, rsm, sub_module=False, h5args=h5zipargs)
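# Hedged sketch (not the repo's code): update_vocab above is model-internal; if
# all it has to do is re-order embedding rows so that new row i holds the
# common-vocabulary row indices[i], a minimal stand-in could look like this.
# The name update_vocab_sketch and its signature are illustrative assumptions.
import torch
from torch import nn

def update_vocab_sketch(emb, indices):

	# indices[i] is the common-vocabulary index of token i in the new
	# vocabulary (0 for tokens the common vocabulary lacks)
	new_emb = nn.Embedding(indices.size(0), emb.weight.size(1))
	with torch.no_grad():
		new_emb.weight.copy_(emb.weight.index_select(0, indices))
	return new_emb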
def __init__(self, modelfs, fvocab_i, fvocab_t, cnfg, minbsize=1, expand_for_mulgpu=True, bsize=64, maxpad=16, maxpart=4, maxtoken=1536, minfreq=False, vsize=False):

	vcbi, nwordi = ldvocab(fvocab_i, minfreq, vsize)
	vcbt, nwordt = ldvocab(fvocab_t, minfreq, vsize)
	# decoding maps target indices back to tokens, so reverse the target vocabulary
	self.vcbi, self.vcbt = vcbi, reverse_dict(vcbt)
	if expand_for_mulgpu:
		self.bsize = bsize * minbsize
		self.maxtoken = maxtoken * minbsize
	else:
		self.bsize = bsize
		self.maxtoken = maxtoken
	self.maxpad = maxpad
	self.maxpart = maxpart
	self.minbsize = minbsize
	if isinstance(modelfs, (list, tuple)):
		models = []
		for modelf in modelfs:
			tmp = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cache_len_default, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes)
			tmp = load_model_cpu(modelf, tmp)
			tmp.apply(load_fixing)
			models.append(tmp)
		model = Ensemble(models)
	else:
		model = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cache_len_default, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes)
		model = load_model_cpu(modelfs, model)
		model.apply(load_fixing)
	model.eval()
	self.use_cuda, self.cuda_device, cuda_devices, self.multi_gpu = parse_cuda_decode(cnfg.use_cuda, cnfg.gpuid, cnfg.multi_gpu_decoding)
	if self.use_cuda:
		model.to(self.cuda_device)
		if self.multi_gpu:
			model = DataParallelMT(model, device_ids=cuda_devices, output_device=self.cuda_device.index, host_replicate=True, gather_output=False)
	self.use_amp = cnfg.use_amp and self.use_cuda
	self.beam_size = cnfg.beam_size
	self.length_penalty = cnfg.length_penalty
	self.net = model
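# Hedged sketch (not the repo's Ensemble): a common way such a wrapper works is
# to average the member models' predictive distributions; the class below is an
# illustrative assumption, not the actual implementation.
import math

import torch
from torch import nn

class EnsembleSketch(nn.Module):

	def __init__(self, models):

		super(EnsembleSketch, self).__init__()
		self.nets = nn.ModuleList(models)

	def forward(self, *args, **kwargs):

		# assuming each member returns log-probabilities: averaging in
		# probability space is a logsumexp minus log(n) in log space
		return torch.stack([net(*args, **kwargs) for net in self.nets], dim=0).logsumexp(0) - math.log(len(self.nets))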
def handle(srcf, rsf, typ, rlist):

	td = h5py.File(cnfg.dev_data, "r")
	nwordi = int(td["nwordi"][:][0])
	nwordt = int(td["nwordt"][:][0])
	td.close()
	_tmpm = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cnfg.cache_len, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes)
	_tmpm = load_model_cpu(srcf, _tmpm)
	if typ == "enc":
		_tmpm.enc.nets = ModuleList(remove_layers(list(_tmpm.enc.nets), rlist))
	elif typ == "dec":
		_tmpm.dec.nets = ModuleList(remove_layers(list(_tmpm.dec.nets), rlist))
	save_model(_tmpm, rsf, False)
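# Hedged sketch (remove_layers is a repo helper): a minimal stand-in that drops
# the layers whose positions appear in rlist:
def remove_layers_sketch(layers, rlist):

	drop = set(rlist)
	return [net for i, net in enumerate(layers) if i not in drop]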
def load_fixing(module):

	if hasattr(module, "fix_load"):
		module.fix_load()

td = h5py.File(cnfg.dev_data, "r")

ntest = td["ndata"][:].item()
nword = td["nword"][:].tolist()
nwordi, nwordt = nword[0], nword[-1]

if len(sys.argv) == 2:
	mymodel = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cache_len_default, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes)
	mymodel = load_model_cpu(sys.argv[1], mymodel)
	mymodel.apply(load_fixing)
else:
	models = []
	for modelf in sys.argv[1:]:
		tmp = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cache_len_default, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes)
		tmp = load_model_cpu(modelf, tmp)
		tmp.apply(load_fixing)
		models.append(tmp)
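# Note: Module.apply walks the module tree recursively, so load_fixing lets any
# submodule defining fix_load() repair its state (e.g. re-binding tied weights)
# after load_model_cpu restores the parameters.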
def load_fixing(module): if "fix_load" in dir(module): module.fix_load() td = h5py.File(sys.argv[2], "r") ntest = td["ndata"][:].item() nword = td["nword"][:].tolist() nwordi, nwordt = nword[0], nword[-1] if len(sys.argv) == 4: mymodel = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cache_len_default, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes) mymodel = load_model_cpu(sys.argv[3], mymodel) mymodel.apply(load_fixing) else: models = [] for modelf in sys.argv[3:]: tmp = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cache_len_default, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes) tmp = load_model_cpu(modelf, tmp) tmp.apply(load_fixing)
set_random_seed(cnfg.seed, use_cuda)

td = h5py.File(cnfg.train_data, "r")
vd = h5py.File(cnfg.dev_data, "r")

ntrain = td["ndata"][:].item()
nvalid = vd["ndata"][:].item()
nword = td["nword"][:].tolist()
nwordi, nwordt = nword[0], nword[-1]

tl = [str(i) for i in range(ntrain)]

logger.info("Design models with seed: %d" % torch.initial_seed())
mymodel = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cache_len_default, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes)

fine_tune_m = cnfg.fine_tune_m

mymodel = init_model_params(mymodel)
mymodel.apply(init_fixing)
if fine_tune_m is not None:
	logger.info("Load pre-trained model from: " + fine_tune_m)
	mymodel = load_model_cpu(fine_tune_m, mymodel)
	mymodel.apply(load_fixing)

lossf = LabelSmoothingLoss(nwordt, cnfg.label_smoothing, ignore_index=pad_id, reduction='sum', forbidden_index=cnfg.forbidden_indexes)
if cnfg.save_train_state:
	statesf = wkdir + "checkpoint.states"

logger = get_logger(wkdir + "train.log")

td = h5py.File(cnfg.train_data, "r")
vd = h5py.File(cnfg.dev_data, "r")

ntrain = int(td["ndata"][:][0])
nvalid = int(vd["ndata"][:][0])
nwordi = int(td["nwordi"][:][0])
nwordt = int(td["nwordt"][:][0])

logger.info("Design models with seed: %d" % torch.initial_seed())
mymodel = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cnfg.cache_len, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes)

fine_tune_m = cnfg.fine_tune_m

tl = [("i" + str(i), "t" + str(i)) for i in range(ntrain)]

if fine_tune_m is None:
	mymodel = init_model_params(mymodel)
	mymodel.apply(init_fixing)
else:
	logger.info("Load pre-trained model from: " + fine_tune_m)
	mymodel = load_model_cpu(fine_tune_m, mymodel)

#lw = torch.ones(nwordt).float()
#lw[0] = 0.0
# tail of a vocabulary-loading helper whose definition is truncated in this excerpt
	return rs, cwd

def reverse_dict(din):

	rs = {}
	for k, v in din.items():
		rs[v] = k
	return rs

td = h5py.File(cnfg.test_data, "r")
nwordi = int(td["nwordi"][:][0])
td.close()
vcbt, nwordt = ldvocab(sys.argv[2])
vcbt = reverse_dict(vcbt)

mymodel = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cnfg.cache_len, cnfg.attn_hsize)
mymodel.load_state_dict(torch.load(sys.argv[1], map_location='cpu'))
initmodel = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cnfg.cache_len, cnfg.attn_hsize)

# compare positional embedding tables between a fresh model and the checkpoint
print(initmodel.enc.pemb.w.data.equal(initmodel.dec.pemb.w.data))
print(initmodel.enc.pemb.w.data.equal(mymodel.enc.pemb.w.data))
print(initmodel.enc.pemb.w.data.equal(mymodel.dec.pemb.w.data))
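# Example: reverse_dict flips a token -> index vocabulary into the
# index -> token mapping that decoding needs:
# reverse_dict({"<pad>": 0, "hello": 1}) == {0: "<pad>", 1: "hello"}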
logger = get_logger(wkdir + "train.log")

use_cuda, cuda_device, cuda_devices, multi_gpu = parse_cuda(cnfg.use_cuda, cnfg.gpuid)

set_random_seed(cnfg.seed, use_cuda)

td = h5py.File(cnfg.train_data, "r")
vd = h5py.File(cnfg.dev_data, "r")

ntrain = td["ndata"][:].item()
nvalid = vd["ndata"][:].item()
nword = td["nword"][:].tolist()
nwordi, nwordt = nword[0], nword[-1]

logger.info("Design models with seed: %d" % torch.initial_seed())
mymodel = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cache_len_default, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes)

fine_tune_m = cnfg.fine_tune_m

tl = [str(i) for i in range(ntrain)]

mymodel = init_model_params(mymodel)
mymodel.apply(init_fixing)
if fine_tune_m is not None:
	logger.info("Load pre-trained model from: " + fine_tune_m)
	mymodel = load_model_cpu(fine_tune_m, mymodel)

lossf = LabelSmoothingLoss(nwordt, cnfg.label_smoothing, ignore_index=pad_id, reduction='sum', forbidden_index=cnfg.forbidden_indexes)

if cnfg.src_emb is not None:
	logger.info("Load source embedding from: " + cnfg.src_emb)
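# Hedged sketch of plain label smoothing (the repo's LabelSmoothingLoss also
# masks forbidden indices): the target distribution puts 1 - eps on the gold
# token and eps / (V - 1) on every other class; all names below are
# illustrative, not the repo's API.
import torch
from torch.nn import functional as F

def label_smoothing_loss_sketch(logits, target, eps=0.1, ignore_index=0):

	# logits: (N, V) unnormalized scores, target: (N,) gold indices
	logp = F.log_softmax(logits, dim=-1)
	smooth = torch.full_like(logp, eps / (logp.size(-1) - 1))
	smooth.scatter_(1, target.unsqueeze(1), 1.0 - eps)
	loss = -(smooth * logp).sum(-1)
	return loss.masked_fill(target.eq(ignore_index), 0.0).sum()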
nvalid = vd["ndata"][:].item() nword = td["nword"][:].tolist() nwordi, nwordt = nword[0], nword[-1] tl = [str(i) for i in range(ntrain)] logger.info("Design models with seed: %d" % torch.initial_seed()) mymodel = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cache_len_default, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes, cnfg.num_layer_fwd) fine_tune_m = cnfg.fine_tune_m mymodel = init_model_params(mymodel) mymodel.apply(init_fixing) if fine_tune_m is not None: logger.info("Load pre-trained model from: " + fine_tune_m) _tmpm = NMTBase(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cache_len_default, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes) _tmpm = init_model_params(_tmpm) _tmpm.apply(init_fixing) _tmpm = load_model_cpu(fine_tune_m, _tmpm) freeze_module(_tmpm) mymodel.load_base(_tmpm) _tmpm = None if cnfg.probe_remove_self: mymodel.dec.nets[-1].perform_self_attn = False elif cnfg.probe_remove_cross: mymodel.dec.nets[-1].perform_cross_attn = False lossf = LabelSmoothingLoss(nwordt, cnfg.label_smoothing, ignore_index=pad_id, reduction='sum', forbidden_index=cnfg.forbidden_indexes) if cnfg.src_emb is not None: