Example #1
def handle(common, src, tgt, srcm, rsm, minfreq=False, vsize=False):

    vcbc, nwordf = ldvocab(common,
                           minf=minfreq,
                           omit_vsize=vsize,
                           vanilla=False)

    if src == common:
        src_indices = None
    else:
        vcbw, nword = ldvocab(src,
                              minf=minfreq,
                              omit_vsize=vsize,
                              vanilla=False)
        vcbw = reverse_dict(vcbw)
        src_indices = torch.tensor(
            [vcbc.get(vcbw[i], 0) for i in range(nword)], dtype=torch.long)
    if tgt == common:
        tgt_indices = None
    else:
        vcbw, nword = ldvocab(tgt,
                              minf=minfreq,
                              omit_vsize=vsize,
                              vanilla=False)
        vcbw = reverse_dict(vcbw)
        tgt_indices = torch.tensor(
            [vcbc.get(vcbw[i], 0) for i in range(nword)], dtype=torch.long)

    mymodel = NMT(cnfg.isize, nwordf, nwordf, cnfg.nlayer, cnfg.ff_hsize,
                  cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead,
                  cache_len_default, cnfg.attn_hsize, cnfg.norm_output,
                  cnfg.bindDecoderEmb, cnfg.forbidden_indexes)
    mymodel = load_model_cpu(srcm, mymodel)
    mymodel.update_vocab(src_indices=src_indices, tgt_indices=tgt_indices)
    save_model(mymodel, rsm, sub_module=False, h5args=h5zipargs)
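
For clarity, here is a toy, self-contained illustration of the index remapping performed above; the vocabularies are made up for this sketch. vcbc maps token to id in the common vocabulary, the reversed per-language vocabulary maps id to token, and tokens missing from the common vocabulary fall back to index 0.

# Hypothetical toy vocabularies, for illustration only.
vcbc = {"<unk>": 0, "the": 1, "cat": 2, "dog": 3}   # common vocab: token -> id
vcbw = {0: "<unk>", 1: "dog", 2: "bird"}            # per-language vocab after reverse_dict: id -> token
src_indices = [vcbc.get(vcbw[i], 0) for i in range(len(vcbw))]
print(src_indices)  # [0, 3, 0] -- "bird" is absent from the common vocabulary, so it maps to 0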
Example #2
	def __init__(self, modelfs, fvocab_i, fvocab_t, cnfg, minbsize=1, expand_for_mulgpu=True, bsize=64, maxpad=16, maxpart=4, maxtoken=1536, minfreq=False, vsize=False):

		vcbi, nwordi = ldvocab(fvocab_i, minfreq, vsize)
		vcbt, nwordt = ldvocab(fvocab_t, minfreq, vsize)
		self.vcbi, self.vcbt = vcbi, reverse_dict(vcbt)

		if expand_for_mulgpu:
			self.bsize = bsize * minbsize
			self.maxtoken = maxtoken * minbsize
		else:
			self.bsize = bsize
			self.maxtoken = maxtoken
		self.maxpad = maxpad
		self.maxpart = maxpart
		self.minbsize = minbsize

		if isinstance(modelfs, (list, tuple)):
			models = []
			for modelf in modelfs:
				tmp = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cache_len_default, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes)

				tmp = load_model_cpu(modelf, tmp)
				tmp.apply(load_fixing)

				models.append(tmp)
			model = Ensemble(models)

		else:
			model = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cache_len_default, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes)

			model = load_model_cpu(modelfs, model)
			model.apply(load_fixing)

		model.eval()

		self.use_cuda, self.cuda_device, cuda_devices, self.multi_gpu = parse_cuda_decode(cnfg.use_cuda, cnfg.gpuid, cnfg.multi_gpu_decoding)

		if self.use_cuda:
			model.to(self.cuda_device)
			if self.multi_gpu:
				model = DataParallelMT(model, device_ids=cuda_devices, output_device=self.cuda_device.index, host_replicate=True, gather_output=False)
		self.use_amp = cnfg.use_amp and self.use_cuda

		self.beam_size = cnfg.beam_size

		self.length_penalty = cnfg.length_penalty
		self.net = model
Example #3
def handle(srcf, rsf, typ, rlist):

    td = h5py.File(cnfg.dev_data, "r")
    nwordi = int(td["nwordi"][:][0])
    nwordt = int(td["nwordt"][:][0])
    td.close()

    _tmpm = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize,
                cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead,
                cnfg.cache_len, cnfg.attn_hsize, cnfg.norm_output,
                cnfg.bindDecoderEmb, cnfg.forbidden_indexes)
    _tmpm = load_model_cpu(srcf, _tmpm)
    if typ == "enc":
        _tmpm.enc.nets = ModuleList(remove_layers(list(_tmpm.enc.nets), rlist))
    elif typ == "dec":
        _tmpm.dec.nets = ModuleList(remove_layers(list(_tmpm.dec.nets), rlist))

    save_model(_tmpm, rsf, False)
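
The remove_layers helper is not shown in this excerpt. A minimal sketch of what such a helper might look like, assuming rlist holds the indices of the layers to drop (this is an assumption, not the toolkit's actual implementation):

def remove_layers_sketch(layers, rlist):
    # keep every layer whose index is not listed in rlist
    drop = set(rlist)
    return [layer for i, layer in enumerate(layers) if i not in drop]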
Example #4
def load_fixing(module):

    if hasattr(module, "fix_load"):
        module.fix_load()


td = h5py.File(cnfg.dev_data, "r")

ntest = td["ndata"][:].item()
nword = td["nword"][:].tolist()
nwordi, nwordt = nword[0], nword[-1]

if len(sys.argv) == 2:
    mymodel = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize,
                  cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead,
                  cache_len_default, cnfg.attn_hsize, cnfg.norm_output,
                  cnfg.bindDecoderEmb, cnfg.forbidden_indexes)

    mymodel = load_model_cpu(sys.argv[1], mymodel)
    mymodel.apply(load_fixing)

else:
    models = []
    for modelf in sys.argv[1:]:
        tmp = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize,
                  cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead,
                  cache_len_default, cnfg.attn_hsize, cnfg.norm_output,
                  cnfg.bindDecoderEmb, cnfg.forbidden_indexes)

        tmp = load_model_cpu(modelf, tmp)
        tmp.apply(load_fixing)
Example #5
def load_fixing(module):

    if "fix_load" in dir(module):
        module.fix_load()


td = h5py.File(sys.argv[2], "r")

ntest = td["ndata"][:].item()
nword = td["nword"][:].tolist()
nwordi, nwordt = nword[0], nword[-1]

if len(sys.argv) == 4:
    mymodel = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize,
                  cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead,
                  cache_len_default, cnfg.attn_hsize, cnfg.norm_output,
                  cnfg.bindDecoderEmb, cnfg.forbidden_indexes)

    mymodel = load_model_cpu(sys.argv[3], mymodel)
    mymodel.apply(load_fixing)

else:
    models = []
    for modelf in sys.argv[3:]:
        tmp = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize,
                  cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead,
                  cache_len_default, cnfg.attn_hsize, cnfg.norm_output,
                  cnfg.bindDecoderEmb, cnfg.forbidden_indexes)

        tmp = load_model_cpu(modelf, tmp)
        tmp.apply(load_fixing)
Example #6
set_random_seed(cnfg.seed, use_cuda)

td = h5py.File(cnfg.train_data, "r")
vd = h5py.File(cnfg.dev_data, "r")

ntrain = td["ndata"][:].item()
nvalid = vd["ndata"][:].item()
nword = td["nword"][:].tolist()
nwordi, nwordt = nword[0], nword[-1]

tl = [str(i) for i in range(ntrain)]

logger.info("Design models with seed: %d" % torch.initial_seed())
mymodel = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize,
              cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead,
              cache_len_default, cnfg.attn_hsize, cnfg.norm_output,
              cnfg.bindDecoderEmb, cnfg.forbidden_indexes)

fine_tune_m = cnfg.fine_tune_m

mymodel = init_model_params(mymodel)
mymodel.apply(init_fixing)
if fine_tune_m is not None:
    logger.info("Load pre-trained model from: " + fine_tune_m)
    mymodel = load_model_cpu(fine_tune_m, mymodel)
    mymodel.apply(load_fixing)

lossf = LabelSmoothingLoss(nwordt,
                           cnfg.label_smoothing,
                           ignore_index=pad_id,
                           reduction='sum',
                           forbidden_index=cnfg.forbidden_indexes)
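
As a side note, a minimal, self-contained sketch of standard label-smoothed cross entropy is given below. It is illustrative only; the toolkit's LabelSmoothingLoss may differ in details such as how the ignored and forbidden indices are handled.

import torch
import torch.nn.functional as F

def label_smoothing_loss_sketch(logits, target, smoothing=0.1):
    # logits: (batch, vocab), target: (batch,) of gold class indices
    nclass = logits.size(-1)
    logp = F.log_softmax(logits, dim=-1)
    true_dist = torch.full_like(logp, smoothing / (nclass - 1))
    true_dist.scatter_(1, target.unsqueeze(1), 1.0 - smoothing)
    # sum-reduced negative log-likelihood against the smoothed distribution
    return -(true_dist * logp).sum()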
Example #7
    if cnfg.save_train_state:
        statesf = wkdir + "checkpoint.states"

logger = get_logger(wkdir + "train.log")

td = h5py.File(cnfg.train_data, "r")
vd = h5py.File(cnfg.dev_data, "r")

ntrain = int(td["ndata"][:][0])
nvalid = int(vd["ndata"][:][0])
nwordi = int(td["nwordi"][:][0])
nwordt = int(td["nwordt"][:][0])

logger.info("Design models with seed: %d" % torch.initial_seed())
mymodel = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize,
              cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead,
              cnfg.cache_len, cnfg.attn_hsize, cnfg.norm_output,
              cnfg.bindDecoderEmb, cnfg.forbidden_indexes)

fine_tune_m = cnfg.fine_tune_m

tl = [("i" + str(i), "t" + str(i)) for i in range(ntrain)]

if fine_tune_m is None:
    mymodel = init_model_params(mymodel)
    mymodel.apply(init_fixing)
else:
    logger.info("Load pre-trained model from: " + fine_tune_m)
    mymodel = load_model_cpu(fine_tune_m, mymodel)

#lw = torch.ones(nwordt).float()
#lw[0] = 0.0
Example #8
    return rs, cwd


def reverse_dict(din):
    rs = {}
    for k, v in din.items():
        rs[v] = k
    return rs
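
reverse_dict above simply inverts a {token: index} mapping into {index: token}; an equivalent one-line form, shown only as a style note:

def reverse_dict_sketch(din):
    # same behaviour as reverse_dict above, written as a dict comprehension
    return {v: k for k, v in din.items()}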


td = h5py.File(cnfg.test_data, "r")
nwordi = int(td["nwordi"][:][0])
td.close()

vcbt, nwordt = ldvocab(sys.argv[2])
vcbt = reverse_dict(vcbt)

mymodel = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize,
              cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead,
              cnfg.cache_len, cnfg.attn_hsize)

mymodel.load_state_dict(torch.load(sys.argv[1], map_location='cpu'))

initmodel = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize,
                cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead,
                cnfg.cache_len, cnfg.attn_hsize)

print(initmodel.enc.pemb.w.data.equal(initmodel.dec.pemb.w.data))
print(initmodel.enc.pemb.w.data.equal(mymodel.enc.pemb.w.data))
print(initmodel.enc.pemb.w.data.equal(mymodel.dec.pemb.w.data))
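
The three print statements above rely on torch.Tensor.equal, which returns True only when the two tensors have the same shape and exactly identical values, with no tolerance. A small self-contained check:

import torch

a = torch.zeros(2, 3)
b = torch.zeros(2, 3)
print(a.equal(b))                  # True: same shape, identical values
print(a.equal(b + 1e-6))           # False: comparison is exact, not approximate
print(a.equal(torch.zeros(3, 2)))  # False: shapes differ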
Example #9
logger = get_logger(wkdir + "train.log")

use_cuda, cuda_device, cuda_devices, multi_gpu = parse_cuda(cnfg.use_cuda, cnfg.gpuid)

set_random_seed(cnfg.seed, use_cuda)

td = h5py.File(cnfg.train_data, "r")
vd = h5py.File(cnfg.dev_data, "r")

ntrain = td["ndata"][:].item()
nvalid = vd["ndata"][:].item()
nword = td["nword"][:].tolist()
nwordi, nwordt = nword[0], nword[-1]

logger.info("Design models with seed: %d" % torch.initial_seed())
mymodel = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cache_len_default, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes)

fine_tune_m = cnfg.fine_tune_m

tl = [str(i) for i in range(ntrain)]

mymodel = init_model_params(mymodel)
mymodel.apply(init_fixing)
if fine_tune_m is not None:
	logger.info("Load pre-trained model from: " + fine_tune_m)
	mymodel = load_model_cpu(fine_tune_m, mymodel)

lossf = LabelSmoothingLoss(nwordt, cnfg.label_smoothing, ignore_index=pad_id, reduction='sum', forbidden_index=cnfg.forbidden_indexes)

if cnfg.src_emb is not None:
	logger.info("Load source embedding from: " + cnfg.src_emb)
Example #10
nvalid = vd["ndata"][:].item()
nword = td["nword"][:].tolist()
nwordi, nwordt = nword[0], nword[-1]

tl = [str(i) for i in range(ntrain)]

logger.info("Design models with seed: %d" % torch.initial_seed())
mymodel = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cache_len_default, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes, cnfg.num_layer_fwd)

fine_tune_m = cnfg.fine_tune_m

mymodel = init_model_params(mymodel)
mymodel.apply(init_fixing)
if fine_tune_m is not None:
	logger.info("Load pre-trained model from: " + fine_tune_m)
	_tmpm = NMTBase(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cache_len_default, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes)
	_tmpm = init_model_params(_tmpm)
	_tmpm.apply(init_fixing)
	_tmpm = load_model_cpu(fine_tune_m, _tmpm)
	freeze_module(_tmpm)
	mymodel.load_base(_tmpm)
	_tmpm = None

if cnfg.probe_remove_self:
	mymodel.dec.nets[-1].perform_self_attn = False
elif cnfg.probe_remove_cross:
	mymodel.dec.nets[-1].perform_cross_attn = False

lossf = LabelSmoothingLoss(nwordt, cnfg.label_smoothing, ignore_index=pad_id, reduction='sum', forbidden_index=cnfg.forbidden_indexes)

if cnfg.src_emb is not None: