Exemplo n.º 1
0
    def __init__(self,
                 modelfs,
                 fvocab_i,
                 fvocab_t,
                 cnfg,
                 minbsize=1,
                 expand_for_mulgpu=True,
                 bsize=64,
                 maxpad=16,
                 maxpart=4,
                 maxtoken=1536,
                 minfreq=False,
                 vsize=False):
        """Set up vocabularies, batching limits and the decoding network.

        Args:
            modelfs: A single checkpoint reference, or a list/tuple of them.
                A list/tuple produces an ``Ensemble`` of the loaded models.
            fvocab_i: Source vocabulary argument handed to ``ldvocab``.
            fvocab_t: Target vocabulary argument handed to ``ldvocab``; the
                result is stored after passing through ``reverse_dict``.
            cnfg: Configuration object providing the model hyper-parameters
                as well as the CUDA, beam-size and length-penalty settings
                read below.
            minbsize: Factor applied to ``bsize`` and ``maxtoken`` when
                ``expand_for_mulgpu`` is true; always stored on the instance.
            expand_for_mulgpu: Whether ``bsize`` and ``maxtoken`` are scaled
                by ``minbsize`` before being stored.
            bsize: Base value for ``self.bsize``.
            maxpad: Stored unchanged as ``self.maxpad``.
            maxpart: Stored unchanged as ``self.maxpart``.
            maxtoken: Base value for ``self.maxtoken``.
            minfreq: Forwarded to ``ldvocab`` for both vocabularies.
            vsize: Forwarded to ``ldvocab`` for both vocabularies.
        """
        src_vocab, nwordi = ldvocab(fvocab_i, minfreq, vsize)
        tgt_vocab, nwordt = ldvocab(fvocab_t, minfreq, vsize)
        self.vcbi = src_vocab
        self.vcbt = reverse_dict(tgt_vocab)

        # Batch limits are multiplied by minbsize only when requested.
        self.bsize = bsize * minbsize if expand_for_mulgpu else bsize
        self.maxtoken = maxtoken * minbsize if expand_for_mulgpu else maxtoken
        self.maxpad = maxpad
        self.maxpart = maxpart
        self.minbsize = minbsize

        def _load_one(checkpoint):
            # Build a fresh NMT from cnfg, load the checkpoint through
            # load_model_cpu, then run load_fixing over the modules.
            net = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer,
                      cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop,
                      cnfg.share_emb, cnfg.nhead, cnfg.cache_len,
                      cnfg.attn_hsize, cnfg.norm_output,
                      cnfg.bindDecoderEmb, cnfg.forbidden_indexes)
            net = load_model_cpu(checkpoint, net)
            net.apply(load_fixing)
            return net

        if isinstance(modelfs, (list, tuple)):
            # Several checkpoints: load each in order and wrap them together.
            model = Ensemble([_load_one(checkpoint) for checkpoint in modelfs])
        else:
            model = _load_one(modelfs)

        # NOTE(review): this local device is built straight from cnfg.gpuid,
        # independently of the self.cuda_device returned by parse_cuda_decode
        # below; only its .index is used (as DataParallelMT's output_device).
        # Confirm the two always refer to the same device.
        cuda_device = torch.device(cnfg.gpuid)

        model.eval()

        decode_setup = parse_cuda_decode(
            cnfg.use_cuda, cnfg.gpuid, cnfg.multi_gpu_decoding)
        self.use_cuda, self.cuda_device, cuda_devices, self.multi_gpu = decode_setup

        if self.use_cuda:
            model.to(self.cuda_device)
            if self.multi_gpu:
                model = DataParallelMT(
                    model,
                    device_ids=cuda_devices,
                    output_device=cuda_device.index,
                    host_replicate=True,
                    gather_output=False,
                )

        self.beam_size = cnfg.beam_size

        self.length_penalty = cnfg.length_penalty
        self.net = model
Exemplo n.º 2
0
# Open the file named by cnfg.dev_data with h5py in read-only mode.
vd = h5py.File(cnfg.dev_data, "r")

# `td` is defined before this excerpt (presumably the training-data HDF5
# file opened the same way -- confirm against the preceding lines).
# "ndata" is read in full and converted to a Python scalar for each file.
ntrain = td["ndata"][:].item()
nvalid = vd["ndata"][:].item()
# "nword" is read as a list; its first entry is used as the source-side size
# and its last entry as the target-side size.
nword = td["nword"][:].tolist()
nwordi, nwordt = nword[0], nword[-1]

# Log the current torch seed before the model is constructed.
logger.info("Design models with seed: %d" % torch.initial_seed())
mymodel = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cache_len_default, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes)

# Optional checkpoint to start from; None means no checkpoint is loaded.
fine_tune_m = cnfg.fine_tune_m

# String forms of 0 .. ntrain-1 (presumably the keys used to index the
# training data later -- verify against the code that consumes `tl`).
tl = [str(i) for i in range(ntrain)]

# Initialise the parameters first, then apply init_fixing to every module.
mymodel = init_model_params(mymodel)
mymodel.apply(init_fixing)
# The checkpoint, when configured, is loaded after initialisation and the
# object returned by load_model_cpu replaces mymodel.
if fine_tune_m is not None:
	logger.info("Load pre-trained model from: " + fine_tune_m)
	mymodel = load_model_cpu(fine_tune_m, mymodel)

# Earlier NLLLoss-based criterion, left disabled:
#lw = torch.ones(nwordt).float()
#lw[0] = 0.0
#lossf = nn.NLLLoss(lw, ignore_index=0, reduction='sum')
# Summed label-smoothing loss over nwordt classes; targets equal to pad_id
# are passed as ignore_index, and cnfg.forbidden_indexes as forbidden_index.
lossf = LabelSmoothingLoss(nwordt, cnfg.label_smoothing, ignore_index=pad_id, reduction='sum', forbidden_index=cnfg.forbidden_indexes)

# Optionally load embeddings into the encoder / decoder embedding weights
# from the files named in the configuration.
if cnfg.src_emb is not None:
	logger.info("Load source embedding from: " + cnfg.src_emb)
	load_emb(cnfg.src_emb, mymodel.enc.wemb.weight, nwordi, cnfg.scale_down_emb, cnfg.freeze_srcemb)
if cnfg.tgt_emb is not None:
	logger.info("Load target embedding from: " + cnfg.tgt_emb)
	load_emb(cnfg.tgt_emb, mymodel.dec.wemb.weight, nwordt, cnfg.scale_down_emb, cnfg.freeze_tgtemb)