Example #1
    def __init__(self, opt):
        self.opt = opt
        print(opt, "\n")
        self.device = torch.device('cuda' if opt.cuda else 'cpu')

        # opt.model is the path to the saved checkpoint
        checkpoint = torch.load(opt.model)
        # model_opt holds the hyperparameters recorded at training time
        model_opt = checkpoint['settings']
        self.model_opt = model_opt
        print(model_opt)
        # Rebuild the architecture from the saved hyperparameters
        model = Transformer(
            model_opt.src_vocab_size,
            model_opt.tgt_vocab_size,
            model_opt.max_token_seq_len,
            tgt_emb_prj_weight_sharing=model_opt.proj_share_weight,
            emb_src_tgt_weight_sharing=model_opt.embs_share_weight,
            d_k=model_opt.d_k,
            d_v=model_opt.d_v,
            d_model=model_opt.d_model,
            d_word_vec=model_opt.d_word_vec,
            d_inner=model_opt.d_inner_hid,
            n_layers=model_opt.n_layers,
            n_head=model_opt.n_head,
            dropout=model_opt.dropout)

        model.load_state_dict(checkpoint['model'])
        print('[Info] Trained model state loaded.')

        # Log-softmax head for converting decoder logits into log-probabilities
        model.word_prob_prj = nn.LogSoftmax(dim=1)

        model = model.to(self.device)

        self.model = model
        self.model.eval()
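Every example on this page follows the same checkpoint layout: a dict with a 'settings' entry (the argparse namespace from training) and a 'model' entry (the state dict). A minimal, self-contained round trip of that layout, with a toy module and illustrative field names standing in for the real Transformer:

import argparse
import torch
import torch.nn as nn

# Toy stand-in for the Transformer; only the checkpoint layout matters here.
net = nn.Linear(8, 8)
settings = argparse.Namespace(d_model=8, dropout=0.1)   # illustrative fields

torch.save({'settings': settings, 'model': net.state_dict()}, 'toy.chkpt')

# weights_only=False is needed on PyTorch >= 2.6 to unpickle the Namespace.
checkpoint = torch.load('toy.chkpt', weights_only=False)
model_opt = checkpoint['settings']             # hyperparameters, as above
restored = nn.Linear(model_opt.d_model, model_opt.d_model)
restored.load_state_dict(checkpoint['model'])
restored.eval()                                # inference mode, as in __init__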
Example #2
    def __init__(self, opt):
        self.opt = opt
        self.device = torch.device('cuda' if opt.cuda else 'cpu')

        checkpoint = torch.load(opt.model)
        model_opt = checkpoint['settings']
        self.model_opt = model_opt

        if opt.prune:
            # Build a pruner and hand it to the NetworkWrapper variant
            prune_params = {'alpha': opt.prune_alpha}
            pruner = Pruner(device=self.device,
                            load_mask=opt.load_mask,
                            prune_params=prune_params)

            model = NetworkWrapper(
                model_opt.src_vocab_size,
                model_opt.tgt_vocab_size,
                model_opt.max_token_seq_len,
                tgt_emb_prj_weight_sharing=model_opt.proj_share_weight,
                emb_src_tgt_weight_sharing=model_opt.embs_share_weight,
                d_k=model_opt.d_k,
                d_v=model_opt.d_v,
                d_model=model_opt.d_model,
                d_word_vec=model_opt.d_word_vec,
                d_inner=model_opt.d_inner_hid,
                n_layers=model_opt.n_layers,
                n_head=model_opt.n_head,
                dropout=model_opt.dropout,
                transformer=pruner)
        else:
            model = Transformer(
                model_opt.src_vocab_size,
                model_opt.tgt_vocab_size,
                model_opt.max_token_seq_len,
                tgt_emb_prj_weight_sharing=model_opt.proj_share_weight,
                emb_src_tgt_weight_sharing=model_opt.embs_share_weight,
                d_k=model_opt.d_k,
                d_v=model_opt.d_v,
                d_model=model_opt.d_model,
                d_word_vec=model_opt.d_word_vec,
                d_inner=model_opt.d_inner_hid,
                n_layers=model_opt.n_layers,
                n_head=model_opt.n_head,
                dropout=model_opt.dropout)

        model.load_state_dict(checkpoint['model'])
        print('[Info] Trained model state loaded.')

        model.word_prob_prj = nn.LogSoftmax(dim=1)

        model = model.to(self.device)

        self.model = model
        self.model.eval()
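Neither Pruner nor NetworkWrapper is shown in these examples; the pruning branch above only reveals the constructor signature it expects. A purely illustrative stub of that interface, with hypothetical internals:

class Pruner:
    """Illustrative stub -- the real Pruner's behavior is not shown above."""
    def __init__(self, device, load_mask=None, prune_params=None):
        self.device = device
        self.load_mask = load_mask    # optional path to a precomputed mask
        self.alpha = (prune_params or {}).get('alpha', 0.0)   # pruning strength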
Example #3
    def __init__(self, opt):
        # opt comes from argparse
        self.opt = opt
        self.device = torch.device('cuda' if opt.cuda else 'cpu')
        self.m = opt.m
        # opt.model is the path to the trained checkpoint
        checkpoint = torch.load(opt.model)
        # model_opt holds the saved model hyperparameters
        model_opt = checkpoint['settings']
        self.model_opt = model_opt

        model = Transformer(
            model_opt.src_vocab_size,
            model_opt.tgt_vocab_size,
            model_opt.max_token_seq_len,
            tgt_emb_prj_weight_sharing=model_opt.proj_share_weight,
            emb_src_tgt_weight_sharing=model_opt.embs_share_weight,
            d_k=model_opt.d_k,
            d_v=model_opt.d_v,
            d_model=model_opt.d_model,
            d_word_vec=model_opt.d_word_vec,
            d_inner=model_opt.d_inner_hid,
            n_layers=model_opt.n_layers,
            n_head=model_opt.n_head,
            dropout=model_opt.dropout,
            return_attns=opt.return_attns)

        # Load the trained weights into the rebuilt model
        model.load_state_dict(checkpoint['model'])
        print('[Info] Trained model state loaded.')

        model.word_prob_prj = nn.LogSoftmax(dim=1)

        model = model.to(self.device)

        self.model = model
        self.model.eval()
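The only departure in Example #3 is the return_attns flag, which asks the model to hand back its attention maps alongside the outputs. As a self-contained illustration of what such maps look like, PyTorch's built-in attention module exposes the same information through need_weights:

import torch
import torch.nn as nn

# (output, attention_weights) comes back when need_weights=True -- the same
# information a return_attns flag would surface from a full Transformer.
mha = nn.MultiheadAttention(embed_dim=16, num_heads=4, batch_first=True)
x = torch.randn(2, 5, 16)              # (batch, seq_len, d_model)
out, attn = mha(x, x, x, need_weights=True)
print(attn.shape)                      # torch.Size([2, 5, 5]), averaged over heads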
Example #4
    def __init__(self, opt):
        self.opt = opt
        self.device = torch.device('cuda' if opt.cuda else 'cpu')

        checkpoint = torch.load(opt.model)
        model_opt = checkpoint['settings']
        self.model_opt = model_opt
        # Added: keys in this checkpoint carry a 'module.model.' prefix
        # (e.g. saved from a DataParallel-wrapped container), so strip it
        # to match the plain Transformer's state dict.
        checkpoint_copy = checkpoint['model'].copy()
        for k in list(checkpoint_copy.keys()):
            new_key = k.replace('module.model.', '')
            checkpoint_copy[new_key] = checkpoint_copy.pop(k)
        model = Transformer(
            model_opt.src_vocab_size,
            model_opt.tgt_vocab_size,
            model_opt.max_token_seq_len,
            tgt_emb_prj_weight_sharing=model_opt.proj_share_weight,
            emb_src_tgt_weight_sharing=model_opt.embs_share_weight,
            d_k=model_opt.d_k,
            d_v=model_opt.d_v,
            d_model=model_opt.d_model,
            d_word_vec=model_opt.d_word_vec,
            d_inner=model_opt.d_inner_hid,
            n_layers=model_opt.n_layers,
            n_head=model_opt.n_head,
            dropout=model_opt.dropout)

        model.load_state_dict(checkpoint_copy)
        print('[Info] Trained model state loaded.')

        model.word_prob_prj = nn.LogSoftmax(dim=1)

        model = model.to(self.device)

        self.model = model
        self.model.eval()
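Example #4's key-renaming loop compensates for a checkpoint saved from a model held inside nn.DataParallel (apparently wrapped in a container whose attribute is named model): DataParallel prefixes every state-dict key with 'module.'. A self-contained reproduction of the mismatch and the fix:

import torch
import torch.nn as nn

class Container(nn.Module):
    # Mimics a wrapper whose submodule is stored under the attribute 'model'.
    def __init__(self):
        super().__init__()
        self.model = nn.Linear(4, 4)

wrapped = nn.DataParallel(Container())
state = wrapped.state_dict()
print(list(state))    # ['module.model.weight', 'module.model.bias']

# Same strip as the loop above, written as a dict comprehension.
clean = {k.replace('module.model.', ''): v for k, v in state.items()}
nn.Linear(4, 4).load_state_dict(clean)   # now the keys match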
Example #5
import os

import numpy as np
import torch
import torch.nn as nn

import dataset  # project-local data pipeline providing getDataLoader
import resnet
import HyperParameters as hp
import transformer.Constants as Constants
# Assumed locations within this project's transformer package:
from transformer.Models import Transformer
from transformer.Translator import Translator

if __name__ == "__main__":
	torch.cuda.set_device(hp.gpu)
	testLoader = dataset.getDataLoader(is_train=False, batch_size=5, shuffle=False)

	net1 = resnet.resnet34()
	net2 = Transformer(len_encoder=hp.enc_input_len, n_tgt_vocab=hp.num_classes, len_max_seq=hp.max_seq_len, n_layers=hp.n_layers)
	net2.word_prob_prj = nn.LogSoftmax(dim=1)
	net1.cuda().eval()
	#net2.cuda().eval()

	path_to_restore = os.path.join(hp.checkpoint_path, hp.model_path_pre+"_"+str(hp.model_path_idx) + ".pth")
	if os.path.exists(path_to_restore):
		print("restore from:", path_to_restore)
		checkpoint = torch.load(path_to_restore)
		net1.load_state_dict(checkpoint["state_dict_net1"])
		net2.load_state_dict(checkpoint["state_dict_net2"])
		print("restore successfully!")
	else:
		print("fail to restore, path don't exist")
	
	translator = Translator(net2, beam_size=hp.beam_size, max_seq_len=hp.max_seq_len, n_best=hp.n_best)
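Example #5 bundles both networks' weights into one file under the keys 'state_dict_net1' and 'state_dict_net2'. A self-contained round trip of that layout, with toy modules in place of the ResNet and the Transformer:

import torch
import torch.nn as nn

net1, net2 = nn.Linear(4, 4), nn.Linear(4, 4)

# Save both state dicts under the keys the restore code above expects.
torch.save({'state_dict_net1': net1.state_dict(),
            'state_dict_net2': net2.state_dict()}, 'pair.pth')

checkpoint = torch.load('pair.pth')
net1.load_state_dict(checkpoint['state_dict_net1'])
net2.load_state_dict(checkpoint['state_dict_net2'])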