# Variant 1: upstream Translator.__init__ -- rebuild a Transformer from the
# hyperparameters stored in the checkpoint, then load its trained weights.
def __init__(self, opt):
    self.opt = opt
    print(opt, "\n")
    self.device = torch.device('cuda' if opt.cuda else 'cpu')

    # opt.model is the checkpoint path; 'settings' holds the training-time
    # hyperparameters needed to reconstruct the architecture.
    checkpoint = torch.load(opt.model)
    model_opt = checkpoint['settings']
    self.model_opt = model_opt
    print(model_opt)

    model = Transformer(
        model_opt.src_vocab_size,
        model_opt.tgt_vocab_size,
        model_opt.max_token_seq_len,
        tgt_emb_prj_weight_sharing=model_opt.proj_share_weight,
        emb_src_tgt_weight_sharing=model_opt.embs_share_weight,
        d_k=model_opt.d_k,
        d_v=model_opt.d_v,
        d_model=model_opt.d_model,
        d_word_vec=model_opt.d_word_vec,
        d_inner=model_opt.d_inner_hid,
        n_layers=model_opt.n_layers,
        n_head=model_opt.n_head,
        dropout=model_opt.dropout)

    model.load_state_dict(checkpoint['model'])
    print('[Info] Trained model state loaded.')

    model.word_prob_prj = nn.LogSoftmax(dim=1)

    model = model.to(self.device)

    self.model = model
    self.model.eval()
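# For context, opt is the argparse namespace built by the repo's translate
# script. A minimal sketch covering just the two options this constructor
# actually reads (opt.model and opt.cuda); the flag spellings are assumptions,
# not confirmed by the code above.
import argparse

parser = argparse.ArgumentParser(description='translate.py')
parser.add_argument('-model', required=True,
                    help='path to the trained model checkpoint')
parser.add_argument('-no_cuda', action='store_true')
opt = parser.parse_args()
opt.cuda = not opt.no_cuda  # __init__ reads opt.cuda and opt.model

translator = Translator(opt)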
# Variant 2: same constructor, with an optional pruning path that builds a
# NetworkWrapper (driven by a Pruner) instead of a plain Transformer.
def __init__(self, opt):
    self.opt = opt
    self.device = torch.device('cuda' if opt.cuda else 'cpu')

    checkpoint = torch.load(opt.model)
    model_opt = checkpoint['settings']
    self.model_opt = model_opt

    if opt.prune:
        # NetworkWrapper path; Pruner and NetworkWrapper are project-specific.
        prune_params = {'alpha': opt.prune_alpha}
        pruner = Pruner(device=self.device,
                        load_mask=opt.load_mask,
                        prune_params=prune_params)
        model = NetworkWrapper(
            model_opt.src_vocab_size,
            model_opt.tgt_vocab_size,
            model_opt.max_token_seq_len,
            tgt_emb_prj_weight_sharing=model_opt.proj_share_weight,
            emb_src_tgt_weight_sharing=model_opt.embs_share_weight,
            d_k=model_opt.d_k,
            d_v=model_opt.d_v,
            d_model=model_opt.d_model,
            d_word_vec=model_opt.d_word_vec,
            d_inner=model_opt.d_inner_hid,
            n_layers=model_opt.n_layers,
            n_head=model_opt.n_head,
            dropout=model_opt.dropout,
            transformer=pruner)
    else:
        model = Transformer(
            model_opt.src_vocab_size,
            model_opt.tgt_vocab_size,
            model_opt.max_token_seq_len,
            tgt_emb_prj_weight_sharing=model_opt.proj_share_weight,
            emb_src_tgt_weight_sharing=model_opt.embs_share_weight,
            d_k=model_opt.d_k,
            d_v=model_opt.d_v,
            d_model=model_opt.d_model,
            d_word_vec=model_opt.d_word_vec,
            d_inner=model_opt.d_inner_hid,
            n_layers=model_opt.n_layers,
            n_head=model_opt.n_head,
            dropout=model_opt.dropout)

    model.load_state_dict(checkpoint['model'])
    print('[Info] Trained model state loaded.')

    model.word_prob_prj = nn.LogSoftmax(dim=1)

    model = model.to(self.device)

    self.model = model
    self.model.eval()
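# Pruner/NetworkWrapper above are project-specific. For reference, PyTorch's
# built-in torch.nn.utils.prune implements a similar mask-based scheme; a
# minimal sketch using that real API (the 30% sparsity level is an arbitrary
# example value, not taken from this code):
import torch.nn as nn
import torch.nn.utils.prune as prune

layer = nn.Linear(512, 512)
# Zero the 30% smallest-magnitude weights; this registers a weight_mask buffer
# and reparametrizes layer.weight as weight_orig * weight_mask.
prune.l1_unstructured(layer, name='weight', amount=0.3)
print(float((layer.weight == 0).float().mean()))  # ~0.3 sparsity
# Fold the mask into the weight tensor permanently.
prune.remove(layer, 'weight')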
# Variant 3: adds a return_attns option so the rebuilt Transformer can also
# expose its attention weights.
def __init__(self, opt):  # opt comes from argparse
    self.opt = opt
    self.device = torch.device('cuda' if opt.cuda else 'cpu')
    self.m = opt.m  # fork-specific option

    # opt.model is the checkpoint path; 'settings' holds the model
    # hyperparameters saved at training time.
    checkpoint = torch.load(opt.model)
    model_opt = checkpoint['settings']
    self.model_opt = model_opt

    model = Transformer(
        model_opt.src_vocab_size,
        model_opt.tgt_vocab_size,
        model_opt.max_token_seq_len,
        tgt_emb_prj_weight_sharing=model_opt.proj_share_weight,
        emb_src_tgt_weight_sharing=model_opt.embs_share_weight,
        d_k=model_opt.d_k,
        d_v=model_opt.d_v,
        d_model=model_opt.d_model,
        d_word_vec=model_opt.d_word_vec,
        d_inner=model_opt.d_inner_hid,
        n_layers=model_opt.n_layers,
        n_head=model_opt.n_head,
        dropout=model_opt.dropout,
        return_attns=opt.return_attns)

    # Load the actual model weights.
    model.load_state_dict(checkpoint['model'])
    print('[Info] Trained model state loaded.')

    model.word_prob_prj = nn.LogSoftmax(dim=1)

    model = model.to(self.device)

    self.model = model
    self.model.eval()
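# Upstream only threads return_attns through the encoder and decoder, so this
# fork's top-level forward signature is a guess. A hypothetical sketch of
# consuming such output, assuming forward returns (logits, attention list)
# when the model was built with return_attns=True:
import matplotlib.pyplot as plt

logits, dec_enc_attns = model(src_seq, src_pos, tgt_seq, tgt_pos)
attn = dec_enc_attns[-1][0].detach().cpu().numpy()  # last layer, first head
plt.imshow(attn, aspect='auto')
plt.xlabel('source position')
plt.ylabel('target position')
plt.savefig('dec_enc_attn.png')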
# Variant 4: strips the 'module.model.' key prefix (left behind by saving an
# nn.DataParallel-wrapped model) so the state dict matches the bare Transformer.
def __init__(self, opt):
    self.opt = opt
    self.device = torch.device('cuda' if opt.cuda else 'cpu')

    checkpoint = torch.load(opt.model)
    model_opt = checkpoint['settings']
    self.model_opt = model_opt

    # added by self: rename keys to match the unwrapped model
    checkpoint_copy = checkpoint['model'].copy()
    for k in list(checkpoint_copy.keys()):
        new_key = k.replace('module.model.', '')
        checkpoint_copy[new_key] = checkpoint_copy.pop(k)
    # end

    model = Transformer(
        model_opt.src_vocab_size,
        model_opt.tgt_vocab_size,
        model_opt.max_token_seq_len,
        tgt_emb_prj_weight_sharing=model_opt.proj_share_weight,
        emb_src_tgt_weight_sharing=model_opt.embs_share_weight,
        d_k=model_opt.d_k,
        d_v=model_opt.d_v,
        d_model=model_opt.d_model,
        d_word_vec=model_opt.d_word_vec,
        d_inner=model_opt.d_inner_hid,
        n_layers=model_opt.n_layers,
        n_head=model_opt.n_head,
        dropout=model_opt.dropout)

    model.load_state_dict(checkpoint_copy)
    print('[Info] Trained model state loaded.')

    model.word_prob_prj = nn.LogSoftmax(dim=1)

    model = model.to(self.device)

    self.model = model
    self.model.eval()
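# The prefix comes from nn.DataParallel: the wrapper stores the wrapped module
# under .module, so every saved key gains a 'module.' prefix (here apparently
# nested one level deeper under a .model attribute). A minimal sketch of the
# effect, plus an equivalent one-pass cleanup:
import torch.nn as nn

net = nn.Linear(4, 2)
wrapped = nn.DataParallel(net)
print(list(wrapped.state_dict().keys()))  # ['module.weight', 'module.bias']

# Equivalent dict-comprehension cleanup of a saved state dict:
state = {k.replace('module.', '', 1): v
         for k, v in wrapped.state_dict().items()}
net.load_state_dict(state)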
# Variant 5: standalone evaluation script that restores a ResNet encoder and a
# Transformer decoder from one checkpoint, then builds a beam-search Translator.
import os

import numpy as np
import torch
import torch.nn as nn

import dataset  # provides getDataLoader below
import resnet
import HyperParameters as hp
import transformer.Constants as Constants
# Assumed module paths for Transformer/Translator, following the upstream layout.
from transformer.Models import Transformer
from transformer.Translator import Translator

if __name__ == "__main__":
    torch.cuda.set_device(hp.gpu)
    testLoader = dataset.getDataLoader(is_train=False, batch_size=5, shuffle=False)

    net1 = resnet.resnet34()
    net2 = Transformer(len_encoder=hp.enc_input_len,
                       n_tgt_vocab=hp.num_classes,
                       len_max_seq=hp.max_seq_len,
                       n_layers=hp.n_layers)
    net2.word_prob_prj = nn.LogSoftmax(dim=1)

    net1.cuda().eval()
    # net2.cuda().eval()

    path_to_restore = os.path.join(
        hp.checkpoint_path,
        hp.model_path_pre + "_" + str(hp.model_path_idx) + ".pth")
    if os.path.exists(path_to_restore):
        print("restoring from:", path_to_restore)
        checkpoint = torch.load(path_to_restore)
        net1.load_state_dict(checkpoint["state_dict_net1"])
        net2.load_state_dict(checkpoint["state_dict_net2"])
        print("restored successfully!")
    else:
        print("failed to restore: checkpoint path does not exist")

    translator = Translator(net2,
                            beam_size=hp.beam_size,
                            max_seq_len=hp.max_seq_len,
                            n_best=hp.n_best)
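# Hypothetical evaluation loop. Assumes this fork keeps the upstream
# Translator.translate_batch(src_seq, src_pos) interface and that net1 maps
# images to the Transformer encoder's input features; both are guesses.
with torch.no_grad():
    for images, targets in testLoader:
        feats = net1(images.cuda())
        pos = torch.arange(1, feats.size(1) + 1, device=feats.device)
        pos = pos.unsqueeze(0).repeat(feats.size(0), 1)
        batch_hyp, batch_scores = translator.translate_batch(feats, pos)
        for hyps in batch_hyp:
            print(hyps[:hp.n_best])  # n-best token-id sequences per sample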