# NOTE(review): the line below starts with '#', so Python treats the whole
# line -- the banner and the generator-setup statements that follow it -- as a
# single comment; none of it executes. The statements reference names that are
# not defined in the visible part of this file (dataset_val, checkpoint, _netG,
# _netE) and the trailing `if opt.cuda:` section stops after one `.cuda()` call,
# so this is presumably leftover generator-loading code -- TODO confirm and
# either restore it as properly formatted code or delete it.
#################################################################################### vocab_size = dataset_val.vocab_size ques_length = dataset_val.ques_length ans_length = dataset_val.ans_length + 1 his_length = dataset_val.ans_length + dataset_val.ques_length itow = dataset_val.itow img_feat_size = 512 print('init Generative model...') netG = _netG(opt.model, vocab_size, opt.ninp, opt.nhid, opt.nlayers, opt.dropout, False) netE_g = _netE(opt.model, opt.ninp, opt.nhid, opt.nlayers, opt.dropout, img_feat_size) netW_g = model._netW(vocab_size, opt.ninp, opt.dropout) sampler = model.gumbel_sampler() critG = model.G_loss(opt.ninp) critLM = model.LMCriterion() if opt.evalall == False: # opt.model_path_D != '' and opt.model_path_G != '': print('Loading Generative model...') netW_g.load_state_dict(checkpoint['netW']) netE_g.load_state_dict(checkpoint['netE']) netG.load_state_dict(checkpoint['netG']) else: print('Loading Generative model...') netW_g.load_state_dict(checkpoint['netW_g']) netE_g.load_state_dict(checkpoint['netE_g']) netG.load_state_dict(checkpoint['netG']) if opt.cuda: # ship to cuda, if has GPU netW_g.cuda()
def bulid_model_D(opt, src_vocab_size, tgt_vocab_size):
    """Build the discriminator-side networks and the auxiliary criteria.

    Creates the source/target word-embedding modules and attention encoders,
    the n-pair discriminator loss, and the sampler / generator loss / LM
    criterion / BLEU scorer objects. When ``opt.model_path_D`` is non-empty,
    the four networks and the learning rate are restored from that checkpoint.

    Args:
        opt: Options object. The attributes read here are ``save_model``,
            ``model_path_D``, ``LM_lr``, ``rnn_size``, ``dropout``, ``cuda``
            and ``margin``.
        src_vocab_size: Vocabulary size passed to the source-side modules.
        tgt_vocab_size: Vocabulary size passed to the target-side modules.

    Returns:
        The 10-tuple ``(tgt_netW, src_netW, src_netE_att, tgt_netE_att,
        critD, lr, sampler, critG, critLM, BLEU_score)``.
    """
    save_path = opt.save_model
    print("save_path", save_path)
    try:
        os.makedirs(save_path)
    except OSError as err:
        # An already-existing directory is the expected case and stays silent.
        # Any other failure is reported instead of being swallowed, but is kept
        # non-fatal so directory creation remains best-effort as before.
        if not os.path.isdir(save_path):
            print("warning: could not create save_path '{}': {}".format(
                save_path, err))

    # Single source of truth for "was a checkpoint requested": None means no.
    checkpoint_D = None
    if opt.model_path_D != '':
        print("=> loading checkpoint '{}'".format(opt.model_path_D))
        # When CUDA is not requested, map every storage to CPU so a checkpoint
        # saved on a GPU machine can still be restored on a CPU-only host.
        map_location = None if opt.cuda else (lambda storage, loc: storage)
        # NOTE: torch.load unpickles the file; only load trusted checkpoints.
        checkpoint_D = torch.load(opt.model_path_D, map_location=map_location)

    lr = opt.LM_lr

    print("rnn size", opt.rnn_size)
    # Construction order is kept as in the original so that any RNG-dependent
    # parameter initialisation stays reproducible.
    src_netE_att = _netE_att(opt.rnn_size, src_vocab_size, opt.dropout)
    src_netW = _netW(src_vocab_size, opt.rnn_size, opt.dropout,
                     name="src", cuda=opt.cuda)
    print("src_netW", src_vocab_size, src_netW.word_embed)
    tgt_netW = _netW(tgt_vocab_size, opt.rnn_size, opt.dropout,
                     name="tgt", cuda=opt.cuda)
    print("tgt_netW", tgt_vocab_size, tgt_netW.word_embed)
    tgt_netE_att = _netE_att(opt.rnn_size, tgt_vocab_size, opt.dropout,
                             is_target=True)
    critD = model.nPairLoss(opt.rnn_size, opt.margin)

    if checkpoint_D is not None:
        # Restore the pre-trained weights and the learning rate saved with them.
        src_netW.load_state_dict(checkpoint_D['src_netW'])
        tgt_netW.load_state_dict(checkpoint_D['tgt_netW'])
        src_netE_att.load_state_dict(checkpoint_D['src_netE_att'])
        tgt_netE_att.load_state_dict(checkpoint_D['tgt_netE_att'])
        lr = checkpoint_D['lr']

    if opt.cuda:
        tgt_netW.cuda()
        src_netW.cuda()
        src_netE_att.cuda()
        tgt_netE_att.cuda()
        critD.cuda()

    print('init Generative model...')
    sampler = model.gumbel_sampler()
    critG = model.G_loss(opt.rnn_size)
    critLM = model.LMCriterion()
    BLEU_score = model.BLEU_score()

    if opt.cuda:  # ship to cuda, if has GPU
        sampler.cuda()
        critG.cuda()
        critLM.cuda()
        BLEU_score.cuda()

    print("load netD successfully")

    return (tgt_netW, src_netW, src_netE_att, tgt_netE_att, critD, lr,
            sampler, critG, critLM, BLEU_score)