def __init__(self, pre_path=''):
    # Load dataset iterators and vocabularies.
    self.all_data = ABSAData()
    self.train_iter = self.all_data.train_iter
    self.val_iter = self.all_data.val_iter
    self.test_iter = self.all_data.test_iter
    self.text_vocab = self.all_data.text_vocab
    self.aspect_vocab = self.all_data.aspect_vocab
    self.label_vocab = self.all_data.label_vocab

    self.device_dict = {
        -1: 'cpu',
        0: 'cuda:0',
        1: 'cuda:1',
        2: 'cuda:2',
    }
    self.model = config.model().to(self.device_dict[config.device])
    if config.pretrain:
        self.load_model(config.pretrain_path)
    self.criterion = config.criterion()
    # TODO: set momentum for the optimizer (momentum=0.9)
    self.optimizer = config.optimizer(
        filter(lambda p: p.requires_grad, self.model.parameters()),
        lr=config.learning_rate,
        lr_decay=config.lr_decay,
        weight_decay=0.001)
    if config.if_log:
        self.writer = SummaryWriter(log_dir=config.log_dir)
    # Remember where to store the pretrained model when we are not loading one.
    if not config.pretrain and pre_path != '':
        self.pre_dir = pre_path
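# --- Hedged sketch (illustration only, not part of the original project) ------
# The constructor above reads a number of fields from an external `config`
# module.  The class below sketches the fields it appears to assume, with
# placeholder values; the real project's config certainly differs, and
# `_ToySentimentModel` is a stand-in for the actual ABSA model class.
import torch.nn as nn
import torch.optim as optim


class _ToySentimentModel(nn.Module):
    """Placeholder for the real model constructed by config.model()."""

    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(300, 3)  # 3 sentiment classes, purely illustrative

    def forward(self, x):
        return self.fc(x)


class _ConfigSketch:
    model = _ToySentimentModel       # config.model() constructs the network
    device = -1                      # -1 -> 'cpu', 0/1/2 -> 'cuda:N'
    pretrain = False
    pretrain_path = ''
    criterion = nn.CrossEntropyLoss  # config.criterion() constructs the loss
    optimizer = optim.Adagrad        # Adagrad accepts the lr_decay kwarg used above
    learning_rate = 0.01
    lr_decay = 0.0
    if_log = False
    log_dir = 'runs/absa'
# -------------------------------------------------------------------------------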
def align_classification_train_crf(self, train_data, valid_data, test_data, embed, pretrain=True):
    # Build the aspect extractor (ABAE) and load its pretrained weights.
    init_aspect = np.array(np.load("initAspect.npy"))
    # init_aspect = init_aspect / np.linalg.norm(init_aspect, axis=-1, keepdims=True)
    init_aspect = torch.from_numpy(init_aspect)
    PreTrainABAE = clas_model.PreTrainABAE(init_aspect, embed).to(config.device)

    pre_trained_aspect = torch.load("AspectExtract/Aspect_Model.pkl")
    aspect_dict = PreTrainABAE.state_dict()
    pre_trained_dict = {
        k: v for k, v in pre_trained_aspect.items() if k in aspect_dict
    }
    aspect_dict.update(pre_trained_dict)
    PreTrainABAE.load_state_dict(aspect_dict)
    # PreTrainABAE = PreTrainABAE.eval()

    trained_aspect = pre_trained_aspect["aspect_lookup_mat"].data
    run = clas_model.CrfWdeRnnEncoder(300, 300, 50, embed, trained_aspect).to(config.device)
    # params = []
    # for param in run.parameters():
    #     if param.requires_grad:
    #         params.append(param)

    # Load pretrained weights for the sentence encoder.
    if pretrain:
        pre_trained_dict = torch.load(config.pretrained_model)
        # pre_trained_dict = torch.load(config.pretrained_model, map_location=lambda storage, loc: storage)
        model_dict = run.state_dict()
        pre_trained_dict = {
            k: v for k, v in pre_trained_dict.items() if k in model_dict
        }
        model_dict.update(pre_trained_dict)
        run.load_state_dict(model_dict)

    criterion = config.criterion()
    optimizer_rnn = config.optimizer(filter(lambda p: p.requires_grad, run.parameters()), lr=config.clas_lr)
    optimizer_abae = config.optimizer(filter(lambda p: p.requires_grad, PreTrainABAE.parameters()), lr=config.clas_lr)

    all_evaluate = []
    best_test = 0
    for epoch in range(config.epoch + 1):
        run_hidden = run.initHidden(config.batch_size)
        # context = torch.ones((config.batch_size, 50))
        # loss_last = torch.tensor([0], dtype=torch.float)
        optimizer_rnn.zero_grad()
        optimizer_abae.zero_grad()
        run.zero_grad()
        for idx, sample_batch in enumerate(train_data):
            run = run.train()
            PreTrainABAE = PreTrainABAE.train()
            input_data = sample_batch['input'].to(config.device)
            label = sample_batch['label'].to(config.device)
            aspect_info, trained_aspect, reg = PreTrainABAE(input_data)
            input_data[:, 1] = aspect_info
            out = run(input_data, run_hidden, trained_aspect, "train").view(config.batch_size, 2).to(config.device)
            # loss = criterion(out, label) + reg.float()
            loss = criterion(out, label)
            # Clear accumulated gradients before this batch's backward pass.
            optimizer_rnn.zero_grad()
            optimizer_abae.zero_grad()
            loss.backward()
            optimizer_rnn.step()
            optimizer_abae.step()
        # if epoch % 5 == 0:
        #     run.zero_grad()
        #     run = run.eval()
        #     valid_now = self.valid(run)
        #     print('epoch {} of {}: TEST : {}'.format(epoch, 100, valid_now))
        print('epoch {} of {}: loss : {}'.format(epoch, config.epoch, loss))

        # Validate after every epoch and keep the best checkpoint.
        if epoch % 1 == 0:
            with torch.no_grad():
                total = 0
                correct = 0
                optimizer_rnn.zero_grad()
                optimizer_abae.zero_grad()
                run.zero_grad()
                PreTrainABAE.zero_grad()
                run_hidden = run.initHidden(1)
                # context = torch.ones((1, 50))
                for index, sample_batch in enumerate(valid_data):
                    run = run.eval()
                    PreTrainABAE = PreTrainABAE.eval()
                    input_data = sample_batch['input'].to(config.device)
                    label = sample_batch['label'].to(config.device)
                    aspect_info, trained_aspect, _ = PreTrainABAE(input_data)
                    input_data[:, 1] = aspect_info
                    outputs = run(input_data, run_hidden, trained_aspect, "test").view(1, 2).to(config.device)
                    _, predicted = torch.max(outputs.data, 1)
                    total += label.size(0)
                    correct += (predicted == label).sum().item()
                acc = correct / total
                print("acc rate :", acc)
                if acc > best_test:
                    best_test = acc
                    file_name = "ClassifyModelSave/Final_model.pkl"
                    file_name_aspect = "ClassifyModelSave/Final_model_aspect.pkl"
                    torch.save(run.state_dict(), file_name)
                    torch.save(PreTrainABAE.state_dict(), file_name_aspect)
                all_evaluate.append(acc)

    '''
    Load the best models and begin the test phase
    '''
    PreTrainABAE_test = clas_model.PreTrainABAE(init_aspect, embed).to(config.device)
    pre_trained_aspect = torch.load("ClassifyModelSave/Final_model_aspect.pkl")
    aspect_dict = PreTrainABAE_test.state_dict()
    pre_trained_dict = {
        k: v for k, v in pre_trained_aspect.items() if k in aspect_dict
    }
    aspect_dict.update(pre_trained_dict)
    PreTrainABAE_test.load_state_dict(aspect_dict)
    trained_aspect = pre_trained_aspect["aspect_lookup_mat"].data

    model_test = clas_model.CrfWdeRnnEncoder(300, 300, 50, embed, trained_aspect).to(config.device)
    pre_trained_dict = torch.load("ClassifyModelSave/Final_model.pkl")
    model_dict = model_test.state_dict()
    pre_trained_dict = {
        k: v for k, v in pre_trained_dict.items() if k in model_dict
    }
    model_dict.update(pre_trained_dict)
    model_test.load_state_dict(model_dict)

    with torch.no_grad():
        total = 0
        correct = 0
        model_test.zero_grad()
        PreTrainABAE_test.zero_grad()
        run_hidden = model_test.initHidden(1)
        # context = torch.ones((1, 50))
        for index, sample_batch in enumerate(test_data):
            model_test = model_test.eval()
            PreTrainABAE_test = PreTrainABAE_test.eval()
            input_data = sample_batch['input'].to(config.device)
            label = sample_batch['label'].to(config.device)
            aspect_info, trained_aspect, _ = PreTrainABAE_test(input_data)
            input_data[:, 1] = aspect_info
            outputs = model_test(input_data, run_hidden, trained_aspect, "test").view(1, 2).to(config.device)
            _, predicted = torch.max(outputs.data, 1)
            total += label.size(0)
            correct += (predicted == label).sum().item()
        acc = correct / total
        print("Test acc rate (final result) :", acc)
    return all_evaluate
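# --- Hedged sketch (illustrative refactor, not in the original code) ----------
# The validation block inside the epoch loop and the final test block above run
# the same accuracy loop.  A helper along these lines could serve both; the
# names and forward signatures are taken from the code above and are otherwise
# assumptions.
import torch


def _eval_accuracy_sketch(sentence_model, abae_model, data_iter, device):
    """Accuracy of sentence_model + pretrained ABAE over data_iter (batch size 1)."""
    sentence_model.eval()
    abae_model.eval()
    correct, total = 0, 0
    hidden = sentence_model.initHidden(1)
    with torch.no_grad():
        for batch in data_iter:
            inputs = batch['input'].to(device)
            labels = batch['label'].to(device)
            aspect_info, aspect_mat, _ = abae_model(inputs)
            inputs[:, 1] = aspect_info
            logits = sentence_model(inputs, hidden, aspect_mat, "test").view(1, 2)
            predicted = logits.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct / max(total, 1)
# -------------------------------------------------------------------------------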
def weakly_train(self, train_data, test_pos, test_neg, embed, asp_list):
    # run = models.AttentionEncoder(300, 300, 50, embed).to(config.device)
    # init_aspect = np.array(np.load("initAspect.npy"))
    # # init_aspect = init_aspect / np.linalg.norm(init_aspect, axis=-1, keepdims=True)
    # init_aspect = torch.from_numpy(init_aspect)
    # pre_train_abae = weak_model.PreTrainABAE(init_aspect, embed).to(config.device)
    # pre_trained_aspect = torch.load("AspectExtract/Aspect_Model.pkl")
    # aspect_dict = pre_train_abae.state_dict()
    # pre_trained_dict = {k: v for k, v in pre_trained_aspect.items() if k in aspect_dict}
    # aspect_dict.update(pre_trained_dict)
    # pre_train_abae.load_state_dict(aspect_dict)
    # pre_train_abae = pre_train_abae.eval()
    # trained_aspect = pre_trained_aspect["aspect_lookup_mat"].data
    # run = weak_model.WdeRnnEncoderFix(300, 300, 50, embed, trained_aspect).to(config.device)
    run = weak_model.WdeRnnEncoderFix(300, 300, 50, embed).to(config.device)
    # context = torch.ones((config.batch_size, 50))
    # optimizer = optim.Adagrad(params, lr=0.003)
    # optimizer = optim.SGD(filter(lambda p: p.requires_grad, run.parameters()), lr=0.0001)
    optimizer = config.optimizer(filter(lambda p: p.requires_grad, run.parameters()), lr=config.weak_lr)
    loss_func = config.criterion(margin=config.margin, p=config.margin_p)

    for epoch in range(config.epoch):
        run_hidden = run.initHidden(config.batch_size)
        loss_last = torch.tensor([0], dtype=torch.float)
        optimizer.zero_grad()
        for idx, sample_batch in enumerate(train_data):
            run = run.train()
            input1 = sample_batch['input1'].to(config.device)
            input2 = sample_batch['input2'].to(config.device)
            input3 = sample_batch['input3'].to(config.device)
            aspect1 = sample_batch['aspect1'].to(config.device)
            aspect2 = sample_batch['aspect2'].to(config.device)
            aspect3 = sample_batch['aspect3'].to(config.device)
            # get aspect info (only needed for the ABAE variant above)
            # aspect_info = pre_train_abae(input1)
            # input1[:, 1] = aspect_info
            # aspect_info = pre_train_abae(input2)
            # input2[:, 1] = aspect_info
            # aspect_info = pre_train_abae(input3)
            # input3[:, 1] = aspect_info

            # Encode the three sentences of the weak-supervision triplet.
            out1 = run(input1, run_hidden, aspect1).view(config.batch_size, 300)
            out2 = run(input2, run_hidden, aspect2).view(config.batch_size, 300)
            out3 = run(input3, run_hidden, aspect3).view(config.batch_size, 300)

            # Margin-based triplet loss; clear gradients before each backward pass.
            optimizer.zero_grad()
            loss_last = loss_func(out1, out2, out3)
            loss_last.backward()
            optimizer.step()

        if epoch % config.valid_step == 0:
            run.zero_grad()
            run = run.eval()
            valid_now = self.valid(asp_list, run, test_pos, test_neg, embed)
            a = round(loss_last.item(), 5)
            b = round(valid_now, 5)
            if config.save_model and valid_now > config.valid_thres:
                file_name = config.save_model_path + "model_loss_" + str(a) + "valid_" + str(b) + ".pkl"
                torch.save(run.state_dict(), file_name)
            print('epoch {} of {}: TEST : {}'.format(epoch, config.epoch, valid_now))
        print('epoch {} of {}: loss : {}'.format(epoch, config.epoch, loss_last.item()))
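# --- Hedged note on the weak-supervision criterion -----------------------------
# config.criterion(margin=config.margin, p=config.margin_p) matches the signature
# of torch.nn.TripletMarginLoss, which is presumably what scores the
# (out1, out2, out3) anchor / positive / negative encodings above.  A minimal,
# self-contained example of that loss:
import torch
import torch.nn as nn

_triplet_loss = nn.TripletMarginLoss(margin=1.0, p=2)
_anchor = torch.randn(8, 300)    # corresponds to out1
_positive = torch.randn(8, 300)  # corresponds to out2
_negative = torch.randn(8, 300)  # corresponds to out3
_loss = _triplet_loss(_anchor, _positive, _negative)
# -------------------------------------------------------------------------------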
        content = content.to(device)
        content, templatePatch = randCrop(content, templates, opt.imageSize, targetMosaic)
        templatePatch = templatePatch.to(device)  ## needed -- randCrop creates a new float tensor
        if opt.trainOverfit:
            content = content.to(device)
        if epoch == 0 and i == 0:
            print("template size", templatePatch.shape)

        # train with real
        netD.zero_grad()
        text, _ = data
        batch_size = content.size(0)  ## texture and content of different sizes may cause issues -- just trim
        text = text.to(device)
        output = netD(text)  ## also used to find the correct size for the label tensor
        errD_real = criterion(output, output.detach() * 0 + real_label)
        errD_real.backward()
        D_x = output.mean()

        # train with fake
        noise = setNoise(noise)
        fake, alpha, A, mixedI = famosGeneration(content, noise, templatePatch, True)
        output = netD(fake.detach())  # detach so the D update does not backprop into G
        errD_fake = criterion(output, output.detach() * 0 + fake_label)
        errD_fake.backward()
        if opt.fAdvM > 0:
            loss_adv_mixed = criterion(netD(mixedI.detach()), output.detach() * 0 + fake_label)
            loss_adv_mixed.backward()
        D_G_z1 = output.mean()
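# --- Hedged note on the label construction --------------------------------------
# `output.detach() * 0 + real_label` is simply a way of building a target tensor
# with the same shape and device as the (patch) discriminator output.  An
# equivalent, more explicit form, assuming real_label / fake_label are plain floats:
import torch

_output = torch.rand(4, 1, 5, 5)                   # stand-in for a netD output map
_real_label = 1.0
_target_a = _output.detach() * 0 + _real_label     # idiom used above
_target_b = torch.full_like(_output, _real_label)  # clearer equivalent
assert torch.equal(_target_a, _target_b)
# ---------------------------------------------------------------------------------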
for epoch in range(opt.niter):
    for i, data in enumerate(dataloader, 0):
        t0 = time.time()
        sys.stdout.flush()

        content = next(iter(cdataloader))[0]
        content = content.to(device)

        # train with real
        netD.zero_grad()
        text, _ = data
        batch_size = content.size(0)  ## texture and content of different sizes may cause issues -- just trim
        text = text.to(device)
        output = netD(text)
        errD_real = criterion(output, output.detach() * 0 + real_label)
        errD_real.backward()
        D_x = output.mean()

        # train with fake
        noise = setNoise(noise)
        fake = ganGeneration(content, noise)
        output = netD(fake.detach())
        errD_fake = criterion(output, output.detach() * 0 + fake_label)
        errD_fake.backward()
        D_G_z1 = output.mean()
        errD = errD_real + errD_fake

        if opt.WGAN:
            gradient_penalty = calc_gradient_penalty(netD, text, fake[:text.shape[0]])  ## for the case of fewer text images
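# --- Hedged sketch of calc_gradient_penalty --------------------------------------
# The real helper is defined elsewhere in the repo; the call above suggests the
# standard WGAN-GP penalty (interpolate real and fake samples and penalise
# gradient norms away from 1).  A typical implementation under that assumption:
import torch


def _calc_gradient_penalty_sketch(netD, real, fake, lambda_gp=10.0):
    alpha = torch.rand(real.size(0), 1, 1, 1, device=real.device)
    interp = (alpha * real + (1 - alpha) * fake).requires_grad_(True)
    d_interp = netD(interp)
    grads = torch.autograd.grad(outputs=d_interp, inputs=interp,
                                grad_outputs=torch.ones_like(d_interp),
                                create_graph=True, retain_graph=True)[0]
    grads = grads.view(grads.size(0), -1)
    return lambda_gp * ((grads.norm(2, dim=1) - 1) ** 2).mean()
# -----------------------------------------------------------------------------------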
def train(opt, dataloader, dataloader_test, device, netD, netG, desc, Noise, optimizerD, optimizerG):
    # losses tracked for plotting
    errD_real_material_total, errD_real_color_total, errD_fake_material_total, errD_fake_color_total = [], [], [], []
    errG_material_total, errG_color_total, epoch_total = [], [], []
    [NZ, noise, fixnoise] = Noise

    for epoch in range(opt.niter):
        # per-epoch averages
        errD_real_material_avg, errD_real_color_avg, errD_fake_material_avg, errD_fake_color_avg = 0.0, 0.0, 0.0, 0.0
        errG_material_avg, errG_color_avg, counter = 0.0, 0.0, 0
        real_label = 1
        fake_label = 0

        for i, data in enumerate(dataloader, 0):
            counter += 1
            t0 = time.time()
            # sys.stdout.flush()
            texture, emb, description, _, _, _ = data

            material_label = torch.zeros(opt.batchSize, opt.z_material)
            material_label.copy_(emb[:, :opt.z_material])
            material_label = material_label.long()
            color_label = torch.zeros(opt.batchSize, opt.z_color)
            color_label.copy_(emb[:, opt.z_material:opt.z_material + opt.z_color])
            color_label = color_label.long()

            texture = texture.to(device)
            emb = emb.to(device)
            material_label = material_label.to(device)
            color_label = color_label.to(device)

            # =====================================================================
            # train with real
            netD.zero_grad()
            output_adv, output_material, output_color = netD(texture)
            errD_real_adv = criterion(output_adv, output_adv.detach() * 0 + real_label)
            errD_real_material = material_criterion(output_material.squeeze(), torch.max(material_label, 1)[1])
            errD_real_color = color_criterion(output_color.squeeze(), torch.max(color_label, 1)[1])
            errD_real = errD_real_adv + errD_real_material + errD_real_color
            D_x = output_adv.mean()
            # for average error calculation
            errD_real_material_avg += errD_real_material.item()
            errD_real_color_avg += errD_real_color.item()

            # =====================================================================
            # train with fake
            noise = setNoise(noise)
            fake = netG(noise, emb)
            output_adv, output_material, output_color = netD(fake.detach())
            errD_fake_adv = criterion(output_adv, output_adv.detach() * 0 + fake_label)
            errD_fake_material = material_criterion(output_material.squeeze(), torch.max(material_label, 1)[1])
            errD_fake_color = color_criterion(output_color.squeeze(), torch.max(color_label, 1)[1])
            errD_fake = errD_fake_adv + errD_fake_material + errD_fake_color
            D_G_z1 = output_adv.mean()
            # for average error calculation
            errD_fake_material_avg += errD_fake_material.item()
            errD_fake_color_avg += errD_fake_color.item()

            # perceptual loss
            if opt.use_perceptual_loss:
                output_adv_temp, output_material_temp, output_color_temp = netD(texture)
                errD_material_perc = perceptual_criterion(output_material, output_material_temp.detach())
                errD_color_perc = perceptual_criterion(output_color, output_color_temp.detach())

            # total discriminator loss
            errD = errD_real + errD_fake
            if opt.use_perceptual_loss:
                errD += errD_material_perc
                errD += errD_color_perc
            errD.backward()
            if opt.WGAN:
                gradient_penalty = calc_gradient_penalty(netD, texture, fake[:texture.shape[0]])  ## for the case of fewer texture images
                gradient_penalty.backward()
            optimizerD.step()

            if i > 0 and opt.WGAN and i % opt.dIter != 0:
                continue  ## opt.dIter critic steps per generator step

            # =====================================================================
            # train G
            netG.zero_grad()
            noise = setNoise(noise)
            fake = netG(noise, emb)
            output_adv, output_material, output_color = netD(fake)
            errG_adv = criterion(output_adv, output_adv.detach() * 0 + real_label)
            errG_material = material_criterion(output_material.squeeze(), torch.max(material_label, 1)[1])
            errG_color = color_criterion(output_color.squeeze(), torch.max(color_label, 1)[1])

            # perceptual loss
            if opt.use_perceptual_loss:
                # output_adv_temp, output_material_temp, output_color_temp = netD(texture, emb)
                output_adv_temp, output_material_temp, output_color_temp = netD(texture)
                errG_material_perc = perceptual_criterion(output_material, output_material_temp.detach())
                errG_color_perc = perceptual_criterion(output_color, output_color_temp.detach())

            # for average error calculation
            errG_material_avg += errG_material.item()
            errG_color_avg += errG_color.item()

            D_G_z2 = output_adv.mean()
            errG = errG_adv + errG_material + errG_color
            # errG = errG_adv
            if opt.use_perceptual_loss:
                errG += errG_material_perc
                errG += errG_color_perc
            errG.backward()
            # optimizerU.step()
            optimizerG.step()

            print('[%d/%d][%d/%d] D(x): %.4f D(G(z)): %.4f / %.4f time %.4f'
                  % (epoch, opt.niter, i, len(dataloader), D_x, D_G_z1, D_G_z2, time.time() - t0))

            ### RUN INFERENCE AND SAVE LARGE OUTPUT MOSAICS
            if epoch % 1000 == 0:
                vutils.save_image(texture,
                                  '%s/%s/real_textures_%03d_%s.jpg' % (opt.outputFolder, 'train', epoch, desc),
                                  normalize=True)
                vutils.save_image(fake,
                                  '%s/%s/generated_textures_%03d_%s.jpg' % (opt.outputFolder, 'train', epoch, desc),
                                  normalize=True)
                fixnoise = setNoise(fixnoise)
                netG.eval()
                with torch.no_grad():
                    fakeBig = netG(fixnoise, emb)
                    vutils.save_image(fakeBig,
                                      '%s/%s/big_texture_%03d_%s.jpg' % (opt.outputFolder, 'train', epoch, desc),
                                      normalize=True)
                netG.train()

                # save description
                description_dict = {}
                for j in range(len(description)):
                    description_dict['{}'.format(j)] = description[j]
                with open('%s/%s/description_%03d_%s.json' % (opt.outputFolder, 'train', epoch, desc), 'w') as f:
                    json.dump(description_dict, f)

                ### evaluation dataset
                for k, data_eval in enumerate(dataloader_test, 0):
                    # handle data
                    texture_eval, emb_eval, description_eval, _, _, _ = data_eval
                    texture_eval = texture_eval.to(device)
                    emb_eval = emb_eval.to(device)

                    netG.eval()
                    noise = setNoise(noise)
                    fake_eval = netG(noise, emb_eval)
                    # vutils.save_image(texture, '%s/real_textures.jpg' % opt.outputFolder, normalize=True)
                    vutils.save_image(texture_eval,
                                      '%s/%s/real_textures_%03d_%s.jpg' % (opt.outputFolder, 'eval', epoch, desc),
                                      normalize=True)
                    vutils.save_image(fake_eval,
                                      '%s/%s/generated_textures_%03d_%s.jpg' % (opt.outputFolder, 'eval', epoch, desc),
                                      normalize=True)
                    fixnoise = setNoise(fixnoise)
                    with torch.no_grad():
                        # fakeBig_fake = netG(fixnoise)
                        fakeBig_fake = netG(fixnoise, emb_eval)
                        vutils.save_image(fakeBig_fake,
                                          '%s/%s/big_texture_%03d_%s.jpg' % (opt.outputFolder, 'eval', epoch, desc),
                                          normalize=True)
                    netG.train()

                    # save description
                    description_eval_dict = {}
                    for j in range(len(description_eval)):
                        description_eval_dict['{}'.format(j)] = description_eval[j]
                    with open('%s/%s/description_%03d_%s.json' % (opt.outputFolder, 'eval', epoch, desc), 'w') as f:
                        json.dump(description_eval_dict, f)
                    break

            if epoch % 200 == 0:
                # save model checkpoints
                save_model(netG, epoch, opt.outputFolder, 'netG')
                save_model(netD, epoch, opt.outputFolder, 'netD')
                # save_model(netE, epoch, opt.outputFolder, 'netE')

        epoch_total.append(epoch)
        # per-epoch averages
        errD_real_material_avg = errD_real_material_avg / (counter * opt.batchSize)
        errD_real_color_avg = errD_real_color_avg / (counter * opt.batchSize)
        errD_fake_material_avg = errD_fake_material_avg / (counter * opt.batchSize)
        errD_fake_color_avg = errD_fake_color_avg / (counter * opt.batchSize)
        errG_material_avg = errG_material_avg / (counter * opt.batchSize)
        errG_color_avg = errG_color_avg / (counter * opt.batchSize)
        # append to totals
        errD_real_material_total.append(errD_real_material_avg)
        errD_real_color_total.append(errD_real_color_avg)
        errD_fake_material_total.append(errD_fake_material_avg)
        errD_fake_color_total.append(errD_fake_color_avg)
        errG_material_total.append(errG_material_avg)
        errG_color_total.append(errG_color_avg)

        # plot training curves
        plot_loss(0, epoch_total, errD_real_material_total, 'errD_real_material_total',
                  'epoch', 'training error', opt.outputFolder, 'errD_real_material_total.png')
        plot_loss(1, epoch_total, errD_real_color_total, 'errD_real_color_total',
                  'epoch', 'training error', opt.outputFolder, 'errD_real_color_total.png')
        plot_loss(2, epoch_total, errD_fake_material_total, 'errD_fake_material_total',
                  'epoch', 'training error', opt.outputFolder, 'errD_fake_material_total.png')
        plot_loss(3, epoch_total, errD_fake_color_total, 'errD_fake_color_total',
                  'epoch', 'training error', opt.outputFolder, 'errD_fake_color_total.png')
        plot_loss(4, epoch_total, errG_material_total, 'errG_material_total',
                  'epoch', 'training error', opt.outputFolder, 'errG_material_total.png')
        plot_loss(5, epoch_total, errG_color_total, 'errG_color_total',
                  'epoch', 'training error', opt.outputFolder, 'errG_color_total.png')
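# --- Hedged sketch of plot_loss ------------------------------------------------------
# plot_loss is defined elsewhere; its call sites above imply the signature
# (figure_id, xs, ys, title, xlabel, ylabel, output_folder, filename).  A minimal
# matplotlib implementation consistent with those calls could be:
import os
import matplotlib
matplotlib.use('Agg')  # render without a display, e.g. on a training server
import matplotlib.pyplot as plt


def _plot_loss_sketch(fig_id, xs, ys, title, xlabel, ylabel, out_folder, filename):
    plt.figure(fig_id)
    plt.clf()
    plt.plot(xs, ys)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.savefig(os.path.join(out_folder, filename))
# --------------------------------------------------------------------------------------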