def evaluate(data_source):
    model.eval()
    total_loss = 0
    total_kld = 0
    count = 0
    truth_res = []
    pred_res = []
    for batch in data_source:
        data, label = batch.text, batch.label
        data, label = data.cuda(device_id), label.cuda(device_id)
        label.data.sub_(2)
        truth_res += list(label.data)
        args.batch_size = data.size(1)
        model.decoder.bsz = args.batch_size
        model.encoder.bsz = data.size(1)
        model.label.bsz = data.size(1)
        out_ix = data[1:, :].contiguous().view(-1)
        row = range(args.batch_size)
        label_2 = Variable(torch.zeros(args.batch_size, 2).cuda(device_id),
                           requires_grad=False)
        label_2[row, label] = 1
        recon_batch, z, fake_label = model(data[:-1, :])
        _, pred_label = torch.max(fake_label, 1)
        pred_res += list(pred_label.data)
        count += 1
    acc = get_accuracy(truth_res, pred_res)
    print(' acc :%g ' % (acc))
    return acc
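# Several of the loops in this file call a get_accuracy(truth, pred) helper that
# is not shown here. A minimal sketch of what it is assumed to compute (the
# fraction of matching entries); the project's actual helper may differ:
def get_accuracy(truth, pred):
    assert len(truth) == len(pred)
    correct = sum(int(t == p) for t, p in zip(truth, pred))
    return float(correct) / len(truth)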
def train():
    model.train()
    total_loss = 0
    start_time = time.time()
    model.decoder.bsz = args.batch_size
    truth_res = []
    pred_res = []
    count = 0.0
    iterator = zip(unsup_data, itertools.cycle(train_data))
    for (unbatch, lbatch) in iterator:
        data, label = lbatch.text, lbatch.label
        undata = unbatch.text
        undata = undata.cuda(device_id)
        data, label = data.cuda(device_id), label.cuda(device_id)
        data.volatile = False
        label.volatile = False
        label.data.sub_(2)
        truth_res += list(label.data)
        args.batch_size = data.size(1)
        model.decoder.bsz = args.batch_size
        seq_len = data.size(0) - 1
        out_ix = data[1:, :].contiguous().view(-1)
        unout_ix = undata[1:, :].contiguous().view(-1)
        row = range(args.batch_size)
        label_2 = Variable(torch.zeros(args.batch_size, 2).cuda(device_id),
                           requires_grad=False)
        label_2[row, label] = 1
        model.zero_grad()
        # Supervised branch: reconstruction + KL + classification loss.
        recon_batch, mu, logvar, fake_label = model(data[:-1, :], label_2)
        BCE, KLD = loss_function(recon_batch, out_ix, mu, logvar)
        label_loss = loss_label(fake_label, label_2)
        loss = label_loss + BCE + KLD
        # Unsupervised branch: reconstruction + KL only.
        model.decoder.bsz = undata.size(1)
        recon_batch, mu, logvar, _ = model(undata[:-1, :])
        unBCE, unKLD = loss_function(recon_batch, unout_ix, mu, logvar)
        loss += unBCE + unKLD
        if args.model == "bvae":
            noise_loss = model.noise_loss(lr, alpha)
            noise_loss /= args.bptt * len(train_data)
            loss += noise_loss
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()
        count += 1
        total_loss += loss.data
        _, pred_label = torch.max(torch.exp(fake_label), 1)
        pred_res += list(pred_label.data)
        if count % args.log_interval == 0 and count > 0:
            cur_loss = total_loss.item() / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | lr {:5.5f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | kld {:5.9f}'.format(
                      epoch, lr, elapsed * 1000 / args.log_interval,
                      cur_loss, KLD.data.item()))
            total_loss = 0
            start_time = time.time()
    print('epoch: %d done!\n acc:%g' % (epoch, get_accuracy(truth_res, pred_res)))
def get_nll_lm_char(model, en_msg, word2idx, idx2word, nhid=800):
    # en_msg : list of batch_size messages, each a sequence of tokens
    with torch.no_grad():
        batch_size = len(en_msg)
        num_chars = cuda(torch.FloatTensor([len(msg) for msg in en_msg]))
        # assert (min([len(msg) for msg in en_msg]) > 1)
        # msgs = [msg.split() + ["<eos>"] for msg in msgs]
        msgs_idx = [[word2idx[tok] if tok in word2idx else word2idx["<unk>"]
                     for tok in msg] for msg in en_msg]
        msgs_len = [len(msg) for msg in msgs_idx]
        msgs_idx = [np.lib.pad(msg, (0, max(msgs_len) - ln), 'constant',
                               constant_values=(0, 0))
                    for (msg, ln) in zip(msgs_idx, msgs_len)]
        data = cuda(torch.LongTensor(msgs_idx)).t()  # (seq_len, batch_size)
        seq_len = data.size(0)
        input, target = data[:-1, :].contiguous(), data[1:, :].contiguous()
        hidden = (cuda(torch.FloatTensor(2, batch_size, nhid).zero_()),
                  cuda(torch.FloatTensor(2, batch_size, nhid).zero_()))
        output, _ = lm_forward(model, input, hidden)
        # output : (seq_len-1, batch_size, voc_size)
        logits = output.contiguous().view(-1, output.size()[-1])
        nll = F.cross_entropy(logits, target.view(-1), ignore_index=0,
                              reduce=False)  # ((seq_len-1) * batch_size,)
        nll = nll.view(-1, data.size(1)).t().contiguous()  # (batch_size, seq_len-1)
        nll = nll.sum(dim=-1) / num_chars  # (batch_size,)
        return nll
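# get_nll_lm_char() relies on a cuda(...) helper and an lm_forward(...) wrapper
# defined elsewhere in the project. The cuda helper is assumed to behave roughly
# like the sketch below (move a tensor to the GPU only when one is available);
# this is an assumption, not the project's actual definition:
def cuda(tensor):
    return tensor.cuda() if torch.cuda.is_available() else tensor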
def evaluate(data_source):
    model.eval()
    total_loss = 0
    total_kld = 0
    count = 0
    truth_res = []
    pred_res = []
    for batch in data_source:
        data, label = batch.text, batch.label
        data, label = data.cuda(device_id), label.cuda(device_id)
        label.data.sub_(2)
        truth_res += list(label.data)
        args.batch_size = data.size(1)
        model.decoder.bsz = args.batch_size
        seq_len = data.size(0) - 1
        out_ix = data[1:, :].contiguous().view(-1)
        row = range(args.batch_size)
        label_2 = Variable(torch.zeros(args.batch_size, 2).cuda(device_id),
                           requires_grad=False)
        label_2[row, label] = 1
        recon_batch, mu, logvar, fake_label = model(data[:-1, :], label_2)
        BCE, KLD = loss_function(recon_batch, out_ix, mu, logvar)
        loss = BCE + KLD
        _, pred_label = torch.max(fake_label, 1)
        pred_res += list(pred_label.data)
        total_loss += loss.data.item()
        total_kld += KLD.data.item()
        count += 1
    avg = total_loss / count
    avg_kld = total_kld / count
    acc = get_accuracy(truth_res, pred_res)
    print(' acc :%g avg_loss:%g kld:%g' % (acc, avg, avg_kld))
    return acc
def eval_epoch(model, data_iter, criterion):
    total_loss = 0.
    total_words = 0.
    for (data, target) in data_iter:
        # tqdm(data_iter, mininterval=2, desc=' - Training', leave=False):
        data = Variable(data, volatile=True)
        target = Variable(target, volatile=True)
        if opt.cuda:
            data, target = data.cuda(), target.cuda()
        target = target.contiguous().view(-1)
        pred = model.forward(data)
        loss = criterion(pred, target)
        total_loss += loss.data.item()
        total_words += data.size(0) * data.size(1)
    data_iter.reset()
    return math.exp(total_loss / total_words)
def train_step(self, optimizer, start_time):
    accuracies = torch.zeros(self.log_interval)
    total_loss = 0
    for i, batch in enumerate(self.train_iterator):
        # CLEARING HISTORY
        optimizer.zero_grad()
        # GETTING TENSORS
        data, targets = batch.text, batch.label.view(-1)
        targets = targets - 1  # shift labels from one-based to zero-based
        data, lengths = data[0], data[1]
        # CONVERTING TO CUDA IF NEEDED
        if self.cuda:
            data = data.cuda()
            targets = targets.cuda()
            lengths = lengths.cuda()
        if data.size(0) == self.batch_size:
            # GETTING PREDICTIONS
            output, h, A = self.model(data, lengths=lengths)
            predictions = output.view(-1, self.num_classes)
            # GET ACCURACY
            preds = torch.max(predictions, dim=1)[1]
            pct_correct = float(torch.sum(targets == preds).item()) / predictions.size(0)
            accuracies[i % self.log_interval] = pct_correct
            if self.weight_saving:
                # SAVING ATTENTION WEIGHTS
                self.save_weights(i, data, A, h, preds, targets, 'train')
            # CALCULATING AND PROPAGATING LOSS
            loss = self.objective(predictions, targets)
            loss.backward()
            if self.clip is not None:
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
            if self.optim in ['adam', 'SGD']:
                optimizer.step()
            elif self.optim == 'vanilla_grad':
                parameters = filter(lambda p: p.requires_grad, self.model.parameters())
                for p in parameters:
                    p.data.add_(-self.lr, p.grad.data)
            total_loss += loss.item()
            if i % self.log_interval == 0 and i != 0:
                current_accuracy = float(torch.sum(accuracies)) / float(torch.nonzero(accuracies).size(0))
                current_loss = total_loss / self.log_interval
                total_loss = 0
                elapsed = time() - start_time
                accuracies = torch.zeros(self.log_interval)
                print('At time: {elapsed} accuracy is {current_accuracy} and loss is {loss}'
                      .format(elapsed=elapsed, current_accuracy=current_accuracy, loss=current_loss))
    return optimizer
def batchify(data, bsz):
    data = TEXT.numericalize([data.examples[0].text])
    # Divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    data = data.view(bsz, -1).t().contiguous()
    return data.to(device)
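# batchify() is normally paired with a get_batch() helper that slices one
# bptt-long chunk of the batchified tensor plus its shifted targets, as in the
# PyTorch word_language_model example. A minimal sketch, assuming a global
# `bptt` sequence length:
def get_batch(source, i):
    seq_len = min(bptt, len(source) - 1 - i)
    data = source[i:i + seq_len]                      # (seq_len, bsz) inputs
    target = source[i + 1:i + 1 + seq_len].view(-1)   # flattened next-token targets
    return data, target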
def make_std_mask(data, pad):
    """
    Create a mask to hide padding and future words.
    """
    data_mask = (data != pad).unsqueeze(-2)
    data_mask = data_mask & Variable(
        subsequent_mask(data.size(-1)).type_as(data_mask.data))
    return data_mask
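# make_std_mask() depends on a subsequent_mask() helper that is not shown here.
# The usual Annotated-Transformer-style definition is assumed as a sketch
# (requires numpy imported as np):
def subsequent_mask(size):
    # Each position may attend only to itself and earlier positions; the upper
    # triangle (future positions) is masked out.
    attn_shape = (1, size, size)
    mask = np.triu(np.ones(attn_shape), k=1).astype('uint8')
    return torch.from_numpy(mask) == 0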
def evaluate(model, data):
    model.eval()
    total_loss = 0.
    it = iter(data)
    total_count = 0.
    with torch.no_grad():
        hidden = model.init_hidden(BATCH_SIZE, requires_grad=False)
        for i, batch in enumerate(it):
            data, target = batch.text, batch.target
            if USE_CUDA:
                data, target = data.cuda(), target.cuda()
            hidden = repackage_hidden(hidden)
            output, hidden = model(data, hidden)
            loss = loss_fn(output.view(-1, VOCAB_SIZE), target.view(-1))
            total_count += np.multiply(*data.size())
            total_loss += loss.item() * np.multiply(*data.size())
    loss = total_loss / total_count
    model.train()
    return loss
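# evaluate() calls repackage_hidden() to detach the recurrent hidden state from
# its history so gradients do not flow across batches. A standard sketch,
# assumed to match what the model returns (a tensor or a tuple of tensors):
def repackage_hidden(h):
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(repackage_hidden(v) for v in h)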
def train_generator(model, data_iter, criterion, optimizer):
    total_loss = 0.
    total_words = 0.
    if not opt.server:
        data_iter = tqdm(data_iter, mininterval=2,
                         desc=' - Generator Training', leave=False)
    for each in data_iter:
        data, target = each.text[:, :-1], each.text[:, 1:]
        if opt.cuda:
            data, target = data.cuda(), target.cuda()
        target = target.contiguous().view(-1)
        pred = model.forward(data)
        if len(pred.shape) > 2:
            pred = torch.reshape(pred, (pred.shape[0] * pred.shape[1], -1))
        loss = criterion(pred, target)
        total_loss += loss.data.item()
        total_words += data.size(0) * data.size(1)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    return math.exp(total_loss / total_words)
def evaluate(data_source):
    model.eval()
    total_loss = 0
    total_kld = 0
    count = 0
    truth_res = []
    pred_res = []
    pred = []
    for batch in data_source:
        data, label = batch.text, batch.label
        data, label = data.cuda(device_id), label.cuda(device_id)
        label.data.sub_(2)
        truth_res += list(label.data)
        args.batch_size = data.size(1)
        model.decoder.bsz = args.batch_size
        seq_len = data.size(0) - 1
        out_ix = data[1:, :].contiguous().view(-1)
        row = range(args.batch_size)
        label_2 = Variable(torch.zeros(args.batch_size, 2).cuda(device_id),
                           requires_grad=False)
        label_2[row, label] = 1
        if args.model == 'bae' or args.model == 'baeg':
            model.encoder.bsz = data.size(1)
            model.label.bsz = data.size(1)
            recon_batch, z, fake_label = model(data[:-1, :])
        else:
            recon_batch, mu, logvar, fake_label = model(data[:-1, :], label_2)
        _, pred_label = torch.max(fake_label, 1)
        pred_res += list(pred_label.data)
        pred.append(fake_label)
        count += 1
    pred = torch.cat(pred, 0)
    acc = get_accuracy(truth_res, pred_res)
    print(' acc :%g' % (acc))
    return pred, truth_res
def make_contexts(batch, window_size=5, word_padding_idx=0, data_type='text'):
    """
    Args:
        batch (Variable): a batch of source or target data.
        data_type (str): type of the source input.
            Options are [text|img|audio].
    Returns:
        A sequence of context tensors of size (len x batch).
    """
    # Earlier generator-based draft, kept for reference:
    # def get_context(data):
    #     """
    #     Args:
    #         data (Tensor): input tensor of size (len x batch).
    #     Returns:
    #         An iterable of context tensors of size (len x batch).
    #     """
    #     padding = [word_padding_idx] * data.size()[1]
    #     L = data.size()[0]
    #     if L
    #     for c in range(1, window_size + 1):
    #         yield torch.cat([Variable(torch.LongTensor([padding] * c).cuda()), data[:-c]], 0)
    #     for c in range(1, window_size + 1):
    #         yield torch.cat([data[c:], Variable(torch.LongTensor([padding] * c).cuda())], 0)
    side = 'src'
    if isinstance(batch.__dict__[side], tuple):
        data = batch.__dict__[side][0]
    else:
        data = batch.__dict__[side]
    if data_type == 'text':
        L, B = data.size()
        # Pad window_size tokens on each side, then gather a 2*window_size-wide
        # context window around every position.
        pad = [[word_padding_idx] * B] * window_size
        pdata = torch.cat([
            Variable(torch.LongTensor(pad).cuda()),
            data,
            Variable(torch.LongTensor(pad).cuda())
        ], 0).transpose(1, 0)
        return torch.cat([
            pdata[:, list(range(i, i + window_size)) +
                  list(range(i + window_size, i + 2 * window_size))].unsqueeze(0)
            for i in range(L)
        ], 0)
    else:
        return data
def evaluate_twin(model, data_iter, loss_function):
    model.eval()
    loss_meter = meter.AverageValueMeter()
    loss_meter.reset()
    for batch in tqdm.tqdm(data_iter):
        data = batch.text
        model.batch_size = data.size(1)
        hidden = model.init_hidden()
        if opt.use_gpu:
            data = data.cuda()
        input_, target = Variable(data[:-1, :]), Variable(data[1:, :])
        output = model.work(input_, hidden)
        loss = loss_function(output[0], target.view(-1))
        loss_meter.add(loss.item())
    return loss_meter.value()[0]
def evaluate(self):
    self.model.eval()
    i = 0
    accuracies = torch.zeros(len(self.test_iterator))
    total_loss = 0
    for i, batch in enumerate(self.test_iterator):
        # GETTING TENSORS
        data, targets = batch.text, batch.label.view(-1)
        data, lengths = data[0], data[1]
        targets = targets - 1
        # CONVERTING TO CUDA IF NEEDED
        if self.cuda:
            data = data.cuda()
            targets = targets.cuda()
            lengths = lengths.cuda()
        if data.size(0) == self.batch_size:
            # GETTING PREDICTIONS
            output, h, A = self.model(data, lengths=lengths)
            predictions = output.view(-1, self.num_classes)
            # GET ACCURACY
            preds = torch.max(predictions, dim=1)[1]
            pct_correct = float(torch.sum(targets == preds).item()) / predictions.size(0)
            accuracies[i] = pct_correct
            if self.weight_saving:
                # SAVING ATTENTION WEIGHTS
                self.save_weights(i, data, A, h, preds, targets, "test")
            # CALCULATING LOSS
            loss = self.objective(predictions, targets)
            total_loss += loss.item()
    self.eval_accuracy = float(torch.sum(accuracies)) / float(torch.nonzero(accuracies).size(0))
    print('Done Evaluating: Achieved accuracy of {}'.format(self.eval_accuracy))
def word_dropout(self, inputs):
    """
    Do word dropout: with prob `p_word_dropout`, set the word to '<unk>'.
    """
    if isinstance(inputs, Variable):
        data = inputs.data.clone()
    else:
        data = inputs.clone()
    # Sample masks: elems with val 1 will be set to <unk>
    mask = torch.from_numpy(
        np.random.binomial(1, p=self.p_word_dropout,
                           size=tuple(data.size())).astype('uint8'))
    if self.gpu:
        mask = mask.cuda()
    # Set to <unk>
    data[mask] = self.UNK_IDX
    return Variable(data)
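# Hedged usage sketch for word_dropout (illustrative only; `sentence`, `z` and
# `self.decoder` are assumptions, not this project's API): corrupt the decoder
# inputs before teacher forcing so the model cannot rely on simply copying the
# gold previous token.
# dec_inputs = self.word_dropout(sentence)   # same shape as `sentence`
# logits = self.decoder(dec_inputs, z)       # hypothetical decoder call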
def train_twin(**kwargs):
    for k, v in kwargs.items():
        setattr(opt, k, v)  # setattr(object, name, value): override config values
    vis = Visualizer(env=opt.env)  # set the visdom environment
    # Load the data.
    train_iter, valid_iter, test_iter, field = load_data()
    word2ix = field.vocab.stoi
    ix2word = field.vocab.itos
    # Define the model.
    model = lstm_twin(len(word2ix), 300, 150)
    best_model = model
    best_valid_loss = float("inf")
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    scheduler = t.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', min_lr=1e-5)
    # CrossEntropyLoss averages the loss over characters, so the value stays below ~10.
    # With size_average=False it becomes a sum (roughly 2000x larger here); to work
    # per sentence instead, multiply by seq_len.
    criterion = nn.CrossEntropyLoss()
    if opt.model_path:
        model.load_state_dict(t.load(opt.model_path))
    if opt.use_gpu:
        model.cuda()
        criterion.cuda()
    count = 0
    for epoch in range(opt.epoch):
        model.train()
        logging.info("Starting epoch {0}".format(count + 1))
        cnt = 0
        b_fwd_loss, b_bwd_loss, b_twin_loss, b_all_loss = 0., 0., 0., 0.
        # tqdm wraps the iterator with a progress bar; it/s is iterations per second.
        for batch in tqdm.tqdm(train_iter):
            # Training step.
            data = batch.text
            seq_len = data.size(0)
            # Build a reversed index, since tensors do not support negative strides.
            idx = np.arange(seq_len)[::-1].tolist()
            idx = t.LongTensor(idx)
            idx = Variable(idx).cuda()
            model.batch_size = data.size(1)
            hidden1 = model.init_hidden()
            hidden2 = model.init_hidden()
            if opt.use_gpu:
                data = data.cuda()
            optimizer.zero_grad()
            # Inputs and targets are offset by one, as in a char-RNN.
            f_input, f_target = Variable(data[:-1, :]), Variable(data[1:, :])
            bx = data.index_select(0, idx)
            b_input, b_target = Variable(bx[:-1, :]), Variable(bx[1:, :])
            # print(f_input.size(), b_input.size())
            f_out, b_out, f_h, b_h = model(f_input, b_input, hidden1, hidden2)
            f_loss = criterion(f_out, f_target.view(-1))
            b_loss = criterion(b_out, b_target.view(-1))
            b_h_inv = b_h.index_select(0, idx[1:])
            b_h_inv = b_h_inv[1:]  # drop <sos>
            # print(f_h.size(), b_h_inv.size())
            b_h_inv = b_h_inv.detach()
            f_h = f_h[:-1]  # drop <eos>
            twin_loss = ((f_h - b_h_inv) ** 2).mean()
            twin_loss *= 1.5
            all_loss = f_loss + b_loss + twin_loss
            all_loss.backward()
            t.nn.utils.clip_grad_norm_(model.parameters(), 5.)
            optimizer.step()
            # Accumulate running losses.
            b_all_loss += all_loss.item()
            b_fwd_loss += f_loss.item()
            b_bwd_loss += b_loss.item()
            b_twin_loss += twin_loss.item()
            # Visualization.
            if (1 + cnt) % opt.plot_every == 0:
                vis.plot('all_loss', b_all_loss / opt.plot_every)
                vis.plot('twin_loss', b_twin_loss / opt.plot_every)
                vis.plot('loss', b_fwd_loss / opt.plot_every)
                # logging.info("plot {}: all_loss {:f}, f_loss {:f}, b_loss {:f}, twin_loss {:f}"
                #              .format(int((cnt + 1) / opt.plot_every), b_all_loss / opt.plot_every,
                #                      b_fwd_loss / opt.plot_every,
                #                      b_bwd_loss / opt.plot_every, b_twin_loss / opt.plot_every))
                b_fwd_loss, b_bwd_loss, b_twin_loss, b_all_loss = 0., 0., 0., 0.
            cnt += 1
        count += 1
        valid_loss = evaluate_twin(model, valid_iter, criterion)
        scheduler.step(valid_loss)
        logging.info("Validation loss after epoch %d: %f" % (count, valid_loss))
        if valid_loss < best_valid_loss:
            # os.system('rm ' + opt.model_prefix + opt.model + '.pth')
            best_valid_loss = valid_loss
            best_model = model
            t.save(best_model.state_dict(),
                   '%s%s_%d.pth' % (opt.model_prefix, opt.model, count))
            test_loss = evaluate_twin(best_model, test_iter, criterion)
            logging.info("Test loss: %f" % test_loss)
        # Halve the learning rate at selected epochs.
        if epoch in [5, 10, 15]:
            for param_group in optimizer.param_groups:
                lr = param_group['lr']
                lr *= 0.5
                param_group['lr'] = lr
def Bleu(**kwargs):
    for k, v in kwargs.items():
        setattr(opt, k, v)  # setattr(object, name, value): override config values
    print('Loading model from {}'.format(opt.model_path))
    # Load the vocabulary.
    if os.path.exists(opt.pickle_path):
        data = np.load(opt.pickle_path)
        word2ix, ix2word = data['word2ix'].item(), data['ix2word']
    else:
        train_iter, valid_iter, test_iter, field = load_data()
        word2ix = field.vocab.stoi
        ix2word = field.vocab.itos
    # Load the model.
    if opt.model == 'lstm':
        model = lstm(len(word2ix), 300, 150)
    elif opt.model == 'lstm_twin':
        model = lstm_twin(len(word2ix), 300, 150)
    map_location = lambda s, l: s
    state_dict = t.load(opt.model_path, map_location=map_location)
    model.load_state_dict(state_dict)
    if opt.use_gpu:
        model.cuda()
    print("Model loaded")
    # model.eval()
    hypothesis = []
    references = []
    cnt = 0
    for batch in tqdm.tqdm(test_iter):
        cnt += 1
        # batch = next(iter(test_iter))
        data = batch.text
        if opt.model == 'lstm_twin':
            model.batch_size = data.size(1)
            hidden = model.init_hidden()
        if opt.use_gpu:
            data = data.cuda()
        input_, target = Variable(data[:-1, :]), Variable(data[1:, :])
        tmp = target.transpose(0, 1).cpu().numpy()
        # print(tmp)
        print('===========input==========')
        for ii in tmp:
            ii_ = list(ii)
            for i in ii_:
                print(ix2word[i], end='')
            print('')
            # Token id 3 is assumed to be <eos>; keep everything up to and including it.
            ii_ = ii_[:ii_.index(3) + 1]
            references.append([ii_])
        print('===========output==========')
        # print(references)
        if opt.model == 'lstm':
            output, _ = model(input_)
            output = output.view(data.size(0) - 1, data.size(1), -1)
        elif opt.model == 'lstm_twin':
            output = model.work(input_, hidden)
            output = output[0].view(data.size(0) - 1, data.size(1), -1)
        # print(output.size())
        top = output.topk(1, dim=2)[1].squeeze().transpose(0, 1)
        top = top.cpu().numpy()
        for ii in top:
            ii_ = list(ii)
            for i in ii_:
                print(ix2word[i], end='')
            print('')
            eos_idx = ii_.index(3) if 3 in ii_ else None
            if eos_idx is not None:
                ii_ = ii_[:eos_idx + 1]
            hypothesis.append(ii_)
        # if cnt > 10:
        #     break
    # print(hypothesis)
    bleu1 = corpus_bleu(references, hypothesis, weights=(1, 0, 0, 0))
    bleu2 = corpus_bleu(references, hypothesis, weights=(1. / 2., 1. / 2., 0, 0))
    bleu3 = corpus_bleu(references, hypothesis, weights=(1. / 3., 1. / 3., 1. / 3., 0))
    bleu4 = corpus_bleu(references, hypothesis)
    print("bleu1: ", bleu1, "bleu2: ", bleu2, "bleu3: ", bleu3, "bleu4: ", bleu4)
def train():
    model.train()
    total_loss = 0
    start_time = time.time()
    model.decoder.bsz = args.batch_size
    truth_res = []
    pred_res = []
    count = 0.0
    iterator = zip(unsup_data, itertools.cycle(train_data))
    for (unbatch, lbatch) in iterator:
        data, label = lbatch.text, lbatch.label
        undata = unbatch.text
        undata = undata.cuda(device_id)
        data, label = data.cuda(device_id), label.cuda(device_id)
        data.volatile = False
        label.volatile = False
        label.data.sub_(2)
        truth_res += list(label.data)
        args.bptt = (data.size(0) + undata.size(0)) / 2
        out_ix = data[1:, :].contiguous().view(-1)
        unout_ix = undata[1:, :].contiguous().view(-1)
        row = range(data.size(1))
        label_2 = Variable(torch.zeros(data.size(1), 2).cuda(device_id),
                           requires_grad=False)
        label_2[row, label] = 1
        model.zero_grad()
        for j in range(J):
            if j == 0:
                # First pass: run the full model to obtain latent samples z.
                model.zero_grad()
                model.decoder.bsz = data.size(1)
                model.encoder.bsz = data.size(1)
                model.label.bsz = data.size(1)
                recon_batch, z, fake_label = model(data[:-1, :])
                model.decoder.bsz = undata.size(1)
                model.encoder.bsz = undata.size(1)
                model.label.bsz = undata.size(1)
                unrecon_batch, unz, _ = model(undata[:-1, :])
                z_sample = Variable(z.data, requires_grad=True)
                z_optimizer = z_opt(z_sample)
                z_optimizer.zero_grad()
                unz_sample = Variable(unz.data, requires_grad=True)
                unz_optimizer = z_opt(unz_sample)
                unz_optimizer.zero_grad()
            else:
                # Later passes: decode and classify from the current latent samples.
                model.zero_grad()
                emb = model.embed(data[:-1, :])
                model.decoder.bsz = data.size(1)
                model.label.bsz = data.size(1)
                fake_label = model.label(emb, z_sample)
                recon_batch = model.decoder(emb, z_sample)
                model.decoder.bsz = undata.size(1)
                model.label.bsz = undata.size(1)
                unemb = model.embed(undata[:-1, :])
                unrecon_batch = model.decoder(unemb, unz_sample)
            BCE = loss_function(recon_batch, out_ix)
            unBCE = loss_function(unrecon_batch, unout_ix)
            label_loss = loss_label(fake_label, label_2)
            noise_loss = model.noise_loss(lr, alpha)
            noise_loss /= args.bptt * len(train_data)
            prior_loss_z = z_prior_loss(z_sample)
            noise_loss_z = z_noise_loss(z_sample)
            prior_loss_z /= args.bptt * len(train_data)
            noise_loss_z /= args.bptt * len(train_data)
            unprior_loss_z = z_prior_loss(unz_sample)
            unnoise_loss_z = z_noise_loss(unz_sample)
            unprior_loss_z /= args.bptt * len(train_data)
            unnoise_loss_z /= args.bptt * len(train_data)
            loss = (BCE + unBCE + label_loss + noise_loss +
                    prior_loss_z + noise_loss_z + unprior_loss_z + unnoise_loss_z)
            if j > burnin + 1:
                loss_en = en_loss(z_sample, z)
                unloss_en = en_loss(unz_sample, unz)
                loss += loss_en + unloss_en
            if j % 2 == 0:
                # Even steps update the model parameters.
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
                optimizer.step()
            else:
                # Odd steps update the latent samples.
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
                z_optimizer.step()
                unz_optimizer.step()
        count += 1
        total_loss += label_loss.data + BCE.data + unBCE.data
        _, pred_label = torch.max(torch.exp(fake_label), 1)
        pred_res += list(pred_label.data)
        if count % args.log_interval == 0 and count > 0:
            cur_loss = total_loss.item() / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | lr {:5.5f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} '.format(epoch, lr,
                                         elapsed * 1000 / args.log_interval,
                                         cur_loss))
            total_loss = 0
            start_time = time.time()
    print('epoch: %d done!\n acc:%g' % (epoch, get_accuracy(truth_res, pred_res)))