def train_base(train_loader, model, criterion, optimizer_a, epoch, print_freq, writer):
    losses = AverageMeter()
    top1 = AverageMeter()
    model.train()
    begin_step = (epoch - 1) * len(train_loader)
    for i, (input, target) in enumerate(train_loader):
        input_var = to_var(input, requires_grad=False)
        target_var = to_var(target, requires_grad=False)

        output = model(input_var)
        loss = criterion(output, target_var)  # CE loss, reduction='mean' -> scalar
        prec_train = accuracy(output.data, target_var.data, topk=(1,))[0]

        # standard update
        optimizer_a.zero_grad()
        loss.backward()
        optimizer_a.step()

        losses.update(loss.item(), input.size(0))
        top1.update(prec_train.item(), input.size(0))
        writer.add_scalar('Train/loss', losses.avg, global_step=begin_step + i)
        writer.add_scalar('Train/top1_acc', top1.avg, global_step=begin_step + i)

        if i % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                      epoch, i, len(train_loader), loss=losses, top1=top1))
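# Every snippet in this file leans on a small `to_var` helper. A minimal sketch
# of what it typically looks like in codebases of this vintage (an assumption
# based on common usage, not the original definition):
def to_var(x, requires_grad=False):
    """Move a tensor to GPU when available and set its grad requirement."""
    if torch.cuda.is_available():
        x = x.cuda()
    return x.requires_grad_(requires_grad)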
def validate_model(model, dataloader, criterion):
    model.train(False)
    steps = len(dataloader.dataset) // dataloader.batch_size
    running_loss = 0.0
    running_cls_loss = 0.0
    running_loc_loss = 0.0
    running_corrects = 0
    for i, (inputs, labels, bboxes, _) in enumerate(dataloader):
        inputs, labels, bboxes = to_var(inputs, True), to_var(labels, True), to_var(bboxes, True)

        # forward
        scores, locs = model(inputs)
        _, preds = torch.max(scores.data, 1)
        cls_loss, loc_loss = criterion(scores, locs, labels, bboxes)
        loss = cls_loss + 10.0 * loc_loss

        # running averages (use .item() instead of the deprecated .data[0])
        running_cls_loss = (running_cls_loss * i + cls_loss.item()) / (i + 1)
        running_loc_loss = (running_loc_loss * i + loc_loss.item()) / (i + 1)
        running_loss = (running_loss * i + loss.item()) / (i + 1)
        running_corrects += torch.sum(preds == labels.data).item()

        # report
        sys.stdout.flush()
        sys.stdout.write("\r Step %d/%d | Loss: %.5f (%.5f + %.5f)" %
                         (i, steps, running_loss, running_cls_loss, running_loc_loss))

    epoch_loss = running_loss
    epoch_acc = running_corrects / len(dataloader.dataset)
    sys.stdout.flush()
    print('\r{} Loss: {:.5f} ({:.5f} + {:.5f}), Acc: {:.5f}'.format(
        ' valid', epoch_loss, running_cls_loss, running_loc_loss, epoch_acc))
    return epoch_acc
def test(self):
    self.model.eval()
    true_scores_list = list()
    false_scores_list = list()
    for batch_i, (contexts, res_true, res_ns1, res_ns2, res_ns3, res_ns4) in \
            enumerate(tqdm(self.eval_data_loader, ncols=80)):
        ns_list = [None, res_ns1, res_ns2, res_ns3, res_ns4]
        res_ns = ns_list[self.config.test_target_ng]
        with torch.no_grad():
            contexts = to_var(torch.FloatTensor(contexts))
            res_trues = to_var(torch.FloatTensor(res_true))
            res_falses = to_var(torch.FloatTensor(res_ns))

            # Call forward function
            true_scores = self.model.score(contexts, res_trues)
            false_scores = self.model.score(contexts, res_falses)
            true_scores_list += true_scores.data.cpu().numpy().tolist()
            false_scores_list += false_scores.data.cpu().numpy().tolist()

    return true_scores_list, false_scores_list
def encode(self, x, o_cond=None):
    kwargs = {}
    batch_size = x.size(0)
    means, log_var = self.encoder(x, o_cond)
    if o_cond is not None:
        # conditional prior parameters, passed through for the KL term
        means_cond, log_var_cond = self.prior(o_cond)
        kwargs['means_cond'] = means_cond
        kwargs['log_var_cond'] = log_var_cond

    # reparameterization trick: z = mu + sigma * eps, eps ~ N(0, I)
    std = torch.exp(0.5 * log_var)
    eps = to_var(torch.randn([batch_size, self.latent_size]))
    z = eps * std + means
    return z, means, log_var, kwargs
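# A minimal loss sketch built on encode() (assumptions: the model also exposes
# a matching `decoder(z, o_cond)` as in the inference method below, and the KL
# term is the standard closed form for a diagonal Gaussian against N(0, I);
# none of these names are guaranteed by the original code):
def vae_terms(model, x, o_cond=None):
    z, means, log_var, _ = model.encode(x, o_cond)
    recon_x = model.decoder(z, o_cond)
    kl = -0.5 * torch.sum(1 + log_var - means.pow(2) - log_var.exp())
    return recon_x, kl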
def __init__(self, *args, **kwargs):
    super().__init__()
    ignore = nn.Conv2d(*args, **kwargs)
    self.in_channels = ignore.in_channels
    self.out_channels = ignore.out_channels
    self.stride = ignore.stride
    self.padding = ignore.padding
    self.dilation = ignore.dilation
    self.groups = ignore.groups
    self.kernel_size = ignore.kernel_size

    # register_buffer stores the params outside of nn.Parameter, so they can
    # be swapped out in-place without going through the optimizer
    self.register_buffer('weight', to_var(ignore.weight.data, requires_grad=True))
    if ignore.bias is not None:
        self.register_buffer('bias', to_var(ignore.bias.data, requires_grad=True))
    else:
        self.register_buffer('bias', None)
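# A plausible companion forward() for this layer (a sketch, not taken from the
# source): because weight and bias live in buffers rather than nn.Parameter,
# they can be overwritten (e.g. with MAML-style fast weights) while the
# functional convolution below stays unchanged.
import torch.nn.functional as F

def forward(self, x):
    return F.conv2d(x, self.weight, self.bias, self.stride,
                    self.padding, self.dilation, self.groups)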
def gen_score(adaptive, res_loader):
    LMcriterion = nn.CrossEntropyLoss(ignore_index=0)
    if torch.cuda.is_available():
        LMcriterion.cuda()
    adaptive.eval()
    total_scores = []
    print('--------------Start Scoring on Generated dataset---------------')
    for i, (word, sememes, definition) in enumerate(res_loader):
        word = to_var(word)
        sememes = to_var(sememes)
        definition = to_var(definition)
        targets = definition[:, 1:]

        scores, _ = adaptive(word, sememes, definition)
        scores = scores[:, :-1, :].transpose(1, 2)
        loss = LMcriterion(scores, targets)
        # per-batch perplexity (use .item() instead of the deprecated .data[0])
        total_scores.append(str(np.exp(loss.item())))
        if (i + 1) % 10 == 0:
            print('[%s/%s]' % (i + 1, len(res_loader)))
    return total_scores
def __init__(self, hps, data_loader, g_mode, enc_mode, log_dir='./log/'):
    self.hps = hps
    self.data_loader = data_loader
    self.model_kept = []
    self.max_keep = hps.max_to_keep
    self.logger = Logger(log_dir)
    self.g_mode = g_mode
    self.enc_mode = enc_mode
    if self.g_mode != 'naive':
        self.shift_c = to_var(
            torch.from_numpy(np.array(
                [int(hps.n_speakers - hps.n_target_speakers)
                 for _ in range(hps.batch_size)])),
            requires_grad=False)
    self.build_model()
def get_data_from_batch(batch, w2i, act2i):
    uttrs_list = [d[0] for d in batch]
    dialog_maxlen = max([len(uttrs) for uttrs in uttrs_list])
    uttr_maxlen = max([len(u) for uttrs in uttrs_list for u in uttrs])
    uttr_var = make_word_vector(uttrs_list, w2i, dialog_maxlen, uttr_maxlen)

    batch_labels = [d[1] for d in batch]
    labels_var = []
    for labels in batch_labels:
        vec_labels = [act2i[l] for l in labels]
        pad_len = dialog_maxlen - len(labels)
        for _ in range(pad_len):
            vec_labels.append(act2i[SILENT])
        labels_var.append(torch.LongTensor(vec_labels))
    labels_var = to_var(torch.stack(labels_var, 0))

    batch_prev_acts = [d[4] for d in batch]
    prev_var = []
    for prev_acts in batch_prev_acts:
        vec_prev_acts = []
        for act in prev_acts:
            tmp = [0] * len(act2i)
            tmp[act2i[act]] = 1
            vec_prev_acts.append(tmp)
        pad_len = dialog_maxlen - len(prev_acts)
        for _ in range(pad_len):
            vec_prev_acts.append([0] * len(act2i))
        prev_var.append(torch.FloatTensor(vec_prev_acts))
    prev_var = to_var(torch.stack(prev_var, 0))

    context = copy.deepcopy([d[2] for d in batch])
    context = padding(context, 1, dialog_maxlen)
    bow = copy.deepcopy([d[3] for d in batch])
    bow = padding(bow, 0, dialog_maxlen)
    act_filter = copy.deepcopy([d[5] for d in batch])
    act_filter = padding(act_filter, 0, dialog_maxlen)
    return uttr_var, labels_var, context, bow, prev_var, act_filter
def train(model, train_set, eval_set, dt_logger):
    if torch.cuda.is_available():
        model.cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    # for epoch in range(num_of_epochs):
    for data_file in train_set:
        model.train()
        data_iter = data_loader.get_loader(data_file, 1)
        running_loss = 0.0
        for idx, (stats, temporal, spatial, dr_state, short_ttf, long_ttf,
                  helpers) in enumerate(data_iter):
            stats = utils.to_var(stats)
            temporal = utils.to_var(temporal)
            spatial = utils.to_var(spatial)
            dr_state = utils.to_var(dr_state)
            short_ttf, long_ttf = utils.to_var(short_ttf), utils.to_var(long_ttf)

            loss = model.evaluate(stats, temporal, spatial, dr_state,
                                  short_ttf, long_ttf, helpers)
            optimizer.zero_grad()
            loss.sum().backward()
            optimizer.step()
            running_loss += loss.mean().item()
def get_interpolations(vae, sample_start, sample_end, args):
    model = vae['model']
    tokenizer = vae['tokenizer']
    w2i = vae['w2i']
    i2w = vae['i2w']

    start_encode = tokenizer.encode(sample_start)
    end_encode = tokenizer.encode(sample_end)
    with torch.no_grad():
        z1 = model._encode(**start_encode)
        z1_hidden = z1['z'].cpu()[0]
        z2 = model._encode(**end_encode)
        z2_hidden = z2['z'].cpu()[0]
        z_hidden = to_var(
            torch.from_numpy(
                interpolate(start=z1_hidden, end=z2_hidden,
                            steps=args.steps)).float())

        if args.rnn_type == "lstm":
            # LSTMs also need the cell state interpolated
            z1_cell_state = z1['z_cell_state'].cpu()[0].squeeze()
            z2_cell_state = z2['z_cell_state'].cpu()[0].squeeze()
            z_cell_states = to_var(
                torch.from_numpy(
                    interpolate(start=z1_cell_state, end=z2_cell_state,
                                steps=args.steps)).float())
            samples, _ = model.inference(z=z_hidden, z_cell_state=z_cell_states)
        else:
            samples, _ = model.inference(z=z_hidden, z_cell_state=None)

    interpolated_sentences = idx2word(samples, i2w=i2w, pad_idx=w2i['<pad>'])
    return interpolated_sentences
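# A plausible definition of the `interpolate` helper used above (a sketch: it
# linearly interpolates each latent dimension, endpoints included):
def interpolate(start, end, steps):
    interpolation = np.zeros((start.shape[0], steps + 2))
    for dim, (s, e) in enumerate(zip(start, end)):
        interpolation[dim] = np.linspace(s, e, steps + 2)
    return interpolation.T  # (steps + 2, latent_size)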
def validate(valid_loader, model, criterion, epoch, print_freq, writer=None, prefix='Test'):
    """Perform validation on the validation set"""
    model.eval()
    losses = AverageMeter()
    top1 = AverageMeter()
    with torch.no_grad():
        for i, (input, target) in enumerate(valid_loader):
            input_var = to_var(input, requires_grad=False)
            target_var = to_var(target, requires_grad=False)

            # compute output
            output = model(input_var)
            loss = criterion(output, target_var)

            # measure accuracy and record loss
            prec1 = accuracy(output.data, target_var.data, topk=(1,))[0]
            losses.update(loss.item(), input.size(0))
            top1.update(prec1.item(), input.size(0))

            if (i + 1) % print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                          i + 1, len(valid_loader), loss=losses, top1=top1))

    print(' * Prec@1 {top1.avg:.3f}'.format(top1=top1))
    if writer:
        writer.add_scalar(f'{prefix}/test_loss', losses.avg, global_step=epoch)
        writer.add_scalar(f'{prefix}/test_acc', top1.avg, global_step=epoch)
    return losses.avg, top1.avg
def evaluate(self):
    self.model.eval()
    true_scores_list = list()
    false_scores_list = list()
    batch_loss_history = list()
    for batch_i, (contexts, res_true, res_ns1, res_ns2, res_ns3, res_ns4) in \
            enumerate(tqdm(self.eval_data_loader, ncols=80)):
        with torch.no_grad():
            contexts = to_var(torch.FloatTensor(contexts))
            res_trues = to_var(torch.FloatTensor(res_true))
            res_ns1 = to_var(torch.FloatTensor(res_ns1))
            res_ns2 = to_var(torch.FloatTensor(res_ns2))
            res_ns3 = to_var(torch.FloatTensor(res_ns3))
            res_ns4 = to_var(torch.FloatTensor(res_ns4))

            # score the true response and one negative sample
            true_scores = self.model.score(contexts, res_trues)
            false_scores = self.model.score(contexts, res_ns4)
            true_scores_list += true_scores.data.cpu().numpy().tolist()
            false_scores_list += false_scores.data.cpu().numpy().tolist()

            # forward pass for the evaluation loss
            batch_loglikelihood = self.model(contexts, res_trues, res_ns1,
                                             res_ns2, res_ns3, res_ns4)
            batch_loss = -torch.sum(batch_loglikelihood)
            assert not isnan(batch_loss.item())
            batch_loss_history.append(batch_loss.item())

    epoch_loss = np.sum(batch_loss_history)
    return np.mean(true_scores_list), np.mean(false_scores_list), epoch_loss
def get_style_content_space(self, input_sequence):
    batch_size = input_sequence.size(0)
    input_embedding = self.embedding(input_sequence)  # convert to embeddings

    ################### encoder ##################
    _, hidden = self.encoder(input_embedding)  # hidden -> (B, H)

    # if the RNN is bidirectional or has multiple layers, flatten the hidden states
    if self.bidirectional or self.num_layers > 1:
        hidden = torch.mean(hidden, dim=1)
    else:
        hidden = hidden.squeeze()

    ####### REPARAMETERIZATION of style and content #######
    ############ style component ############
    style_mean = self.hidden2stylemean(hidden)   # latent mean
    style_logv = self.hidden2stylelogv(hidden)   # latent log-variance
    style_std = torch.exp(0.5 * style_logv)      # standard deviation
    style_z = to_var(torch.randn([batch_size, self.style_space_size]))
    style_z = style_z * style_std + style_mean   # sample via z = mu + sigma * eps

    ############ content component ############
    content_mean = self.hidden2contentmean(hidden)  # latent mean
    content_logv = self.hidden2contentlogv(hidden)  # latent log-variance
    content_std = torch.exp(0.5 * content_logv)     # standard deviation
    content_z = to_var(torch.randn([batch_size, self.content_space_size]))
    content_z = content_z * content_std + content_mean

    return style_z, content_z
def eval(self, images_path, caption_path, name):
    eval_size = self.args.eval_size
    beam_size = self.args.beam_size

    # Get image data loader
    cocoFolder = CocoImageFolder(images_path, caption_path, self.transform)
    data_loader = torch.utils.data.DataLoader(cocoFolder,
                                              batch_size=eval_size,
                                              shuffle=False,
                                              num_workers=self.num_workers,
                                              drop_last=False)
    num_batches = len(data_loader)
    res = []  # every item in the list is a batch of imgs, imgids, filenames
    for i, (images, image_ids, filenames) in enumerate(data_loader):
        if i % 100 == 0:
            print("Processed {}/{}".format(i, num_batches))
        images = to_var(images)
        # with beam search
        generated_captions, attention, beta = self.model.mysampler(
            images, beam_size=beam_size)
        captions = generated_captions.cpu().data.numpy()
        for image_idx in range(captions.shape[0]):
            sampled_ids = captions[image_idx]
            sampled_caption = []
            for word_id in sampled_ids:
                word = self.vocab.idx2word[word_id]
                if word == '<end>':
                    break
                else:
                    sampled_caption.append(word)
            sentence = ' '.join(sampled_caption)
            res.append({"image_id": image_ids[image_idx], "caption": sentence})

    # save results to file
    resName = "./results/" + name + ".json"
    with open(resName, 'w') as f:
        json.dump(res, f)
    print("{} is saved!".format(resName))
def forward(self, input_ids, attention_mask, length):
    batch_size = input_ids.shape[0]
    hidden = self.encoder(input_ids, attention_mask).last_hidden_state[:, 0, :]

    # REPARAMETERIZATION
    mean = self.hidden2mean(hidden)
    logv = self.hidden2logv(hidden)
    std = torch.exp(0.5 * logv)
    z = to_var(torch.randn([batch_size, self.latent_size]))
    z = z * std + mean

    # DECODER
    hidden = self.latent2hidden(z)
    hidden = hidden.view(self.hidden_factor, batch_size, self.hidden_size)

    decoder_input_sequence = input_ids.clone()  # decoder input
    if self.word_dropout_rate > 0:
        # randomly replace decoder input tokens with <unk>
        prob = torch.rand(input_ids.size())
        if torch.cuda.is_available():
            prob = prob.cuda()
        # never drop <sos> or padding tokens
        prob[(input_ids.data - self.sos_idx) * (input_ids.data - self.pad_idx) == 0] = 1
        decoder_input_sequence[prob < self.word_dropout_rate] = self.mask_idx
    input_embedding = self.embedding(decoder_input_sequence)
    input_embedding = self.embedding_dropout(input_embedding)

    # rnn input preparation: sort by length for packing, and reorder the
    # embeddings and decoder init state to match the sorted order
    sorted_lengths, sorted_idx = torch.sort(length, descending=True)
    input_embedding = input_embedding[sorted_idx]
    hidden = hidden[:, sorted_idx]
    packed_input = rnn_utils.pack_padded_sequence(
        input_embedding, sorted_lengths.data.tolist(), batch_first=True)

    # decoder forward pass
    outputs, _ = self.decoder_rnn(packed_input, hidden)

    # process outputs: unpack, then restore the original batch order
    padded_outputs = rnn_utils.pad_packed_sequence(
        outputs, batch_first=True, padding_value=self.pad_idx)[0]
    padded_outputs = padded_outputs.contiguous()
    _, reversed_idx = torch.sort(sorted_idx)
    padded_outputs = padded_outputs[reversed_idx]

    # project outputs to vocab
    logp = nn.functional.log_softmax(self.outputs2vocab(padded_outputs), dim=-1)
    return logp, mean, logv, z
def forward(self, input_utterances, input_utterance_length,
            input_conversation_length, target_utterances, decode=False):
    """
    Forward of HRED
    :param input_utterances: [num_utterances, max_utter_len]
    :param input_utterance_length: [num_utterances]
    :param input_conversation_length: [batch_size]
    :param target_utterances: [num_utterances, seq_len]
    :param decode: True or False
    :return: decoder_outputs
    """
    num_utterances = input_utterances.size(0)
    max_conv_len = input_conversation_length.data.max().item()

    encoder_outputs, encoder_hidden = self.encoder(input_utterances,
                                                   input_utterance_length)
    encoder_hidden = encoder_hidden.transpose(1, 0).contiguous().view(
        num_utterances, -1)

    # start index of each conversation within the flat utterance batch
    start = torch.cumsum(
        torch.cat((to_var(input_conversation_length.data.new(1).zero_()),
                   input_conversation_length[:-1])), 0)

    # pad each conversation's utterance encodings to the max conversation length
    encoder_hidden = torch.stack([
        pad(encoder_hidden.narrow(0, s, l), max_conv_len)
        for s, l in zip(start.data.tolist(),
                        input_conversation_length.data.tolist())
    ], 0)

    context_outputs, context_last_hidden = self.context_encoder(
        encoder_hidden, input_conversation_length)
    context_outputs = torch.cat([
        context_outputs[i, :l, :]
        for i, l in enumerate(input_conversation_length.data)
    ])

    decoder_init = self.context2decoder(context_outputs)
    decoder_init = decoder_init.view(self.decoder.num_layers, -1,
                                     self.decoder.hidden_size)

    if not decode:
        decoder_outputs = self.decoder(target_utterances,
                                       init_h=decoder_init,
                                       decode=decode)
        return decoder_outputs
    else:
        prediction, final_score, length = self.decoder.beam_decode(
            init_h=decoder_init)
        return prediction
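# A plausible definition of the `pad` helper used above (a sketch): it
# zero-pads a [len, hidden] tensor along dim 0 up to the max conversation length.
def pad(tensor, length):
    if tensor.size(0) < length:
        zeros = tensor.new_zeros(length - tensor.size(0), *tensor.size()[1:])
        return torch.cat([tensor, zeros], dim=0)
    return tensor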
def geom_prior_step(y_prev, z_pres_prev):
    """
    The AIR inference network produces three sets of variables for each entity
    at every time-step:
      * a 1-dimensional Bernoulli variable indicating the entity's presence (z^{i}_{pres})
      * a C-dimensional distributed vector describing its class or appearance (z^{i}_{what})
      * a 3-dimensional vector specifying the affine parameters of its position
        and scale (z^{i}_{where})
    [Eslami et al., 2016]
    """
    # z_pres: Bernoulli random variable indicating if the sampled digit should
    # be included; if z_pres_prev = 0, then z_pres = 0
    z_pres = np.random.binomial(1, 0.5 * z_pres_prev)
    z_pres = utils.to_var(torch.from_numpy(z_pres).float())
    # sample a single digit, gated by z_pres
    y = sample_glimpse_prior() * z_pres
    return y_prev + y, z_pres
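# A sketch of unrolling this step to draw one full sample from the geometric
# prior (assumptions: `sample_glimpse_prior` broadcasts against z_pres as it
# does above; `batch_size`, `canvas_shape`, and `max_steps` are illustrative,
# not taken from the source):
def sample_geom_prior(batch_size, canvas_shape, max_steps=3):
    y = utils.to_var(torch.zeros(batch_size, *canvas_shape))
    z_pres = np.ones(batch_size, dtype=np.float32)  # every scene starts "present"
    for _ in range(max_steps):
        y, z_pres_var = geom_prior_step(y, z_pres)
        z_pres = z_pres_var.data.cpu().numpy()  # back to numpy for the next draw
    return y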
def validate_step(model, valid_dl, criterion):
    model.eval()
    N = len(valid_dl.dataset)
    steps = N // valid_dl.batch_size
    avg_loss = 0.0
    with torch.no_grad():  # replaces the deprecated volatile=True flag
        for i, (anc, pos, neg) in enumerate(valid_dl):
            anc = to_var(anc)
            pos = to_var(pos)
            neg = to_var(neg)

            f_anc, f_pos, f_neg = model(anc, pos, neg)
            loss = criterion(f_anc, f_pos, f_neg)
            avg_loss = (avg_loss * i + loss.item()) / (i + 1)

            # report
            sys.stdout.flush()
            sys.stdout.write("\r Validation Step [{}/{}]: loss {:.5f} ".format(
                i + 1, steps + 1, avg_loss))
    print()
    return avg_loss
def inference(self, o_cond=None, n_samples=1, layer_cond=True):
    """
    :param o_cond: optional real images to condition on, n x C x H x W
    :param n_samples: the number of z samples per o_cond
    :return: sample from the learned P_theta
    """
    batch_size = n_samples if o_cond is None else n_samples * o_cond.size(0)
    # draw n_samples latents and tile them across the conditioning batch
    z = to_var(torch.randn([1, n_samples, self.latent_size])) \
        .repeat(batch_size // n_samples, 1, 1) \
        .permute(1, 0, 2).reshape(batch_size, -1)
    o_cond_rep = None
    if o_cond is not None:
        o_cond_rep = o_cond.repeat(n_samples, 1, 1, 1)
        mu_cond, logvar_cond = self.prior(o_cond_rep)
        std_cond = torch.exp(0.5 * logvar_cond)
        eps = to_var(torch.randn([batch_size, self.latent_size]))
        z = eps * std_cond + mu_cond
    # now both z and o_cond_rep have batch_size rows
    recon_x = self.decoder(z, o_cond_rep)
    if o_cond is not None and layer_cond:
        recon_x = torch.cat([o_cond, recon_x])
    return recon_x
def test():
    model.eval()
    test_loss = 0.
    test_accuracy = 0.
    counter = 0
    for data, target in test_dataset:
        data = to_var(torch.LongTensor(data))      # (bs, seq_len)
        target = to_var(torch.LongTensor(target))  # (bs,)
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        output = model(data)
        test_loss += F.nll_loss(output, target).item()  # accumulate per-batch loss
        pred = output.data.max(1, keepdim=True)[1]  # index of the max log-probability
        test_accuracy += pred.eq(target.data.view_as(pred)).cpu().float().sum()
        counter += data.size(0)

    # Horovod: use test_sampler to determine the number of examples in
    # this worker's partition.
    test_loss /= counter
    test_accuracy /= counter

    # Horovod: average metric values across workers.
    test_loss = metric_average(test_loss, 'avg_loss')
    test_accuracy = metric_average(test_accuracy, 'avg_accuracy')

    # Horovod: print output only on first rank.
    if hvd.rank() == 0:
        print('\nTest set: Average loss: {:.4f}, Accuracy: {:.2f}%\n'.format(
            test_loss, 100. * test_accuracy))
def sample_noise(batch_size, dim):
    """
    Generate a PyTorch Variable of uniform random noise.

    Input:
    - batch_size: Integer giving the batch size of noise to generate.
    - dim: Integer giving the dimension of noise to generate.

    Output:
    - A PyTorch Variable of shape (batch_size, dim, 1, 1) containing uniform
      random noise in the range (-1, 1).
    """
    return utils.to_var(torch.rand(batch_size, dim) * 2 - 1).unsqueeze(2).unsqueeze(3)
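# Usage sketch (assumptions: `G` is a DCGAN-style generator that consumes
# (batch_size, dim, 1, 1) noise; the function name and sizes are illustrative):
def generate_fake_images(G, batch_size=16, dim=100):
    noise = sample_noise(batch_size, dim)  # uniform in (-1, 1), shape (bs, dim, 1, 1)
    return G(noise)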
def train(model, data, test_data, optimizer, loss_fn, n_epoch=10):
    print('=========training=========')
    model.train()
    for epoch in range(n_epoch):
        print('----epoch', epoch)
        random.shuffle(data)
        for batch_ct, (X, Y) in enumerate(data):
            X = to_var(torch.LongTensor(X))  # (bs, seq_len)
            Y = to_var(torch.LongTensor(Y))  # (bs,)
            pred = model(X)  # (bs, ans_size)
            loss = loss_fn(pred, Y)
            if batch_ct % 100 == 0:
                print('loss: {:.4f}'.format(loss.item()))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print('current performance at epoch', epoch)
        test(model, test_data)
def test(model, data):
    model.eval()
    correct = 0
    losses = 0.0
    counter = 0
    for batch_ct, (X, Y) in enumerate(data):
        X = to_var(torch.LongTensor(X))  # (bs, seq_len)
        Y = to_var(torch.LongTensor(Y))  # (bs,)
        pred = model(X)  # (bs, ans_size)
        loss = loss_fn(pred, Y)
        losses += loss.item() * X.size(0)
        _, pred_ids = torch.max(pred, 1)
        correct += torch.sum(pred_ids == Y).item()
        counter += X.size(0)
    print('Test Acc: {:.2f} % ({}/{})'.format(100 * correct / counter, correct, counter))
    # exp of the mean cross-entropy, i.e. perplexity
    print('Test PPL: {:.4f}'.format(math.exp(losses / counter)))
def embed(self, x):
    """word index: [batch_size] => word vectors: [batch_size, hidden_size]"""
    if self.training and self.word_drop > 0.0:
        # with probability word_drop, replace the whole batch with UNK tokens
        if random.random() < self.word_drop:
            embed = self.embedding(to_var(x.data.new([UNK_ID] * x.size(0))))
        else:
            embed = self.embedding(x)
    else:
        embed = self.embedding(x)
    return embed
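# For contrast, a per-token variant of the same trick (a sketch; the original
# above replaces the entire batch in one draw, which may be intentional):
def embed_token_dropout(self, x):
    if self.training and self.word_drop > 0.0:
        drop = torch.rand_like(x, dtype=torch.float) < self.word_drop
        x = x.masked_fill(drop, UNK_ID)
    return self.embedding(x)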
def get_position_embedding(self, story_sent_len, story_len, batch_size):
    J = story_sent_len
    d = self.embd_size
    pe = to_var(torch.zeros(J, d))  # (story_sent_len, embd_size)
    for j in range(1, J + 1):
        for k in range(1, d + 1):
            l_kj = (1 - j / J) - (k / d) * (1 - 2 * j / J)
            pe[j - 1][k - 1] = l_kj
    pe = pe.unsqueeze(0).unsqueeze(0)  # (1, 1, story_sent_len, embd_size)
    pe = pe.repeat(batch_size, story_len, 1, 1)  # (bs, story_len, story_sent_len, embd_size)
    return pe
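# An equivalent, loop-free construction of the same l_kj encoding (a sketch;
# it reproduces `pe` above without the Python double loop):
def position_encoding_vectorized(J, d):
    j = torch.arange(1, J + 1, dtype=torch.float).unsqueeze(1)  # (J, 1)
    k = torch.arange(1, d + 1, dtype=torch.float).unsqueeze(0)  # (1, d)
    return (1 - j / J) - (k / d) * (1 - 2 * j / J)              # (J, d)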
def save_gradient_pic(D, fixed_generated_images, iteration, opts):
    pic = utils.to_var(fixed_generated_images)
    pic.requires_grad_(True)
    loss = mse_loss(D(pic), 1)
    path = os.path.join(opts.sample_dir, 'gradients-{:06d}.png'.format(iteration))
    loss.backward()
    gradients = utils.to_data(pic.grad)
    # collapse to the per-pixel gradient magnitude, broadcast across all channels
    gradients[:, :] = np.sqrt(np.sum(gradients**2, axis=1, keepdims=True))
    grid = create_image_grid(gradients)
    # note: scipy.misc.imsave was removed in SciPy 1.2; imageio.imwrite is the
    # usual drop-in replacement
    scipy.misc.imsave(path, grid)
def plot_errors(model, dataloader):
    model.train(False)
    plt.figure(figsize=(12, 24))
    count = 0
    with torch.no_grad():  # replaces the deprecated volatile=True flag
        for (inputs, labels, _) in tqdm(dataloader):
            inputs, labels = to_var(inputs), to_var(labels)
            outputs = model(inputs)
            _, preds = torch.max(outputs.data, 1)
            incorrect_idxs = np.flatnonzero(
                preds.cpu().numpy() != labels.data.cpu().numpy())
            for idx in incorrect_idxs:
                count += 1
                if count > 30:
                    break
                ax = plt.subplot(10, 3, count)
                ax.axis('off')
                ax.set_title('predicted: {}'.format(
                    dataloader.dataset.classes[preds[idx].item()]))
                imshow(inputs.cpu().data[idx])
            if count > 30:  # stop iterating once the grid is full
                break
    plt.show()
    print("{} images out of {} were misclassified.".format(
        count, len(dataloader.dataset)))
def main(args):
    with open(args.data_dir + '/poems.vocab.json', 'r') as file:
        vocab = json.load(file)
    w2i, i2w = vocab['w2i'], vocab['i2w']

    model = SentenceVAE(vocab_size=len(w2i),
                        sos_idx=w2i['<sos>'],
                        eos_idx=w2i['<eos>'],
                        pad_idx=w2i['<pad>'],
                        unk_idx=w2i['<unk>'],
                        max_sequence_length=args.max_sequence_length,
                        embedding_size=args.embedding_size,
                        rnn_type=args.rnn_type,
                        hidden_size=args.hidden_size,
                        word_dropout=args.word_dropout,
                        embedding_dropout=args.embedding_dropout,
                        latent_size=args.latent_size,
                        num_layers=args.num_layers,
                        bidirectional=args.bidirectional,
                        condition_size=0)

    if not os.path.exists(args.load_checkpoint):
        raise FileNotFoundError(args.load_checkpoint)

    model.load_state_dict(
        torch.load(args.load_checkpoint, map_location=torch.device('cpu')))
    print("Model loaded from %s" % args.load_checkpoint)

    if torch.cuda.is_available():
        model = model.cuda()
    model.eval()

    samples, z = model.inference(n=args.num_samples)
    print('----------SAMPLES----------')
    print(*idx2word(samples, i2w=i2w, pad_idx=w2i['<pad>']), sep='\n')

    # interpolate between two random points in latent space
    z1 = torch.randn([args.latent_size]).numpy()
    z2 = torch.randn([args.latent_size]).numpy()
    z = to_var(torch.from_numpy(interpolate(start=z1, end=z2, steps=8)).float())
    samples, _ = model.inference(z=z)
    print('-------INTERPOLATION-------')
    print(*idx2word(samples, i2w=i2w, pad_idx=w2i['<pad>']), sep='\n')
def visualize_results(self, X=None, epoch=0):
    print("visualize results...")
    image_num = 100
    batch_size = image_num // 2  # integer division; image_num / 2 is a float in Python 3
    row = 10
    save_dir = os.path.join(self.root, self.result_dir, self.dataset, self.model_name)

    self.G.eval()
    self.FC.eval()
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # Generation from random latents
    z = utils.to_var(torch.randn(batch_size * 2, self.z_dim))
    X_hat = self.G(z)  # randomly generated sample

    self.G.train()
    self.FC.train()

    if torch.cuda.is_available():
        samples = X_hat.cpu().data.numpy().transpose(0, 2, 3, 1)
        origins = X.cpu().data.numpy().transpose(0, 2, 3, 1)
    else:
        samples = X_hat.data.numpy().transpose(0, 2, 3, 1)
        origins = X.data.numpy().transpose(0, 2, 3, 1)

    # Save images
    utils.save_images(origins[:image_num, :, :, :], [row, row],
                      os.path.join(save_dir, 'original' + '_epoch%03d' % epoch + '.png'))
    utils.save_images(samples[:image_num, :, :, :], [row, row],
                      os.path.join(save_dir, 'random' + '_epoch%03d' % epoch + '.png'))
def train(epoch):
    model.train()
    # Horovod: set epoch to sampler for shuffling.
    train_sampler.set_epoch(epoch)
    for batch_idx, (data, target) in enumerate(train_dataset):
        start = time.time()
        data = to_var(torch.LongTensor(data))      # (bs, seq_len)
        target = to_var(torch.LongTensor(target))  # (bs,)
        if args.cuda:
            data, target = data.cuda(), target.cuda()

        output = model(data)
        loss = F.nll_loss(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        elapsed = time.time() - start

        if batch_idx % args.log_interval == 0:
            # Horovod: use train_sampler to determine the number of examples in
            # this worker's partition.
            if hvd.rank() == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(train_dataset),
                    100. * batch_idx / len(train_dataset), loss.item()))
                print("Train time: ", elapsed)