def make_batch_logs(self, out, y, loss):
    """Assemble the log record for one training/evaluation batch.

    Args:
        out: Model output for the batch (converted with ``to_np``).
        y: Targets for the batch (converted with ``to_np``).
        loss: Loss value for the batch; must convert to a single scalar.

    Returns:
        dict with keys ``'stage'`` (always ``'batch'``), ``'metrics'``
        (result of ``self.compute_metrics`` plus a ``'loss'`` entry) and
        ``'opt_state'`` (result of ``self.get_optimizer_parameters()``).
    """
    predictions = to_np(out)
    targets = to_np(y)

    batch_metrics = self.compute_metrics(predictions, targets)
    # `.item()` turns the one-element array into a plain Python scalar.
    batch_metrics['loss'] = to_np(loss).item()

    return {
        'stage': 'batch',
        'metrics': batch_metrics,
        'opt_state': self.get_optimizer_parameters(),
    }
def manifold(self, epoch):
    """Scatter-plot the first two latent-mean coordinates of the validation set.

    Loads the checkpoint for ``epoch``, encodes every batch of
    ``self.valid_loader``, and saves one scatter plot (one series per label
    value 0..9) to ``<root>/<result_dir>/<dataset>/<model_name>/Z_mu_epochNNN.png``.

    Args:
        epoch: Checkpoint epoch passed to ``self.load`` and used in the
            output file name.
    """
    save_dir = os.path.join(self.root, self.result_dir, self.dataset,
                            self.model_name)
    self.load(epoch)
    # Switch to evaluation mode for the encoding pass; restored below.
    self.G.eval()
    self.E.eval()
    self.FC.eval()
    color_vec = []
    Z = []
    # NOTE(review): `color` is never read again in this function.
    color = [
        'Greys', 'Purples', 'Blues', 'Greens', 'Oranges', 'Reds', 'YlOrBr',
        'YlOrRd', 'OrRd', 'PuRd', 'RdPu', 'BuPu', 'GnBu', 'PuBu', 'YlGnBu',
        'PuBuGn', 'BuGn', 'YlGn']
    # NOTE(review): the loop variable `iter` shadows the builtin.
    for iter, (X, label) in enumerate(self.valid_loader):
        X = utils.to_var(X)
        label = utils.to_var(label)
        z_mu, z_sigma = self.E(self.FC(X))
        # NOTE(review): the reconstruction is computed but not used here.
        X_reconstruc = self.G(z_mu)
        # Accumulate one entry per sample (latent mean and its label).
        Z += [x for x in utils.to_np(z_mu)]
        color_vec += [x for x in utils.to_np(label)]
    # Back to training mode.
    self.G.train()
    self.E.train()
    self.FC.train()
    Z = np.array(Z)
    # The 'gnuplot' colormap is immediately replaced by jet.
    cmap = plt.get_cmap('gnuplot')
    cmap = plt.cm.jet
    cmaplist = [cmap(i) for i in range(cmap.N)]
    cmap = cmap.from_list('Custom cmap', cmaplist, cmap.N)
    fig, ax = plt.subplots(1, 1, figsize=(6, 6))
    # One fixed jet colour per label value 0..9.
    colors = [plt.cm.jet(float(i + 1) / 10) for i in range(10)]
    # Alternative colour scheme kept from the original:
    # import matplotlib.cm as cm
    # colors = cm.rainbow(np.linspace(0, 2, 20))
    for k in range(10):
        X = []
        Y = []
        # Collect the first two latent coordinates of samples labelled k.
        for i, z in enumerate(Z):
            if color_vec[i] == k:
                X.append(z[0])
                Y.append(z[1])
        # Alternate markers between consecutive classes.
        marker = ["*", "^"]
        # NOTE(review): `cmap` is passed together with an explicit colour;
        # presumably it has no effect here -- confirm.
        ax.scatter(X, Y, c=colors[k], marker=marker[k % 2], cmap=cmap,
                   label=str(k), s=20)
    # Previous single-call variant kept from the original:
    # ax.scatter(Z[:5000, 0], Z[:5000, 1], c=color_vec[:5000], label= color_vec[:5000], marker='.', cmap=cmap, )
    plt.legend(loc='upper right', ncol=1, borderaxespad=0.)
    plt.xlim(-4, 4)
    plt.ylim(-4, 4)
    plt.xticks(fontsize=20)
    plt.yticks(fontsize=20)
    fig.savefig(os.path.join(save_dir,
                             'Z_mu' + '_epoch%03d' % epoch + '.png'),
                transparent=True)
    plt.close()
def act(self, obs, sample=False, propensity=False):
    """Pick an action for a single observation.

    Args:
        obs: One observation; a batch dimension is added before it is fed
            to ``self.actor``.
        sample: Draw from the policy distribution when True, otherwise use
            its mean.
        propensity: When True, additionally return the probability the
            policy assigns to the chosen (pre-clamp) action.

    Returns:
        The clamped action as a numpy array, or ``(action, prob)`` when
        ``propensity`` is True.
    """
    batch = torch.FloatTensor(obs).to(self.device).unsqueeze(0)
    policy = self.actor(batch)

    if sample:
        chosen = policy.sample()
    else:
        chosen = policy.mean

    # The likelihood is evaluated on the action *before* clamping.
    log_likelihood = policy.log_prob(chosen).sum(dim=-1, keepdim=True)
    likelihood = log_likelihood.exp()

    chosen = chosen.clamp(*self.action_range)
    assert chosen.ndim == 2 and chosen.shape[0] == 1

    action_np = utils.to_np(chosen[0])
    if not propensity:
        return action_np
    return action_np, utils.to_np(likelihood[0])
def log_weights_distribution(self, named_params, steps_completed):
    """Log histograms of parameter values (and optionally gradients).

    Args:
        named_params: Iterable of ``(name, parameter)`` pairs, or None, in
            which case nothing is logged and nothing is flushed.
        steps_completed: Step index attached to every histogram.

    A value histogram is written for every parameter whose tag contains one
    of ``self.logged_params``. When ``self.log_gradients`` is set, a
    ``<tag>/grad`` histogram is written for parameters that actually have a
    gradient; parameters whose ``grad`` is None are skipped instead of being
    handed to ``to_np``.
    """
    if named_params is None:
        return

    for tag, value in named_params:
        # Tags use '/' as the hierarchy separator.
        tag = tag.replace('.', '/')
        if any(substring in tag for substring in self.logged_params):
            self.tblogger.histogram_summary(tag, to_np(value),
                                            steps_completed)
        # Parameters with no gradient carry `grad is None`; converting
        # that would fail, so they are skipped.
        if self.log_gradients and value.grad is not None:
            self.tblogger.histogram_summary(tag + '/grad',
                                            to_np(value.grad),
                                            steps_completed)
    self.tblogger.sync_to_file()
def train(epoch_idx, net, train_loader, lr, logger, n_class):
    """Run one training epoch on the GPU and log per-batch metrics.

    Args:
        epoch_idx: Zero-based epoch index; used for the logging step and to
            decide when to checkpoint.
        net: Model exposing a ``base_net`` sub-module.
        train_loader: Iterable of ``(data, target)`` batches.
        lr: Learning rate for the non-``base_net`` parameters; ``base_net``
            parameters use ``lr * 0.1``.
        logger: Object with ``scalar_summary(tag, value, step)``.
        n_class: Number of classes passed to ``label_accuracy_score``.

    Every 10th epoch the state dict is saved to
    ``./deeplab_epoch_<k>.pth`` where ``k = (epoch_idx + 1) // 10``.

    NOTE(review): the optimizer is rebuilt on every call, so its momentum
    buffers do not carry over between epochs -- confirm this is intended.
    """
    net.cuda()
    net.train()

    # A set gives O(1) membership tests while splitting the parameters.
    base_param_ids = {id(p) for p in net.base_net.parameters()}
    top_params = [p for p in net.parameters()
                  if id(p) not in base_param_ids]
    optimizer = torch.optim.SGD(
        [
            {'params': top_params},
            {'params': net.base_net.parameters(), 'lr': lr * 0.1},
        ],
        lr=lr, momentum=0.9, weight_decay=0.00004)
    criterion = nn.CrossEntropyLoss(ignore_index=-1)

    len_batch = len(train_loader)
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.cuda(), target.cuda()

        optimizer.zero_grad()
        score = net(data)
        loss = criterion(score, target)
        loss.backward()
        optimizer.step()

        _, predicted = score.max(1)
        predicted, target = to_np(predicted), to_np(target)
        acc, acc_cls, mean_iu = label_accuracy_score(target, predicted,
                                                     n_class)
        info = {
            'acc': acc,
            'acc_cls': acc_cls,
            'mean_iu': mean_iu,
            # `.item()` works on 0-dim loss tensors, where indexing with
            # `[0]` raises.
            'loss': loss.item(),
        }
        step = len_batch * epoch_idx + batch_idx + 1
        for tag, value in info.items():
            logger.scalar_summary(tag, value, step)
        print(('train', batch_idx, epoch_idx))

    if (epoch_idx + 1) % 10 == 0:
        # Floor division keeps the checkpoint index an int; true division
        # would put "1.0" into the file name.
        n = (epoch_idx + 1) // 10
        torch.save(net.state_dict(), './deeplab_epoch_' + str(n) + '.pth')
def eval(self, mode=None, batch_size=None, output_dir=None):
    """Generate a grid of samples and save it with its latent vectors.

    Puts ``NetG`` and ``NetD`` into evaluation mode, samples ``batch_size``
    latent vectors, runs the generator, and writes ``vec_<time>.txt`` and
    ``img_<time>.png`` into ``output_dir``.

    Args:
        mode: For ``'infogan'`` only: index of the code dimension to sweep
            linearly over [-2, 2) across the batch. None leaves the code
            at zero.
        batch_size: Number of samples; defaults to the data loader's batch
            size.
        output_dir: Destination directory. Defaults to the current
            directory; created if missing.

    Raises:
        ValueError: if ``self.name`` is neither ``'cgan'`` nor ``'infogan'``.
    """
    # Fail early with a clear error instead of hitting an undefined
    # variable after the generator branch.
    if self.name not in ('cgan', 'infogan'):
        raise ValueError(
            "eval() supports 'cgan' and 'infogan', got {!r}".format(self.name))

    # Sets the modules in evaluation mode.
    self.NetG.eval()
    self.NetD.eval()

    if batch_size is None:
        batch_size = self.data_loader.batch_size
    # The original default (None) crashed in os.path.join.
    if output_dir is None:
        output_dir = '.'
    os.makedirs(output_dir, exist_ok=True)

    # Labels 0..7 repeated once per grid row (8 images per row).
    nrows = batch_size // 8
    viz_labels = np.array([num for _ in range(nrows) for num in range(8)])
    viz_labels = torch.LongTensor(viz_labels).to(self.device)

    with torch.no_grad():
        viz_tensor = torch.randn(batch_size, self.latent_dim,
                                 device=self.device)
        if self.name == 'cgan':
            # generated image from random noise
            viz_sample = self.NetG(viz_tensor, viz_labels)
        else:
            labels_onehot = self._to_onehot(viz_labels, dim=self.classes)
            z_code = torch.zeros((batch_size, self.code_dim),
                                 device=self.device)
            if mode is not None:
                for i in range(batch_size):
                    z_code[i, mode] = 4. * i / batch_size - 2.
            viz_sample = self.NetG(viz_tensor, labels_onehot, z_code)

    viz_vector = utils.to_np(viz_tensor).reshape(batch_size, self.latent_dim)
    cur_time = datetime.now().strftime("%Y%m%d-%H%M%S")
    np.savetxt(os.path.join(output_dir, 'vec_{}.txt'.format(cur_time)),
               viz_vector)
    vutils.save_image(viz_sample,
                      os.path.join(output_dir,
                                   'img_{}.png'.format(cur_time)),
                      nrow=8, normalize=True)
    logging.info(f'\nSaving evaluation image to {output_dir}...')
def eval(self, mode=None, batch_size=None):
    """Generate a grid of samples and dump it next to its latent vectors.

    Switches ``netG``/``netD`` to evaluation mode, draws ``batch_size``
    latent vectors, runs the generator and writes ``vec_<time>.txt`` and
    ``img_<time>.png`` into the current working directory.

    Args:
        mode: In the InfoGAN configuration, index of the style dimension
            swept linearly over [-2, 2) across the batch; None keeps the
            style code at zero.
        batch_size: Number of samples; defaults to the data loader's batch
            size.
    """
    self.netG.eval()
    self.netD.eval()

    if batch_size is None:
        batch_size = self.data_loader.batch_size

    # Labels 0..7 repeated for every full row of eight images.
    rows = batch_size // 8
    label_values = list(range(8)) * rows
    viz_labels = torch.LongTensor(np.array(label_values)).to(self.device)

    with torch.no_grad():
        if not self.infogan:
            viz_tensor = torch.randn(batch_size, self.latent_dim, 1, 1,
                                     device=self.device)
            viz_sample = self.netG(viz_tensor, viz_labels)
        else:
            viz_tensor = torch.randn(batch_size, self.latent_dim,
                                     device=self.device)
            labels_onehot = self._to_onehot(viz_labels, dim=self.classes)
            z_style = torch.zeros((batch_size, self.style_dim),
                                  device=self.device)
            if mode is not None:
                for row in range(batch_size):
                    z_style[row, mode] = 4. * row / batch_size - 2.
            viz_sample = self.netG(viz_tensor, labels_onehot, z_style)

    latent_np = utils.to_np(viz_tensor)
    viz_vector = latent_np.reshape(batch_size, self.latent_dim)

    cur_time = datetime.now().strftime("%Y%m%d-%H%M%S")
    vector_file = 'vec_{}.txt'.format(cur_time)
    image_file = 'img_{}.png'.format(cur_time)
    np.savetxt(vector_file, viz_vector)
    vutils.save_image(viz_sample, image_file, nrow=8, normalize=True)
def generateResults(encoder_decoder: EncoderDecoder, data_loader, resultFilename, input_tokens_list):
    """Decode every batch of ``data_loader`` and write one string per line.

    Args:
        encoder_decoder: Model called as ``model(inputs, lengths)`` and
            returning ``(log_probs, output_seqs)``; its ``lang.idx_to_tok``
            is used to render tokens.
        data_loader: Iterable of ``(input_idxs, target_idxs, _, _)`` batches
            where index 0 marks padding.
        resultFilename: Path of the text file to (over)write.
        input_tokens_list: Zipped with the decoded sequences, so it bounds
            how many lines are written; the tokens themselves are not
            forwarded to ``seq_to_string``.

    Returns:
        None.

    NOTE(review): sequences are written in the length-sorted order used
    inside each batch, not in the loader's original order -- confirm that
    consumers of the result file expect this.
    """
    idx_to_tok = encoder_decoder.lang.idx_to_tok
    all_output_seqs = []

    for input_idxs, _, _, _ in tqdm(data_loader):
        # The model is fed sequences sorted by decreasing length,
        # truncated to the longest real (non-padding) length.
        input_lengths = (input_idxs != 0).long().sum(dim=1)
        sorted_lengths, order = torch.sort(input_lengths, descending=True)
        input_variable = Variable(
            input_idxs[order, :][:, :max(input_lengths)])

        _, output_seqs = encoder_decoder(input_variable,
                                         list(sorted_lengths))
        all_output_seqs.extend(trim_seqs(output_seqs))

    with open(resultFilename, 'w') as fo:
        for seq, _ in zip(all_output_seqs, input_tokens_list):
            # Keep everything up to and including the first EOS (index 2);
            # without an EOS keep the whole sequence. The original fell
            # back to `seq` itself, which made the slice bound a list.
            end = seq.index(2) + 1 if 2 in seq else len(seq)
            string = seq_to_string(seq[:end], idx_to_tok,
                                   input_tokens=None)
            fo.write(string + '\n')
    return None
def ss_preds_var(self, obs, next_obs, action):
    """Return the ensemble variance of the self-supervised predictions.

    Args:
        obs, next_obs, action: One transition or a batch of transitions,
            as tensors or array-likes. ``obs`` and ``next_obs`` must share
            a shape and all three must agree on their first dimension.

    Returns:
        numpy array: variance across the ``self.num_ensem_comps`` ensemble
        chunks, summed over the last dimension.

    Raises:
        AssertionError: on inconsistent transition shapes.
    """
    # TODO (chongyi zheng):
    #  do we need next_obs (forward) or action (inverse) - measure the
    #  prediction error, or we just need to predictions - measure the
    #  prediction variance?
    #  task identity inference - threshold or statistical hypothesis
    #  testing like: https://arxiv.org/abs/1902.09434
    assert obs.shape == next_obs.shape and obs.shape[0] == next_obs.shape[0] == action.shape[0], \
        "invalid transitions shapes!"

    def _on_device(value):
        # Tensors are only moved; everything else is converted first.
        if isinstance(value, torch.Tensor):
            return value.to(self.device)
        return torch.FloatTensor(value).to(self.device)

    # TODO (chongyi zheng): Do we need to set agent mode to evaluation
    #  before prediction?
    with torch.no_grad():
        obs = _on_device(obs)
        next_obs = _on_device(next_obs)
        action = _on_device(action)

        # Rank-3 or rank-1 observations get a leading batch dimension.
        if obs.dim() in (3, 1):
            obs = obs.unsqueeze(0)
            next_obs = next_obs.unsqueeze(0)
            action = action.unsqueeze(0)

        # prediction variances
        if self.use_fwd:
            preds = self.ss_fwd_pred_ensem(obs, action)
        if self.use_inv:
            # (chongyi zheng): we compute logits variance here
            preds = self.ss_inv_pred_ensem(obs, next_obs)

        # (chongyi zheng): the same as equation (1) in
        #  https://arxiv.org/abs/1906.04161
        per_member = preds.chunk(self.num_ensem_comps, dim=0)
        stacked = torch.stack(per_member)
        preds_var = torch.var(stacked, dim=0).sum(dim=-1)

        return utils.to_np(preds_var)
def main():
    """Evaluate a Deeplab model on the first six VOC2012 training batches.

    Builds the model and dataset, runs inference without gradient
    tracking, and prints the number of evaluated samples followed by the
    metrics from ``label_accuracy_score`` scaled to percent.

    NOTE(review): no checkpoint is loaded (the loading code below is
    disabled), so the metrics describe whatever weights ``Deeplab()``
    starts with.
    """
    model = Deeplab()
    dataset = VOC2012ClassSeg('./dataset', split='train', transform=True)
    val_loader = torch.utils.data.DataLoader(dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=1,
                                             pin_memory=True)

    # Disabled checkpoint loading / GPU placement kept for reference:
    # n_class = len(dataset.class_names)
    # model_file = ''
    # model_data = torch.load(model_file)
    # try:
    #     model.load_state_dict(model_data)
    # except Exception:
    #     model.load_state_dict(model_data['model_state_dict'])
    # if torch.cuda.is_available():
    #     model.cuda()
    model.eval()

    label_trues, label_preds = [], []
    # `torch.no_grad()` replaces the removed `volatile=True` flag, which
    # no longer disables gradient tracking.
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(val_loader):
            # if torch.cuda.is_available():
            #     data, target = data.cuda(), target.cuda()
            score = model(data)
            _, predicted = score.max(1)
            predicted = to_np(predicted)
            target = to_np(target)
            for lt, lp in zip(target, predicted):
                label_trues.append(lt)
                label_preds.append(lp)
            # Only the first six batches are evaluated.
            if batch_idx == 5:
                break

    n_class = 21
    print(len(label_preds))
    metrics = label_accuracy_score(label_trues, label_preds,
                                   n_class=n_class)
    metrics = np.array(metrics)
    metrics *= 100
    print(metrics)
def show2(epoch):
    """Hide a secret image in a cover image, then recover it, and show both.

    Loads the encoder/decoder weights saved for ``epoch``, encodes the
    secret into the cover's first YCbCr channel, saves the stego image to
    ``./lfw_mssim_results/stego.bmp``, re-reads it, decodes it and saves the
    recovered secret to ``./lfw_mssim_results/secret.png``. Both images are
    also displayed with ``Image.show()``.

    Args:
        epoch: Integer epoch selecting ``encoder_<epoch>.pkl`` and
            ``decoder_<epoch>.pkl`` under ``model_dir``.
    """
    encoder.load_state_dict(
        torch.load(model_dir + 'encoder_{:d}.pkl'.format(epoch)))
    decoder.load_state_dict(
        torch.load(model_dir + 'decoder_{:d}.pkl'.format(epoch)))
    # NOTE(review): the networks are put in train mode although this is an
    # inference path -- confirm this is intentional.
    encoder.train()
    decoder.train()

    cover, yuv = getCoverExample('../pics_lfw/1.JPEG', params.input_size)
    secret = getSecretExample('../pics_lfw/4.JPEG', params.input_size)
    cover = transform(cover)
    secret = transform(secret)
    cover.resize_(1, 1, params.input_size, params.input_size)
    secret.resize_(1, 1, params.input_size, params.input_size)

    # The encoder input stacks secret and cover along the channel axis.
    concat = torch.cat([secret, cover], dim=1)
    if params.use_cuda:
        concat = concat.cuda()
    concat = Variable(concat)

    stego = encoder(concat)
    stego = to_np(stego)
    # Min-max rescale to the 0..255 range of an 8-bit image.
    stego = (((stego - stego.min()) * 255) /
             (stego.max() - stego.min())).astype(np.uint8)
    print(np.shape(stego))
    stego = stego[0][0]
    stego = Image.fromarray(stego, 'L')
    stego.show()

    # Write the stego plane into channel 0 of the cover's YCbCr array.
    yuv[:, :, 0] = stego
    img = Image.fromarray(yuv, 'YCbCr')
    stego = img.convert('RGB')
    stego.save('./lfw_mssim_results/stego.bmp')

    # Decode from the file on disk so the save/load round trip is included.
    stego, _ = getCoverExample('./lfw_mssim_results/stego.bmp',
                               params.input_size)
    stego = transform(stego)
    stego.resize_(1, 1, params.input_size, params.input_size)
    # Follow the same device policy as the encoder input; the original
    # moved this tensor to the GPU unconditionally.
    if params.use_cuda:
        stego = stego.cuda()
    stego = Variable(stego)

    secret2 = decoder(stego)
    secret2 = to_np(secret2)
    secret2 = (((secret2 - secret2.min()) * 255) /
               (secret2.max() - secret2.min())).astype(np.uint8)
    secret2 = secret2[0][0]
    secret2 = Image.fromarray(secret2, 'L')
    secret2.show()
    secret2.save('./lfw_mssim_results/secret.png')
def act(self, obs, sample=False):
    """Pick an action for a single observation.

    Args:
        obs: One observation; a batch dimension is added before it is fed
            to ``self.actor``.
        sample: Draw from the policy distribution when True, otherwise use
            its mean.

    Returns:
        The action, clamped to ``self.action_range``, as a numpy array.
    """
    batch = torch.FloatTensor(obs).to(self.device).unsqueeze(0)
    policy = self.actor(batch)

    if sample:
        chosen = policy.sample()
    else:
        chosen = policy.mean
    chosen = chosen.clamp(*self.action_range)

    # Exactly one action row is expected for the single observation.
    assert chosen.ndim == 2 and chosen.shape[0] == 1
    return utils.to_np(chosen[0])
def act(self, obs, sample=False):
    """Pick an action from a grid observation and its derived text state.

    Args:
        obs: numpy array holding one grid observation; it is cast to long
            and given a leading batch dimension.
        sample: Draw from the policy distribution when True, otherwise use
            its mean.

    Returns:
        The action, clamped to ``self.action_range``, as a numpy array.
    """
    grid_state = torch.from_numpy(obs).unsqueeze(0).long()
    grid_state = grid_state.to(self.device)

    # The text state is looked up from the grid via the embedding map.
    text_np = get_text_state(grid_state, self.indexed_embedding_map)
    text_state = torch.from_numpy(text_np).float().to(self.device)

    fused = self.fusion((grid_state, text_state))
    policy = self.actor(fused)

    if sample:
        chosen = policy.sample()
    else:
        chosen = policy.mean
    chosen = chosen.clamp(*self.action_range)
    # assert action.ndim == 2 and action.shape[0] == 1
    return utils.to_np(chosen[0])
def evaluate(encoder_decoder: EncoderDecoder, data_loader):
    """Compute the mean token loss and corpus BLEU over ``data_loader``.

    Args:
        encoder_decoder: Model called as ``model(inputs, lengths)`` and
            returning ``(log_probs, output_seqs)``; ``decoder.max_length``
            gives the decoded sequence length.
        data_loader: Iterable of ``(input_idxs, target_idxs, _, _)`` batches
            where index 0 marks padding.

    Returns:
        ``(mean_loss, bleu_score)``. ``mean_loss`` averages the unreduced
        NLL values over every target position; padding positions are
        ignored by the loss but still counted in the average.

    Raises:
        ZeroDivisionError: if ``data_loader`` yields no batches.

    NOTE(review): ``reduce=False`` and ``volatile=True`` are legacy PyTorch
    spellings (``reduction='none'`` / ``torch.no_grad()``); left untouched
    because the target PyTorch version is not visible here.
    """
    loss_function = torch.nn.NLLLoss(ignore_index=0, reduce=False)

    losses = []
    all_output_seqs = []
    all_target_seqs = []

    for batch_idx, (input_idxs, target_idxs, _, _) in enumerate(tqdm(data_loader)):
        # The model is fed sequences sorted by decreasing length.
        input_lengths = (input_idxs != 0).long().sum(dim=1)
        sorted_lengths, order = torch.sort(input_lengths, descending=True)

        input_variable = Variable(
            input_idxs[order, :][:, :max(input_lengths)], volatile=True)
        target_variable = Variable(target_idxs[order, :], volatile=True)
        batch_size = input_variable.shape[0]

        output_log_probs, output_seqs = encoder_decoder(
            input_variable, list(sorted_lengths))

        all_output_seqs.extend(trim_seqs(output_seqs))
        # corpus_bleu expects a list of references per hypothesis, hence
        # the extra list around each (padding-stripped) target.
        all_target_seqs.extend(
            [list(seq[seq > 0])] for seq in to_np(target_variable))

        flattened_log_probs = output_log_probs.view(
            batch_size * encoder_decoder.decoder.max_length, -1)
        batch_losses = loss_function(
            flattened_log_probs, target_variable.contiguous().view(-1))
        losses.extend(list(to_np(batch_losses)))

    # Mean = total / count (the original had the ratio inverted).
    mean_loss = sum(losses) / len(losses)

    bleu_score = corpus_bleu(all_target_seqs,
                             all_output_seqs,
                             smoothing_function=SmoothingFunction().method2)
    print('BLEU SCORE: ' + str(bleu_score))
    return mean_loss, bleu_score
def log_weights_filter_magnitude(self, model, epoch, multi_graphs=False):
    """Log the per-filter magnitudes of every 4-D tensor in the model.

    Args:
        model: Object whose ``state_dict()`` maps names to tensors.
        epoch: Step value attached to each summary.
        multi_graphs: Forwarded unchanged to ``tblogger.list_summary``.
    """
    state = model.state_dict()
    for name, param in state.items():
        # Only 4-D tensors are logged.
        if param.dim() not in [4]:
            continue
        magnitudes = list(to_np(norm_filters(param)))
        self.tblogger.list_summary('magnitude/filters/' + name, magnitudes,
                                   epoch, multi_graphs)
    self.tblogger.sync_to_file()
def act(self, obs, sample=False):
    """Pick a discrete action for a single observation.

    Args:
        obs: One observation; a batch dimension is added before it is fed
            to ``self.actor``.
        sample: Draw from the policy distribution when True, otherwise take
            the index of the most probable action.

    Returns:
        The first row of the chosen action tensor as a numpy array.
    """
    batch = torch.FloatTensor(obs).to(self.device).unsqueeze(0)
    policy = self.actor(batch)

    # using discrete action space
    if sample:
        chosen = policy.sample()
    else:
        chosen = policy.probs.argmax(dim=-1, keepdim=True)

    # Clamping and the shape check are disabled for this variant:
    # action = action.clamp(*self.action_range)
    # assert action.ndim == 2 and action.shape[0] == 1
    return utils.to_np(chosen[0])
def act(self, observation, desired_goal, sample=False):
    """Pick an action for one observation conditioned on a desired goal.

    Args:
        observation: One observation; gets a leading batch dimension.
        desired_goal: The goal for that observation; gets a leading batch
            dimension.
        sample: Draw from the policy distribution when True, otherwise use
            its mean.

    Returns:
        The action, clamped to ``self.action_range``, as a numpy array.
    """
    obs_batch = torch.FloatTensor(observation).to(self.device)
    obs_batch = obs_batch.unsqueeze(0)
    goal_batch = torch.FloatTensor(desired_goal).to(self.device)
    goal_batch = goal_batch.unsqueeze(0)

    policy = self.actor(obs_batch, goal_batch)
    if sample:
        chosen = policy.sample()
    else:
        chosen = policy.mean
    chosen = chosen.clamp(*self.action_range)

    # Exactly one action row is expected for the single observation.
    assert chosen.ndim == 2 and chosen.shape[0] == 1
    return utils.to_np(chosen[0])
def get_mse(self, epoch):
    """Print reconstruction MSE statistics and classification accuracy.

    Runs every batch of ``self.valid_loader`` through the encoder,
    classifier and generator, then prints the number of evaluated samples,
    the mean/std of the per-sample reconstruction MSE, and the accuracy of
    the classifier's arg-max prediction against the labels.

    Args:
        epoch: Unused here (checkpoint loading is disabled).

    Raises:
        ValueError: if the validation loader yields no batches.
    """
    # self.load(epoch)
    self.G.eval()
    self.E.eval()
    self.FC.eval()
    self.C.eval()

    # Per-batch results are collected and concatenated once at the end
    # instead of growing the arrays batch by batch.
    batch_losses = []
    batch_preds = []
    batch_trues = []
    for X, labels in self.valid_loader:
        X = utils.to_var(X)
        labels = utils.to_var(labels)

        mu, sigma = self.E(self.FC(X))
        z_class = self.C(mu)
        X_hat = self.G(mu)

        # Per-sample mean squared reconstruction error.
        sq_err = (X_hat.view(X_hat.size(0), -1).cpu().data.numpy() -
                  X.view(X.size(0), -1).cpu().data.numpy()) ** 2
        batch_losses.append(np.mean(sq_err, 1))

        _, test_argmax = torch.max(z_class, 1)
        # squeeze() turns a batch of one into a 0-d array, which
        # np.concatenate rejects; atleast_1d keeps every batch 1-D.
        batch_preds.append(np.atleast_1d(utils.to_np(test_argmax.squeeze())))
        batch_trues.append(np.atleast_1d(utils.to_np(labels.squeeze())))

        if len(batch_preds) == 1:
            # Show the first batch's predictions and labels.
            print(batch_preds[0])
            print(batch_trues[0])

    if not batch_losses:
        raise ValueError('validation loader yielded no batches')

    final_loss = np.concatenate(batch_losses, 0)
    test_pred = np.concatenate(batch_preds, axis=0)
    test_true = np.concatenate(batch_trues, axis=0)

    print(final_loss.shape)
    test_accuracy = metrics.accuracy_score(test_true, test_pred)
    print("Final mse mean is %.5f, std is %.5f" %
          (np.mean(final_loss), np.std(final_loss)))
    print("Accuracy is %.5f" % (test_accuracy))
def test(epoch_idx, net, test_loader, logger, n_class):
    """Evaluate ``net`` on ``test_loader`` and log the mean IoU.

    Args:
        epoch_idx: Epoch index used as the logging step.
        net: Model to evaluate; moved to the GPU and put in eval mode.
        test_loader: Iterable of ``(inputs, targets)`` batches.
        logger: Object with ``scalar_summary(tag, value, step)``.
        n_class: Number of classes (size of the confusion matrix).

    The mean IoU is averaged over classes that occur in the labels or the
    predictions; classes absent from both (0/0) are excluded rather than
    turning the whole score into NaN.
    """
    net.cuda()
    net.eval()

    hist = np.zeros((n_class, n_class))
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(test_loader):
            inputs, targets = inputs.cuda(), targets.cuda()
            output = net(inputs)
            _, predicted = output.max(1)
            predicted, targets = to_np(predicted), to_np(targets)
            print(('test', batch_idx, epoch_idx))
            # Accumulate the confusion matrix over all batches.
            hist += fast_hist(targets.flatten(), predicted.flatten(),
                              n_class)

    # IoU_c = TP_c / (row_c + col_c - TP_c); the denominator is zero for
    # classes that never occur, so silence that warning and skip them.
    with np.errstate(divide='ignore', invalid='ignore'):
        iou = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
    miou = np.nanmean(iou)

    logger.scalar_summary('Mean iou', miou, epoch_idx)
    print(('Mean iou: ', miou))
def act(self, obs, sample=False):
    """Pick an action from a dict observation with pixels and low-dim state.

    Args:
        obs: Mapping with key ``'pix_obs'`` and, when
            ``self.lstate_shape != 0``, key ``'state_low_obs'``.
        sample: Draw from the policy distribution when True, otherwise use
            its mean.

    Returns:
        The action, clamped to ``self.action_range``, as a numpy array.
    """
    pixels = torch.FloatTensor(obs['pix_obs']).to(self.device).unsqueeze(0)

    # The low-dimensional state is only used when the agent expects one.
    if self.lstate_shape != 0:
        low_state = torch.FloatTensor(obs['state_low_obs']).to(self.device)
        low_state = torch.unsqueeze(low_state, 0)
    else:
        low_state = None

    policy = self.actor(pixels, low_state)
    if sample:
        chosen = policy.sample()
    else:
        chosen = policy.mean
    chosen = chosen.clamp(*self.action_range)

    # Exactly one action row is expected for the single observation.
    assert chosen.ndim == 2 and chosen.shape[0] == 1
    return utils.to_np(chosen[0])
def multi_step(self, agent, n_steps=None, single_episode=False, video_file_suffix=None):
    """Run several environment steps and return the stacked transitions.

    Exactly one of ``n_steps`` or ``single_episode`` must be specified.

    Args:
        agent (object): agent with a ``get_action(state)`` method returning
            an action tensor
        n_steps (int, optional, default=None): number of steps to take
        single_episode (boolean, optional, default=False): run until the
            environment reports ``done``
        video_file_suffix (string, optional, default=None): suffix added to
            the end of the video file name

    Returns:
        (s, a, s') stacked transitions
            stacked all_old_states (torch Tensor[n_steps, d_state])
            stacked all_actions (torch Tensor[n_steps, d_action])
            stacked all_next_states (torch Tensor[n_steps, d_state])
    """
    assert (n_steps is None) != (single_episode is False)

    old_states, actions, next_states = [], [], []

    # An episode run is an (effectively) unbounded loop that stops on done.
    step_budget = sys.maxsize if single_episode else n_steps

    for _ in range(step_budget):
        # FIXME: are the view-numel things necessary?
        flat_state = self.state.view(1, self.state.numel())
        action = agent.get_action(flat_state).to('cpu')

        env_action = to_np(action) if self.torch_np_conversion else action
        state, next_state, done = self.step(
            env_action, video_file_suffix=video_file_suffix)

        # extend() over a (1, n) view appends the single flattened row.
        old_states.extend(state.view(1, state.numel()))
        actions.extend(action.view(1, action.numel()))
        next_states.extend(next_state.view(1, next_state.numel()))

        if single_episode and done:
            break

    stacked_old = torch.stack(old_states)
    stacked_actions = torch.stack(actions)
    stacked_next = torch.stack(next_states)
    return stacked_old, stacked_actions, stacked_next
def __getitem__(self, index):
    """Return the ``(image, label)`` pair at ``index``.

    The image is passed through ``self.transform`` when one is set; the
    label image is converted to a long tensor via ``to_np``/``to_tensor``.
    """
    image_file = os.path.join(self.image_path, self.image_list[index])
    label_file = os.path.join(self.label_path, self.label_list[index])

    image = Image.open(image_file)
    label = Image.open(label_file)

    # Transform image, label in forms
    transform = self.transform
    if transform is not None:
        image = transform(image)
    label_array = to_np(label)
    label = to_tensor(label_array).long()

    return image, label
def act(self, obs, exploration=False):
    """Pick an action, optionally with random exploration.

    Args:
        obs: One observation; only read on the greedy path.
        exploration: When True, a uniformly random action index is returned
            with probability ``self.exploration_rate``.

    Returns:
        Either a random integer in ``[0, self.action_shape)`` or the
        arg-max of the expected Q-values as returned by ``utils.to_np``.
    """
    # TODO (chongyi zheng)
    explore = exploration and np.random.rand() < self.exploration_rate
    if explore:
        return np.random.randint(self.action_shape)

    with torch.no_grad():
        batch = torch.FloatTensor(obs).to(self.device).unsqueeze(0)
        prob, _ = self.q_net(batch)
        # Expected value of the distribution over atoms.
        q_values = (prob * self.atoms).sum(-1)

        # greedy action
        greedy = utils.to_np(q_values.argmax(dim=-1))
    return greedy
def _request_transformation(self, _from, _to):
    """Fetch one frame transformation and cache it.

    Subscribes to ``FrameTransformation.<_from>.<_to>``, waits up to five
    seconds for a message, and stores the converted transformation in
    ``self.transformations[_from][_to]``. The topic is always unsubscribed
    again, whether or not a message arrived.

    Args:
        _from: Source frame id.
        _to: Destination frame id.

    Returns:
        True when a transformation was received and stored, False when
        consuming the message failed (e.g. timed out).
    """
    topic = 'FrameTransformation.{}.{}'.format(_from, _to)
    self.subscription.subscribe(topic)
    try:
        msg = self.channel.consume(timeout=5.0)
    except Exception:
        # Best effort: any consume failure means "not available".
        # KeyboardInterrupt/SystemExit are deliberately not swallowed.
        return False
    finally:
        self.subscription.unsubscribe(topic)

    transformation = msg.unpack(FrameTransformation)
    if _from not in self.transformations:
        self.transformations[_from] = {}
    self.transformations[_from][_to] = to_np(transformation.tf)
    return True
def act(self, obs, sample=False):
    """Pick an action for one image observation after resizing it.

    The observation is centre-cropped (``aug_type == "crop"``) or
    zero-padded up to ``self.image_size`` (``aug_type == "translate"``)
    before it is fed to the actor.

    Args:
        obs: One observation; a batch dimension is added first.
        sample: Draw from the policy distribution when True, otherwise use
            its mean.

    Returns:
        The action, clamped to ``self.action_range``, as a numpy array.

    Raises:
        ValueError: for any other ``self.aug_type``.
    """
    batch = torch.FloatTensor(obs).to(self.device).unsqueeze(0)

    aug = self.aug_type
    if aug not in ("crop", "translate"):
        raise ValueError(self.aug_type)
    if aug == "crop":
        batch = self.center_crop(batch)
    else:
        # Same padding on all four sides of the last two dimensions.
        border = (self.image_size - batch.shape[-1]) // 2
        batch = F.pad(batch, [border, border, border, border])

    policy = self.actor(batch)
    if sample:
        chosen = policy.sample()
    else:
        chosen = policy.mean
    chosen = chosen.clamp(*self.action_range)

    # Exactly one action row is expected for the single observation.
    assert chosen.ndim == 2 and chosen.shape[0] == 1
    return utils.to_np(chosen[0])
def fit_self(self, X):
    """Reconstruct every row of ``X`` from the other rows (leave-one-out).

    Rows are first normalised to unit length. For each row ``i`` a
    coefficient vector over the remaining rows is fitted (``self.gels``
    when ``dim >= num_sample``, otherwise ``self.lrfit``), and only the
    ``K`` coefficients of largest magnitude are kept.

    Args:
        X: 2-D numpy array, one sample per row.

    Returns:
        numpy array of shape ``(num_sample, num_sample)``; row ``i`` holds
        the kept coefficients at the columns of the other samples and 0
        elsewhere (including the diagonal).
    """
    logging.debug(
        f'Reconstructor fit_self() called. Begin to fit {X.shape}')
    X = normalize(X, axis=1)  # unit length
    num_sample = len(X)
    dim = X.shape[1]
    all_idx = np.arange(num_sample)

    # The coefficient matrix is accumulated on the device and converted
    # once at the end; no separate host-side buffer is needed.
    result_matrix_t = torch.zeros(num_sample, num_sample).to(self.device)
    # the X in torch
    X_t = torch.from_numpy(X).to(self.device)

    # for each row left out
    for i1 in tnrange(num_sample, desc='fit_self', leave=False):
        # indices of all the other rows
        this_idx = np.delete(all_idx, i1).tolist()
        if dim >= num_sample:
            coef, _ = self.gels(X_t[this_idx, :], X_t[i1, :].unsqueeze(0))
            coef.squeeze_()
        else:
            coef, _ = self.lrfit(X[this_idx, :], X[i1, :].reshape(1, dim))
            coef = torch.from_numpy(coef).squeeze().to(self.device)

        # keep only the K largest-magnitude coefficients, with their sign
        val, idx = coef.abs().topk(K)
        tmp = torch.zeros_like(coef)
        tmp.scatter_(0, idx, val * coef[idx].sign())

        result_matrix_t[i1, this_idx] = tmp

    return utils.to_np(result_matrix_t)
def visualize_results(self, epoch, N=0):
    """Show a grid of sampled images and save it as ``epochNNN.png``.

    Args:
        epoch: Epoch number used in the output file name.
        N: Number of samples to draw with ``self.model.sampleN``; 0 uses
            ``self.model.sample_fixed()`` instead.

    Returns:
        ``self``, to allow call chaining.
    """
    save_path = os.path.join(self.save_path, 'visualization')
    # exist_ok avoids the check-then-create race of exists()+makedirs().
    os.makedirs(save_path, exist_ok=True)

    samples = self.model.sample_fixed() if N == 0 else self.model.sampleN(N)
    samples_grid = self._make_grid(samples)

    # imshow expects channels last, the grid is channels first.
    plt.imshow(utils.to_np(samples_grid).transpose(1, 2, 0))
    plt.show()

    # The destination is passed positionally: the keyword was renamed
    # from `filename` to `fp` in torchvision, so the positional form is
    # the one that works across versions.
    torchvision.utils.save_image(
        samples_grid,
        os.path.join(save_path, 'epoch%03d' % epoch + '.png'))
    logging.info('Sampled images saved.')
    return self
def visualize_results(self, epoch):
    """Save scatter plots of inputs, reconstructions, random samples and latents.

    Encodes and reconstructs every batch of ``self.valid_loader``, also
    decodes one batch of random latent vectors per step, and writes four
    PNG files (``X_original``, ``X_reconstruc``, ``X_random``, ``Z_mu``,
    each suffixed with ``_epochNNN.png``) into
    ``<root>/<result_dir>/mixed_gaussian/<model_name>/<seed_random>``.
    Each plot uses the first two coordinates of the respective array.

    Args:
        epoch: Epoch number used in the output file names.
    """
    # self.load(199)
    print("visulize..")
    # Evaluation mode for the forward passes; restored after the loop.
    self.G.eval()
    self.E.eval()
    self.FC.eval()
    save_dir = os.path.join(self.root, self.result_dir, 'mixed_gaussian',
                            self.model_name, str(self.args.seed_random))
    print(save_dir)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    # Store results
    Recon = []
    Original = []
    Z = []
    Random = []
    color_vec = []
    # NOTE(review): the loop variable `iter` shadows the builtin.
    for iter, (X, label) in enumerate(self.valid_loader):
        # Fresh random latent batch for the "random samples" plot.
        z = utils.to_var(torch.randn(self.batch_size, self.z_dim))
        X = utils.to_var(X)
        label = utils.to_var(label)
        z_mu, z_sigma = self.E(self.FC(X))
        X_reconstruc = self.G(z_mu)
        X_random = self.G(z)
        # Accumulate one entry per sample.
        Original += [x for x in utils.to_np(X)]
        Recon += [x for x in utils.to_np(X_reconstruc)]
        Z += [x for x in utils.to_np(z_mu)]
        Random += [x for x in utils.to_np(X_random)]
        color_vec += [x for x in utils.to_np(label)]
    # Back to training mode.
    self.G.train()
    self.E.train()
    self.FC.train()
    Original = np.array(Original)
    Recon = np.array(Recon)
    Z = np.array(Z)
    Random = np.array(Random)
    # NOTE(review): `self.count` is defined elsewhere; presumably it
    # reports statistics on the first 2500 random samples -- confirm.
    self.count(Random[:2500])
    # The 'gnuplot' colormap is immediately replaced by jet.
    cmap = plt.get_cmap('gnuplot')
    cmap = plt.cm.jet
    cmaplist = [cmap(i) for i in range(cmap.N)]
    cmap = cmap.from_list('Custom cmap', cmaplist, cmap.N)

    # Plot 1: original inputs, coloured by label.
    fig, ax = plt.subplots(1, 1, figsize=(6, 6))
    plt.xlim(-6, 6)
    plt.ylim(-6, 6)
    plt.xticks(fontsize=20)
    plt.yticks(fontsize=20)
    ax.scatter(Original[:, 0], Original[:, 1], c=color_vec, marker='.',
               cmap=cmap, alpha=0.3)
    fig.savefig(os.path.join(save_dir,
                             'X_original' + '_epoch%03d' % epoch + '.png'),
                transparent=True)
    plt.close()

    # Plot 2: reconstructions (first 10000 points), coloured by label.
    fig, ax = plt.subplots(1, 1, figsize=(6, 6))
    plt.xlim(-6, 6)
    plt.ylim(-6, 6)
    plt.xticks(fontsize=20)
    plt.yticks(fontsize=20)
    ax.scatter(Recon[:10000, 0], Recon[:10000, 1], c=color_vec[:10000],
               marker='.', cmap=cmap, alpha=0.3)
    fig.savefig(os.path.join(
        save_dir, 'X_reconstruc' + '_epoch%03d' % epoch + '.png'),
        transparent=True)
    plt.close()

    # Plot 3: samples decoded from random latents (first 10000), in black.
    fig, ax = plt.subplots(1, 1, figsize=(6, 6))
    plt.xlim(-6, 6)
    plt.ylim(-6, 6)
    plt.xticks(fontsize=20)
    plt.yticks(fontsize=20)
    ax.scatter(Random[:10000, 0], Random[:10000, 1],
               color='black', marker='.', alpha=0.3)
    fig.savefig(os.path.join(save_dir,
                             'X_random' + '_epoch%03d' % epoch + '.png'),
                transparent=True)
    plt.close()

    # Plot 4: latent means, coloured by label, on a tighter [-3, 3] range.
    fig, ax = plt.subplots(1, 1, figsize=(6, 6))
    plt.xlim(-3, 3)
    plt.ylim(-3, 3)
    ax.set_xticks([-3, -2, -1, 0, 1, 2, 3])
    ax.set_yticks([-3, -2, -1, 0, 1, 2, 3])
    plt.xticks(fontsize=20)
    plt.yticks(fontsize=20)
    ax.scatter(Z[:, 0], Z[:, 1], c=color_vec, marker='.', cmap=cmap,
               alpha=0.3)
    fig.savefig(os.path.join(save_dir,
                             'Z_mu' + '_epoch%03d' % epoch + '.png'),
                transparent=True)
    plt.close()
def train(model, data_loader, optimizer, epoch, writer):
    """
    Train CapsuleNet model on training set for one epoch.

    Reads run settings from the module-level ``args`` (``epochs``, ``cuda``,
    ``num_classes``, ``use_reconstruction_loss``, ``log_interval``).

    Args:
        model: The CapsuleNet model.
        data_loader: An iterator over the dataset. It combines a dataset and a sampler.
        optimizer: Optimization algorithm.
        epoch: Current epoch.
        writer: Summary writer providing ``add_scalar`` and ``add_histogram``.
    """
    print('===> Training mode')

    num_batches = len(data_loader)  # iteration per epoch. e.g: 469
    total_step = args.epochs * num_batches
    epoch_tot_acc = 0

    # Switch to train mode
    model.train()

    if args.cuda:
        # When we wrap a Module in DataParallel for multi-GPUs
        # NOTE(review): this replaces the wrapper by the wrapped module for
        # the rest of the function -- confirm that is intended.
        model = model.module

    start_time = timer()

    for batch_idx, (data, target) in enumerate(tqdm(data_loader, unit='batch')):
        batch_size = data.size(0)
        # Global step across epochs; epochs appear to be 1-based here since
        # one epoch's worth of batches is subtracted -- TODO confirm.
        global_step = batch_idx + (epoch * num_batches) - num_batches

        # Keep the integer labels for the accuracy computation.
        labels = target
        target_one_hot = utils.one_hot_encode(target, length=args.num_classes)
        # NOTE(review): the expected width is hard-coded to 10 rather than
        # taken from args.num_classes.
        assert target_one_hot.size() == torch.Size([batch_size, 10])

        data, target = Variable(data), Variable(target_one_hot)

        if args.cuda:
            data = data.cuda()
            target = target.cuda()

        # Train step - forward, backward and optimize
        optimizer.zero_grad()
        output = model(data)  # output from DigitCaps (out_digit_caps)
        loss, margin_loss, recon_loss = model.loss(data, output, target)
        loss.backward()
        optimizer.step()

        # Calculate accuracy for each step and average accuracy for each epoch
        acc = utils.accuracy(output, labels, args.cuda)
        epoch_tot_acc += acc
        epoch_avg_acc = epoch_tot_acc / (batch_idx + 1)

        # TensorBoard logging
        # 1) Log the scalar values
        # NOTE(review): `.data[0]` indexes the loss tensor; on PyTorch
        # releases where losses are 0-dim this needs `.item()` -- confirm
        # the targeted version.
        writer.add_scalar('train/total_loss', loss.data[0], global_step)
        writer.add_scalar('train/margin_loss', margin_loss.data[0], global_step)
        if args.use_reconstruction_loss:
            writer.add_scalar('train/reconstruction_loss', recon_loss.data[0], global_step)
        writer.add_scalar('train/batch_accuracy', acc, global_step)
        writer.add_scalar('train/accuracy', epoch_avg_acc, global_step)

        # 2) Log values and gradients of the parameters (histogram)
        # This runs for every parameter on every batch.
        for tag, value in model.named_parameters():
            tag = tag.replace('.', '/')
            writer.add_histogram(tag, utils.to_np(value), global_step)
            writer.add_histogram(tag + '/grad', utils.to_np(value.grad), global_step)

        # Print losses
        if batch_idx % args.log_interval == 0:
            template = 'Epoch {}/{}, ' \
                'Step {}/{}: ' \
                '[Total loss: {:.6f},' \
                '\tMargin loss: {:.6f},' \
                '\tReconstruction loss: {:.6f},' \
                '\tBatch accuracy: {:.6f},' \
                '\tAccuracy: {:.6f}]'
            # tqdm.write prints without breaking the progress bar.
            tqdm.write(
                template.format(
                    epoch, args.epochs, global_step, total_step,
                    loss.data[0], margin_loss.data[0],
                    recon_loss.data[0] if args.use_reconstruction_loss else 0,
                    acc, epoch_avg_acc))

    # Print time elapsed for an epoch
    end_time = timer()
    print('Time elapsed for epoch {}: {:.0f}s.'.format(epoch, end_time - start_time))
optimizer.step() training_loss += loss.cpu().data.numpy()[0] * float(inputs.size(0)) train_acc = counter.acc() counter.flush() test_loss = 0 net.eval() utils.set_strategy(net, 'running') for _, (inputs, labels) in enumerate(testloader): inputs, labels = Variable(inputs.cuda(async=True)), Variable( labels.cuda(async=True)) outputs = net(inputs) loss = criterion(outputs, labels) test_loss += utils.to_np(loss) * float(inputs.size(0)) counter.add(utils.to_np(outputs), utils.to_np(labels)) print( ' -- Epoch %d | time: %.4f | loss: %.4f | training acc: %.4f validation accuracy: %.4f | lr %.6f --' % (epoch, time() - t0, training_loss, train_acc, counter.acc(), lr)) save_checkpoint( { 'epoch': epoch + 1, 'state_dict': net.module.state_dict() if use_cuda else net.state_dict(), 'test_accuracy': counter.acc(), 'optimizer': optimizer.state_dict(), 'name': args.model, 'model_args': model_args,