def check_forward(self, log_pi_data, tau):
    log_pi = chainer.Variable(log_pi_data)
    y = functions.gumbel_softmax(log_pi, tau=tau)

    # Only checks dtype and shape because its result contains noise
    self.assertEqual(y.dtype, numpy.float32)
    self.assertEqual(y.shape, log_pi.shape)
    self.assertEqual(
        cuda.get_array_module(y),
        cuda.get_array_module(log_pi))
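A minimal standalone sketch of what this test exercises (assuming only `chainer` and `numpy`; the sampled values themselves are stochastic, which is why the test above checks nothing beyond dtype and shape):

import numpy
import chainer
from chainer import functions

# log-probabilities of a single categorical distribution over 3 classes
log_pi = chainer.Variable(
    numpy.log(numpy.array([[0.1, 0.2, 0.7]], dtype=numpy.float32)))

# draw one relaxed one-hot sample; lower tau -> closer to a hard one-hot
y = functions.gumbel_softmax(log_pi, tau=0.5)

assert y.shape == log_pi.shape   # shape is preserved
assert y.dtype == numpy.float32  # dtype is preserved
# each row sums to ~1 because the relaxation ends in a softmax
numpy.testing.assert_allclose(
    functions.sum(y, axis=1).array, 1.0, rtol=1e-5)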
def test(iterator, gpu, timesteps, encoder, decoder, rel_send, rel_rec,
         edge_types, temp, var):
    nll_test = []
    kl_test = []
    edge_accuracies = []
    node_mses = []

    chainer.config.train = False
    chainer.config.enable_backprop = False
    while True:
        inputs = iterator.next()
        node_features, edge_labels = dataset.concat_examples(inputs,
                                                             device=gpu)
        data_encoder = node_features[:, :, :timesteps, :]
        data_decoder = node_features[:, :, -timesteps:, :]

        # logits: [batch_size, num_edges, edge_types]
        logits = encoder(data_encoder, rel_send, rel_rec)

        # sample relaxed one-hot edge types
        # (the logits serve as unnormalized log-probabilities)
        edges = F.gumbel_softmax(logits, tau=temp, axis=2)
        edge_probs = F.softmax(logits, axis=2)
        # edges, edge_probs: [batch_size, num_edges, edge_types]

        # validation output uses teacher forcing
        output = decoder(data_decoder, edges, rel_rec, rel_send, 1)
        target = data_decoder[:, :, 1:, :]

        num_nodes = node_features.shape[1]
        loss_nll = get_nll_gaussian(output, target, var)
        loss_kl = get_kl_categorical_uniform(edge_probs, num_nodes,
                                             edge_types)

        nll_test.append(float(loss_nll.array))
        kl_test.append(float(loss_kl.array))

        edge_accuracy = get_edge_accuracy(logits.array, edge_labels)
        edge_accuracies.append(edge_accuracy)

        node_mse = float(F.mean_squared_error(output, target).array)
        node_mses.append(node_mse)

        if iterator.is_new_epoch:
            break

    put_log(iterator.epoch, np.mean(nll_test), np.mean(kl_test),
            np.mean(edge_accuracies), np.mean(node_mses), 'test')

    chainer.config.train = True
    chainer.config.enable_backprop = True
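Flipping `chainer.config` flags by hand, as above, leaves them in the wrong state if an exception is raised mid-loop. A sketch of the equivalent guard using Chainer's context managers, which restore both flags automatically:

import chainer

with chainer.using_config('train', False), \
        chainer.using_config('enable_backprop', False):
    # evaluation code goes here; both flags are restored on exit,
    # even if an exception propagates out of this block
    pass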
def predict(self, xs):
    # Encoding
    logits, exs = self._encode(xs)

    # Discretization
    D = F.gumbel_softmax(logits, self.tau, axis=2)
    gumbel_output = D.reshape(-1, self.M * self.K)
    with chainer.no_backprop_mode():
        maxp = F.mean(F.max(D, axis=2))
        reporter.report({'maxp': maxp.data}, self)

    # Decoding
    y_hat = self._decode(gumbel_output)
    return y_hat, exs
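At inference time one often wants hard codes rather than the relaxed samples used above. A hedged sketch of deterministic discretization (the helper name `hard_one_hot` is an assumption, not part of the original model; it assumes the code axis is the last axis):

import numpy as np

def hard_one_hot(logits_array, axis=-1):
    # Hypothetical helper: replace the stochastic relaxed sample with a
    # hard one-hot vector by taking the argmax over the code dimension.
    indices = logits_array.argmax(axis=axis)
    return np.eye(logits_array.shape[axis],
                  dtype=logits_array.dtype)[indices]

# e.g. logits.array with shape [batch, M, K] -> hard codes [batch, M, K]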
def get_dealer_sampling(N_pic=100, imgH=64, imgW=64, N_card=4):
    # for a 512x512 image these correspond to roughly 13, 26, 52, 131 points
    thres = [0.99995, 0.9999, 0.9998, 0.9995]

    # <generate random dot images>
    # uniform random values in [0, 1) for N_pic images
    img_r = xp.random.rand(N_pic, imgW * imgH).astype(np.float32)
    # allocate image memory for N_card * N_pic images
    img_p = xp.zeros((N_card, N_pic, imgW * imgH)).astype(np.float32)
    for i, thre in enumerate(thres):
        # set 1 only where the random value exceeds the threshold
        img_p[i][img_r >= thre] = 1
    # reshape dot images: (N_card, N_pic, imgW*imgH) -> (N_pic, imgW*imgH, N_card)
    img_p = chainer.Variable(img_p.transpose((1, 2, 0)))

    # <generate sampling coefficients>
    # create N_pic ones
    x_one = xp.ones((N_pic, 1), dtype=np.float32)
    # pass the ones through the dealer, then softmax into probabilities
    card_prob = F.softmax(Md['de'](x_one))
    # sample via gumbel_softmax, which expects log-probabilities
    card_gum = F.gumbel_softmax(F.log(card_prob), tau=0.2)
    # broadcast coefficients: (N_pic, N_card) -> (N_pic, imgW*imgH, N_card)
    card_gum_b = F.broadcast_to(
        F.reshape(card_gum, (N_pic, 1, N_card)), img_p.shape)

    # <combine the random dot images with the sampling coefficients>
    # weight the dot images by the coefficients, sum over cards,
    # and reshape into 2-D images
    img_p_sum = F.reshape(
        F.sum(img_p * card_gum_b, axis=2), (N_pic, 1, imgH, imgW))
    # turn dots into Gaussian blobs
    img_core = Md['decon_core'](img_p_sum) * 255
    img_core = F.broadcast_to(img_core, (N_pic, 3, imgH, imgW))
    return img_core
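`F.gumbel_softmax` expects log-probabilities, which is why the snippet above applies `F.log` to the softmax output. A numerically safer equivalent is to apply `F.log_softmax` to the raw dealer output instead (a sketch; `dealer_logits` stands in for `Md['de'](x_one)`):

import chainer.functions as F

# log_softmax(logits) == log(softmax(logits)), computed without the
# intermediate probabilities, so it avoids log(0) for saturated outputs
log_card_prob = F.log_softmax(dealer_logits)
card_gum = F.gumbel_softmax(log_card_prob, tau=0.2)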
chainer.config.train = False
chainer.config.enable_backprop = False
for i in range(5):
    inputs = test_iter.next()
    node_features, edge_labels = dataset.concat_examples(inputs,
                                                         device=args.gpu)
    data_encoder = node_features[:, :, :train_args['timesteps'], :]
    data_decoder = node_features[:, :, train_args['timesteps']:, :]

    # logits: [batch_size, num_edges, edge_types]
    logits = encoder(data_encoder, rel_send, rel_rec)

    # edge sampling with the Gumbel-softmax relaxation
    edges = F.gumbel_softmax(logits, tau=train_args['temp'], axis=2)
    edge_probs = F.softmax(logits, axis=2)
    # edges, edge_probs: [batch_size, num_edges, edge_types]

    # validation output uses teacher forcing
    output = decoder(data_decoder, edges, rel_rec, rel_send,
                     data_decoder.shape[2])

    fig = plt.figure()
    plt.tight_layout()
    plt.xlabel('x')
    plt.ylabel('y')
    plt.title('transparent: given, solid: prediction, dashed: ground-truth')
    prop_cycle = plt.rcParams['axes.prop_cycle']
    colors = prop_cycle.by_key()['color']
def train(iterator, gpu, encoder, decoder, enc_optim, dec_optim, rel_send,
          rel_rec, edge_types, temp, prediction_steps, var, out, benchmark,
          lr_decay, gamma):
    iter_i = 0
    edge_accuracies = []
    node_mses = []
    nll_train = []
    kl_train = []
    logger = logging.getLogger(__name__)
    while True:
        inputs = iterator.next()
        node_features, edge_labels = dataset.concat_examples(inputs,
                                                             device=gpu)

        # logits: [batch_size, num_edges, edge_types]
        logits = encoder(node_features, rel_send, rel_rec)

        # sample relaxed one-hot edge types
        # (the logits serve as unnormalized log-probabilities)
        edges = F.gumbel_softmax(logits, tau=temp, axis=2)
        edge_probs = F.softmax(logits, axis=2)
        # edges, edge_probs: [batch_size, num_edges, edge_types]

        if isinstance(decoder, decoders.MLPDecoder):
            output = decoder(node_features, edges, rel_rec, rel_send,
                             prediction_steps)
        elif isinstance(decoder, decoders.RNNDecoder):
            # NOTE: relies on a module-level `args` for the burn-in length
            output = decoder(node_features, edges, rel_rec, rel_send, 100,
                             burn_in=True,
                             burn_in_steps=args.timesteps -
                             args.prediction_steps)

        target = node_features[:, :, 1:, :]
        num_nodes = node_features.shape[1]
        loss_nll = get_nll_gaussian(output, target, var)
        loss_kl = get_kl_categorical_uniform(edge_probs, num_nodes,
                                             edge_types)
        loss = loss_nll + loss_kl

        nll_train.append(float(loss_nll.array))
        kl_train.append(float(loss_kl.array))

        edge_accuracy = get_edge_accuracy(logits.array, edge_labels)
        edge_accuracies.append(edge_accuracy)

        node_mse = float(F.mean_squared_error(output, target).array)
        node_mses.append(node_mse)

        encoder.cleargrads()
        decoder.cleargrads()
        loss.backward()
        enc_optim.update()
        dec_optim.update()

        # Exit after 10 iterations when benchmark mode is ON
        iter_i += 1
        if benchmark:
            put_log(iterator.epoch, np.mean(nll_train), np.mean(kl_train),
                    np.mean(edge_accuracies), np.mean(node_mses))
            if iter_i == 10:
                exit()

        if iterator.is_new_epoch:
            break

    if not os.path.exists(os.path.join(out, 'graph.dot')):
        with open(os.path.join(out, 'graph.dot'), 'w') as o:
            g = computational_graph.build_computational_graph([loss])
            o.write(g.dump())

    if iterator.is_new_epoch:
        put_log(iterator.epoch, np.mean(nll_train), np.mean(kl_train),
                np.mean(edge_accuracies), np.mean(node_mses))
        serializers.save_npz(
            os.path.join(out, 'encoder_epoch-{}.npz'.format(iterator.epoch)),
            encoder)
        serializers.save_npz(
            os.path.join(out, 'decoder_epoch-{}.npz'.format(iterator.epoch)),
            decoder)
        serializers.save_npz(
            os.path.join(out, 'enc_optim_epoch-{}.npz'.format(iterator.epoch)),
            enc_optim)
        serializers.save_npz(
            os.path.join(out, 'dec_optim_epoch-{}.npz'.format(iterator.epoch)),
            dec_optim)
        if iterator.epoch % lr_decay == 0:
            enc_optim.alpha *= gamma
            dec_optim.alpha *= gamma
            logger.info('alpha of enc_optim: {}'.format(enc_optim.alpha))
            logger.info('alpha of dec_optim: {}'.format(dec_optim.alpha))
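All of the snippets above rely on the same relaxation. For intuition, here is a minimal NumPy sketch of what the Gumbel-softmax computes, following the standard definition (a reference sketch, not Chainer's actual implementation):

import numpy as np

def gumbel_softmax_np(log_pi, tau=0.1, axis=-1):
    # sample standard Gumbel noise: g = -log(-log(u)), u ~ Uniform(0, 1);
    # the lower bound avoids log(0)
    u = np.random.uniform(1e-20, 1.0, size=log_pi.shape)
    g = -np.log(-np.log(u))
    # temperature-scaled softmax over the perturbed log-probabilities
    z = (log_pi + g) / tau
    z = z - z.max(axis=axis, keepdims=True)  # for numerical stability
    e = np.exp(z)
    return e / e.sum(axis=axis, keepdims=True)

# As tau -> 0 the samples approach hard one-hot vectors; as tau grows
# they flatten toward the uniform distribution, which is why the NRI
# snippets expose the temperature as the tunable parameter `temp`.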