def train(args):
    """
    Main training script for the encoder/generator/discriminator model.

    Builds a reconstruction path (vectorizer -> generator), a GAN fake path
    and a real path, then alternates vectorizer, generator and discriminator
    updates. Latent codes for the generator are sampled by linearly
    interpolating the codes of two random real images.

    Args:
        args: Parsed command-line arguments (context, device_id, batch_size,
            learning_rate, weight_decay, max_iter, model_save_interval,
            model_save_path, monitor_path).
    """
    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    # TRAIN
    # Fake path: encode a real image, decode twice — once through the linked
    # graph (fake2, for the reconstruction loss) and once through an unlinked
    # copy of the code (fake, for the adversarial loss).
    x1 = nn.Variable([args.batch_size, 1, 56, 56])
    z_vec = vectorizer(x1)
    z = z_vec.unlinked()
    fake2 = generator(z_vec)
    fake = generator(z)
    fake.persistent = True  # Not to clear at backward
    pred_fake = discriminator(fake)
    loss_gen = F.mean(
        F.sigmoid_cross_entropy(pred_fake, F.constant(1, pred_fake.shape)))
    loss_vec = F.mean(F.squared_error(fake2, x1))
    fake_dis = fake.unlinked()
    pred_fake_dis = discriminator(fake_dis)
    loss_dis = F.mean(
        F.sigmoid_cross_entropy(pred_fake_dis,
                                F.constant(0, pred_fake_dis.shape)))

    # Two auxiliary encoders used to produce interpolated latent codes.
    xBuf1 = nn.Variable([args.batch_size, 1, 56, 56])
    zBuf1 = vectorizer(xBuf1)
    xBuf2 = nn.Variable([args.batch_size, 1, 56, 56])
    zBuf2 = vectorizer(xBuf2)

    # Real path
    x = nn.Variable([args.batch_size, 1, 56, 56])
    pred_real = discriminator(x)
    loss_dis += F.mean(
        F.sigmoid_cross_entropy(pred_real, F.constant(1, pred_real.shape)))

    # Create Solver.
    solver_gen = S.Adam(args.learning_rate, beta1=0.5)
    solver_dis = S.Adam(args.learning_rate, beta1=0.5)
    solver_vec = S.Adam(args.learning_rate, beta1=0.5)
    # The reconstruction loss trains both the vectorizer and the generator.
    # BUG FIX: `set_parameters` resets previously registered parameters by
    # default, so the second call must pass reset=False (otherwise only the
    # "gen" parameters would remain registered with solver_vec).
    with nn.parameter_scope("vec"):
        solver_vec.set_parameters(nn.get_parameters())
    with nn.parameter_scope("gen"):
        solver_vec.set_parameters(nn.get_parameters(), reset=False,
                                  retain_state=True)
    with nn.parameter_scope("gen"):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10)
    monitor_loss_dis = M.MonitorSeries("Discriminator loss", monitor,
                                       interval=10)
    monitor_loss_vec = M.MonitorSeries("Vectorizer loss", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)
    # Images are in [-1, 1]; map back to [0, 1] for visualization.
    # BUG FIX: was `x + 1 / 2.` which evaluates to x + 0.5 due to operator
    # precedence (cf. the correct `(x + 1) / 2.` used elsewhere in this file).
    monitor_fake = M.MonitorImageTile(
        "Fake images", monitor, normalize_method=lambda x: (x + 1) / 2.)
    monitor_vec1 = M.MonitorImageTile(
        "vec images1", monitor, normalize_method=lambda x: (x + 1) / 2.)
    monitor_vec2 = M.MonitorImageTile(
        "vec images2", monitor, normalize_method=lambda x: (x + 1) / 2.)

    data = iterator.simple_data_iterator(load_kanji_data(), args.batch_size,
                                         True)

    # Training loop.
    for i in range(args.max_iter):
        if i % args.model_save_interval == 0:
            with nn.parameter_scope("gen"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "generator_param_%06d.h5" % i))
            with nn.parameter_scope("dis"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "discriminator_param_%06d.h5" % i))
            with nn.parameter_scope("vec"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "vectorizer_param_%06d.h5" % i))

        # Vectorizer (reconstruction) update.
        image, _ = data.next()
        x1.d = image / 255. * 2 - 1.0  # [0, 255] -> [-1, 1]
        solver_vec.zero_grad()
        loss_vec.forward(clear_no_need_grad=True)
        loss_vec.backward(clear_buffer=True)
        solver_vec.weight_decay(args.weight_decay)
        solver_vec.update()
        fake2.forward()
        monitor_vec1.add(i, fake2)
        monitor_vec2.add(i, x1)
        monitor_loss_vec.add(i, loss_vec.d.copy())

        # Real batch for the discriminator.
        image, _ = data.next()
        x.d = image / 255. * 2 - 1.0  # [0, 255] to [-1, 1]

        # Sample a latent code by interpolating the codes of two real images.
        ratio = np.random.rand()
        image, _ = data.next()
        xBuf1.d = image / 255. * 2 - 1.0  # [0, 255] to [-1, 1]
        zBuf1.forward()
        image, _ = data.next()
        xBuf2.d = image / 255. * 2 - 1.0  # [0, 255] to [-1, 1]
        zBuf2.forward()
        z.d = (1 - ratio) * zBuf1.d + ratio * zBuf2.d

        # Generator update.
        solver_gen.zero_grad()
        loss_gen.forward(clear_no_need_grad=True)
        loss_gen.backward(clear_buffer=True)
        solver_gen.weight_decay(args.weight_decay)
        solver_gen.update()
        monitor_fake.add(i, fake)
        monitor_loss_gen.add(i, loss_gen.d.copy())

        # Discriminator update.
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        solver_dis.weight_decay(args.weight_decay)
        solver_dis.update()
        monitor_loss_dis.add(i, loss_dis.d.copy())
        monitor_time.add(i)

    # Save final parameters after the loop.
    with nn.parameter_scope("gen"):
        nn.save_parameters(
            os.path.join(args.model_save_path, "generator_param_%06d.h5" % i))
    with nn.parameter_scope("dis"):
        nn.save_parameters(
            os.path.join(args.model_save_path,
                         "discriminator_param_%06d.h5" % i))
    with nn.parameter_scope("vec"):
        nn.save_parameters(
            os.path.join(args.model_save_path,
                         "vectorizer_param_%06d.h5" % i))
def volumetric_rendering(radiance_field, ray_origins, depth_values,
                         return_weights=False, white_bkgd=False,
                         raw_noise_std=0.0, apply_act=False):
    """Integration of volumetric rendering.

    Args:
        radiance_field (nn.Variable or nn.NdArray): Shape is
            (height, width, num_samples, 4). radiance_field[..., :3]
            corresponds to rgb values at each sampled point while
            radiance_field[..., 3] refers to density (sigma).
        ray_origins (nn.Variable or nn.NdArray): Shape is (height, width, 3).
            NOTE(review): currently unused by this function.
        depth_values (nn.Variable or nn.NdArray): Shape is
            (num_rays, num_samples) or (height, width, num_samples).
        return_weights (bool, optional): Set to true if the coefficients of
            the volumetric integration sum are to be returned.
            Defaults to False.
        white_bkgd (bool, optional): Composite the result onto a white
            background using the accumulated alpha. Defaults to False.
        raw_noise_std (float, optional): Std of Gaussian noise added to the
            raw density before integration. Defaults to 0.0.
        apply_act (bool, optional): Apply relu/sigmoid to the raw network
            outputs before integration. Defaults to False.

    Returns:
        rgb_map (nn.Variable or nn.NdArray): Shape is (height, width, 3)
        depth_map (nn.Variable or nn.NdArray): Shape is (height, width)
        acc_map (nn.Variable or nn.NdArray): Shape is (height, width)
        (and disp_map, weights when return_weights is True)
    """
    if apply_act:
        sigma = F.relu(radiance_field[..., 3])
        rgb = F.sigmoid(radiance_field[..., :3])
    else:
        sigma = radiance_field[..., 3]
        rgb = radiance_field[..., :3]

    # Regularize density with noise during training if requested.
    if raw_noise_std > 0.0:
        noise = F.randn(shape=sigma.shape)
        sigma += (noise * raw_noise_std)

    if depth_values.ndim == 2:
        # Flattened rays: depth_values is (num_rays, num_samples).
        distances = depth_values[:, 1:] - depth_values[:, :-1]
        distances = F.concatenate(
            distances,
            F.constant(1e2, shape=depth_values.shape[:-1] + (1, )),
            axis=-1)
        alpha = 1. - F.exp(-sigma * distances)
        # Transmittance-weighted contribution of each sample.
        weights = alpha * F.cumprod(1 - alpha + 1e-10, axis=-1, exclusive=True)
        rgb_map = F.sum(weights[..., None] * rgb, axis=-2)
        depth_map = F.sum(weights * depth_values, axis=-1)
        acc_map = F.sum(weights, axis=-1)
    else:
        # Image grid: depth_values is (height, width, num_samples).
        distances = depth_values[:, :, 1:] - depth_values[:, :, :-1]
        distances = F.concatenate(
            distances,
            F.constant(1e10, shape=depth_values.shape[:-1] + (1, )),
            axis=-1)
        alpha = 1. - F.exp(-sigma * distances)
        # BUG FIX: `weights` was never computed on this branch, raising
        # NameError at the first use below. Same definition as the 2-D branch.
        weights = alpha * F.cumprod(1 - alpha + 1e-10, axis=-1, exclusive=True)
        rgb_map = F.sum(weights[..., None] * rgb, axis=rgb.ndim - 2)
        # BUG FIX: summed over axis=1 (the width axis); the reduction must be
        # over the sample axis, as in the 2-D branch.
        depth_map = F.sum(weights * depth_values, axis=-1)
        acc_map = F.sum(weights, axis=-1)

    if white_bkgd:
        # Composite onto a white background using the leftover transmittance.
        rgb_map = rgb_map + (1. - acc_map[..., None])

    if return_weights:
        # Disparity (inverse depth), guarded against division by zero.
        disp_map = 1.0 / \
            F.maximum2(F.constant(1e-10, depth_map.shape),
                       depth_map / acc_map)
        return rgb_map, depth_map, acc_map, disp_map, weights

    return rgb_map, depth_map, acc_map
def volume_rendering_transient(radiance_field, ray_origins, depth_values,
                               return_weights=False, white_bkgd=False,
                               raw_noise_std=0.0, beta_min=0.1):
    """Volumetric rendering for a model with static and transient heads.

    The last axis of ``radiance_field`` is split as: [..., :3] static rgb,
    [..., 3] static density, and — when more than 4 channels are present —
    [..., 4:7] transient rgb, [..., 7] transient density, [..., 8] transient
    uncertainty (beta).

    Args:
        radiance_field (nn.Variable or nn.NdArray): Sampled network outputs;
            last axis has 4 (static only) or >= 9 (static + transient)
            channels.
        ray_origins: NOTE(review): unused by this function.
        depth_values: Sample depths; indexed as (rays, samples) here.
        return_weights (bool): If True, return only the static rgb map and
            static weights.
        white_bkgd (bool): Composite onto a white background.
        raw_noise_std (float): NOTE(review): accepted but never used.
        beta_min (float): Lower bound added to the predicted uncertainty.

    Returns:
        Either ``(static_rgb_map, static_weights)`` when ``return_weights``
        is True, or ``(rgb_map, weights, static_rgb_map, transient_rgb_map,
        beta)``.

    NOTE(review): in the static-only case (last axis == 4) with
    ``return_weights=False``, the final return references ``rgb_map``,
    ``weights``, ``transient_rgb_map`` and ``beta`` which are never assigned
    on that path — this would raise NameError; confirm intended usage.
    """
    # Split static components; transient components exist only when the
    # network emits more than 4 channels.
    static_rgb = radiance_field[..., :3]
    static_sigma = radiance_field[..., 3]
    if radiance_field.shape[-1] > 4:
        transient_rgb = radiance_field[..., 4:7]
        transient_sigma = radiance_field[..., 7]
        transient_beta = radiance_field[..., 8]

    # Distance between consecutive samples; a large constant pads the last
    # interval.
    distances = depth_values[:, 1:] - depth_values[:, :-1]
    distances = F.concatenate(
        distances,
        F.constant(1e2, shape=depth_values.shape[:-1] + (1, )),
        axis=-1)

    static_alpha = 1. - F.exp(-static_sigma * distances)
    if radiance_field.shape[-1] > 4:
        transient_alpha = 1. - F.exp(-transient_sigma * distances)
        # Combined opacity of the static and transient fields.
        alpha = 1. - F.exp(-(static_sigma + transient_sigma) * distances)
        transmittance = F.cumprod(1-static_alpha+1e-10, axis=-1, exclusive=True) * \
            F.cumprod(1-transient_alpha+1e-10, axis=-1, exclusive=True)
    else:
        alpha = static_alpha
        transmittance = F.cumprod(1 - static_alpha + 1e-10, axis=-1,
                                  exclusive=True)
    # weights = alpha * F.cumprod(1-alpha+1e-10, axis=-1, exclusive=True)

    static_weights = static_alpha * transmittance
    if radiance_field.shape[-1] > 4:
        transient_weights = transient_alpha * transmittance
    weights = alpha * transmittance

    static_rgb_map = F.sum(static_weights[..., None] * static_rgb, axis=-2)

    if isinstance(radiance_field, nn.Variable) and radiance_field.shape[-1] > 4:
        # Training graph: blend static and transient contributions and
        # accumulate the per-ray uncertainty.
        transient_rgb_map = F.sum(transient_weights[..., None] * transient_rgb,
                                  axis=-2)
        rgb_map = static_rgb_map + transient_rgb_map
        beta = F.sum(transient_weights * transient_beta, axis=-1)
        beta += beta_min
        acc_map = F.sum(weights, axis=-1)
        if white_bkgd:
            rgb_map = rgb_map + (1. - acc_map[..., None])
    elif isinstance(radiance_field, nn.NdArray) and radiance_field.shape[-1] > 4:
        # Inference path: additionally recompute maps with per-field
        # transmittance (each field rendered independently).
        transient_rgb_map = F.sum(transient_weights[..., None] * transient_rgb,
                                  axis=-2)
        rgb_map = static_rgb_map + transient_rgb_map
        static_weights = static_alpha * \
            F.cumprod(1-static_alpha+1e-10, axis=-1, exclusive=True)
        static_rgb_map = F.sum(static_weights[..., None] * static_rgb, axis=-2)
        transient_weights = transient_alpha * \
            F.cumprod(1-transient_alpha+1e-10, axis=-1, exclusive=True)
        transient_rgb_map = F.sum(transient_weights[..., None] * transient_rgb,
                                  axis=-2)
        beta = F.sum(transient_weights * transient_beta, axis=-1) + beta_min
        # rgb_map = static_rgb_map + transient_rgb_map
        acc_map = F.sum(weights, axis=-1)
        if white_bkgd:
            rgb_map = rgb_map + (1. - acc_map[..., None])
    else:
        # Static-only rendering.
        acc_map = F.sum(static_weights, axis=-1)
        # depth_map = F.sum(weights*depth_values, axis=-1)
        if white_bkgd:
            static_rgb_map = static_rgb_map + (1. - acc_map[..., None])

    if return_weights:
        return static_rgb_map, static_weights

    return rgb_map, weights, static_rgb_map, transient_rgb_map, beta
def train_transformer(config, netG, netD, solver_netG, solver_netD,
                      train_iterators, monitor):
    """Train a pair of CycleGAN-style boundary-map transformers (A<->B).

    Builds forward/backward translation graphs with LSGAN losses, optional
    cycle-consistency and PCA-based shape losses, then runs the alternating
    discriminator/generator update loop, monitoring losses and saving
    parameters periodically.

    Args:
        config (dict): Training configuration (losses, epochs, batch size,
            monitor settings, weight decay, norm type).
        netG (dict): Generator callables keyed by 'netG_A2B' / 'netG_B2A'.
        netD (dict): Discriminator callables keyed by 'netD_A' / 'netD_B'.
        solver_netG (dict): Generator solvers keyed like ``netG``.
        solver_netD (dict): Discriminator solvers keyed like ``netD``.
        train_iterators (tuple): (source iterator, target iterator).
        monitor: nnabla Monitor used for logging and as save directory.
    """
    netG_A2B, netG_B2A = netG['netG_A2B'], netG['netG_B2A']
    netD_A, netD_B = netD['netD_A'], netD['netD_B']
    solver_netG_AB, solver_netG_BA = solver_netG['netG_A2B'], solver_netG[
        'netG_B2A']
    solver_netD_A, solver_netD_B = solver_netD['netD_A'], solver_netD['netD_B']
    train_iterator_src, train_iterator_trg = train_iterators

    # Enable optional losses only when configured with a positive weight.
    if config["train"][
            "cycle_loss"] and config["train"]["cycle_loss"]["lambda"] > 0:
        print(
            f'Applying Cycle Loss, weight: {config["train"]["cycle_loss"]["lambda"]}.'
        )
        with_cycle_loss = True
    else:
        with_cycle_loss = False

    if config["train"][
            "shape_loss"] and config["train"]["shape_loss"]["lambda"] > 0:
        print(
            f'Applying Shape Loss using PCA, weight: {config["train"]["shape_loss"]["lambda"]}.'
        )
        with_shape_loss = True
    else:
        with_shape_loss = False

    # Load boundary image to get Variable shapes
    bod_map_A = train_iterator_src.next()[0]
    bod_map_B = train_iterator_trg.next()[0]
    real_bod_map_A = nn.Variable(bod_map_A.shape)
    real_bod_map_B = nn.Variable(bod_map_B.shape)
    real_bod_map_A.persistent, real_bod_map_B.persistent = True, True

    ################### Graph Construction ####################
    # Generator
    with nn.parameter_scope('netG_transformer'):
        with nn.parameter_scope('netG_A2B'):
            fake_bod_map_B = netG_A2B(
                real_bod_map_A, test=False,
                norm_type=config["norm_type"])  # (1, 15, 64, 64)
        with nn.parameter_scope('netG_B2A'):
            fake_bod_map_A = netG_B2A(
                real_bod_map_B, test=False,
                norm_type=config["norm_type"])  # (1, 15, 64, 64)
    fake_bod_map_B.persistent, fake_bod_map_A.persistent = True, True

    # Unlinked copies let the discriminator loss backprop stop at the fakes,
    # and let generator gradients be injected back manually below.
    fake_bod_map_B_unlinked = fake_bod_map_B.get_unlinked_variable()
    fake_bod_map_A_unlinked = fake_bod_map_A.get_unlinked_variable()

    # Reconstruct images if cycle loss is applied.
    if with_cycle_loss:
        with nn.parameter_scope('netG_transformer'):
            with nn.parameter_scope('netG_B2A'):
                recon_bod_map_A = netG_B2A(
                    fake_bod_map_B_unlinked, test=False,
                    norm_type=config["norm_type"])  # (1, 15, 64, 64)
            with nn.parameter_scope('netG_A2B'):
                recon_bod_map_B = netG_A2B(
                    fake_bod_map_A_unlinked, test=False,
                    norm_type=config["norm_type"])  # (1, 15, 64, 64)
        recon_bod_map_A.persistent, recon_bod_map_B.persistent = True, True

    # Discriminator
    with nn.parameter_scope('netD_transformer'):
        with nn.parameter_scope('netD_A'):
            pred_fake_A = netD_A(fake_bod_map_A_unlinked, test=False)
            pred_real_A = netD_A(real_bod_map_A, test=False)
        with nn.parameter_scope('netD_B'):
            pred_fake_B = netD_B(fake_bod_map_B_unlinked, test=False)
            pred_real_B = netD_B(real_bod_map_B, test=False)
    real_target = F.constant(1, pred_fake_A.shape)
    fake_target = F.constant(0, pred_real_A.shape)

    ################### Loss Definition ####################
    # Generator loss
    # LSGAN loss
    loss_gan_A = lsgan_loss(pred_fake_A, real_target)
    loss_gan_B = lsgan_loss(pred_fake_B, real_target)
    loss_gan_A.persistent, loss_gan_B.persistent = True, True
    loss_gan = loss_gan_A + loss_gan_B

    # Cycle loss
    if with_cycle_loss:
        loss_cycle_A = recon_loss(recon_bod_map_A, real_bod_map_A)
        loss_cycle_B = recon_loss(recon_bod_map_B, real_bod_map_B)
        loss_cycle_A.persistent, loss_cycle_B.persistent = True, True
        loss_cycle = loss_cycle_A + loss_cycle_B

    # Shape loss: compare PCA projections of aligned boundary features
    # between real images and their translations.
    if with_shape_loss:
        with nn.parameter_scope("Align"):
            nn.load_parameters(
                config["train"]["shape_loss"]["align_param_path"])
            shape_bod_map_real_A = models.align_resnet(real_bod_map_A,
                                                       fix_parameters=True)
            shape_bod_map_fake_B = models.align_resnet(fake_bod_map_B_unlinked,
                                                       fix_parameters=True)
            shape_bod_map_real_B = models.align_resnet(real_bod_map_B,
                                                       fix_parameters=True)
            shape_bod_map_fake_A = models.align_resnet(fake_bod_map_A_unlinked,
                                                       fix_parameters=True)
        with nn.parameter_scope("PCA"):
            nn.load_parameters(config["train"]["shape_loss"]["PCA_param_path"])
            # Project to 212-D PCA space and keep the 3 leading components.
            shape_bod_map_real_A = PF.affine(shape_bod_map_real_A, 212,
                                             fix_parameters=True)
            shape_bod_map_real_A = shape_bod_map_real_A[:, :3]
            shape_bod_map_fake_B = PF.affine(shape_bod_map_fake_B, 212,
                                             fix_parameters=True)
            shape_bod_map_fake_B = shape_bod_map_fake_B[:, :3]
            shape_bod_map_real_B = PF.affine(shape_bod_map_real_B, 212,
                                             fix_parameters=True)
            shape_bod_map_real_B = shape_bod_map_real_B[:, :3]
            shape_bod_map_fake_A = PF.affine(shape_bod_map_fake_A, 212,
                                             fix_parameters=True)
            shape_bod_map_fake_A = shape_bod_map_fake_A[:, :3]
        shape_bod_map_real_A.persistent, shape_bod_map_fake_A.persistent = True, True
        shape_bod_map_real_B.persistent, shape_bod_map_fake_B.persistent = True, True
        loss_shape_A = recon_loss(shape_bod_map_real_A, shape_bod_map_fake_B)
        loss_shape_B = recon_loss(shape_bod_map_real_B, shape_bod_map_fake_A)
        loss_shape_A.persistent, loss_shape_B.persistent = True, True
        loss_shape = loss_shape_A + loss_shape_B

    # Total Generator Loss
    loss_netG = loss_gan
    if with_cycle_loss:
        loss_netG += loss_cycle * config["train"]["cycle_loss"]["lambda"]
    if with_shape_loss:
        loss_netG += loss_shape * config["train"]["shape_loss"]["lambda"]

    # Discriminator loss
    loss_netD_A = lsgan_loss(pred_real_A, real_target) + \
        lsgan_loss(pred_fake_A, fake_target)
    loss_netD_B = lsgan_loss(pred_real_B, real_target) + \
        lsgan_loss(pred_fake_B, fake_target)
    loss_netD_A.persistent, loss_netD_B.persistent = True, True
    loss_netD = loss_netD_A + loss_netD_B

    ################### Setting Solvers ####################
    # Generator solver
    with nn.parameter_scope('netG_transformer'):
        with nn.parameter_scope('netG_A2B'):
            solver_netG_AB.set_parameters(nn.get_parameters())
        with nn.parameter_scope('netG_B2A'):
            solver_netG_BA.set_parameters(nn.get_parameters())
    # Discrimintar solver
    with nn.parameter_scope('netD_transformer'):
        with nn.parameter_scope('netD_A'):
            solver_netD_A.set_parameters(nn.get_parameters())
        with nn.parameter_scope('netD_B'):
            solver_netD_B.set_parameters(nn.get_parameters())

    ################### Create Monitors ####################
    interval = config["monitor"]["interval"]
    monitors_G_dict = {
        'loss_netG': loss_netG,
        'loss_gan_A': loss_gan_A,
        'loss_gan_B': loss_gan_B
    }
    if with_cycle_loss:
        monitors_G_dict.update({
            'loss_cycle_A': loss_cycle_A,
            'loss_cycle_B': loss_cycle_B
        })
    if with_shape_loss:
        monitors_G_dict.update({
            'loss_shape_A': loss_shape_A,
            'loss_shape_B': loss_shape_B
        })
    monitors_G = MonitorManager(monitors_G_dict, monitor, interval=interval)

    monitors_D_dict = {
        'loss_netD': loss_netD,
        'loss_netD_A': loss_netD_A,
        'loss_netD_B': loss_netD_B
    }
    monitors_D = MonitorManager(monitors_D_dict, monitor, interval=interval)

    monitor_time = nm.MonitorTimeElapsed('time_training', monitor,
                                         interval=interval)
    monitor_vis = nm.MonitorImage('result', monitor, interval=1, num_images=4,
                                  normalize_method=lambda x: x)

    # Dump training information
    with open(os.path.join(monitor._save_path, "training_info.yaml"), "w",
              encoding="utf-8") as f:
        f.write(yaml.dump(config))

    # Training
    epoch = config["train"]["epochs"]
    i = 0
    iter_per_epoch = train_iterator_src.size // config["train"][
        "batch_size"] + 1
    for e in range(epoch):
        logger.info(f'Epoch = {e} / {epoch}')
        train_iterator_src._reset()  # rewind the iterator
        train_iterator_trg._reset()  # rewind the iterator
        for _ in range(iter_per_epoch):
            bod_map_A = train_iterator_src.next()[0]
            bod_map_B = train_iterator_trg.next()[0]
            real_bod_map_A.d, real_bod_map_B.d = bod_map_A, bod_map_B

            # Generate fake image
            fake_bod_map_B.forward(clear_no_need_grad=True)
            fake_bod_map_A.forward(clear_no_need_grad=True)

            # Update Discriminator
            solver_netD_A.zero_grad()
            solver_netD_B.zero_grad()
            loss_netD.forward(clear_no_need_grad=True)
            loss_netD.backward(clear_buffer=True)
            if config["train"]["weight_decay"]:
                solver_netD_A.weight_decay(config["train"]["weight_decay"])
                solver_netD_B.weight_decay(config["train"]["weight_decay"])
            solver_netD_A.update()
            solver_netD_B.update()

            # Update Generator: clear the unlinked fakes' gradients, backprop
            # the generator loss into them, then continue the backward pass
            # through the generators via the linked fake variables.
            solver_netG_BA.zero_grad()
            solver_netG_AB.zero_grad()
            solver_netD_A.zero_grad()
            solver_netD_B.zero_grad()
            fake_bod_map_B_unlinked.grad.zero()
            fake_bod_map_A_unlinked.grad.zero()
            loss_netG.forward(clear_no_need_grad=True)
            loss_netG.backward(clear_buffer=True)
            fake_bod_map_B.backward(grad=None)
            fake_bod_map_A.backward(grad=None)
            solver_netG_AB.update()
            solver_netG_BA.update()

            # Monitors
            monitor_time.add(i)
            monitors_G.add(i)
            monitors_D.add(i)
            i += 1

        # Visualize one tile of real / fake / (reconstructed) maps per epoch.
        images_to_visualize = [
            real_bod_map_A.d, fake_bod_map_B.d, real_bod_map_B.d
        ]
        if with_cycle_loss:
            images_to_visualize.extend(
                [recon_bod_map_A.d, fake_bod_map_A.d, recon_bod_map_B.d])
        else:
            images_to_visualize.extend([fake_bod_map_A.d])
        visuals = combine_images(images_to_visualize)
        monitor_vis.add(i, visuals)

        if e % config["monitor"]["save_interval"] == 0 or e == epoch - 1:
            # Save parameters of networks
            netG_B2A_save_path = os.path.join(monitor._save_path,
                                              f'netG_transformer_B2A_{e}.h5')
            netG_A2B_save_path = os.path.join(monitor._save_path,
                                              f'netG_transformer_A2B_{e}.h5')
            with nn.parameter_scope('netG_transformer'):
                with nn.parameter_scope('netG_A2B'):
                    nn.save_parameters(netG_A2B_save_path)
                with nn.parameter_scope('netG_B2A'):
                    nn.save_parameters(netG_B2A_save_path)
            netD_A_save_path = os.path.join(monitor._save_path,
                                            f'netD_transformer_A_{e}.h5')
            netD_B_save_path = os.path.join(monitor._save_path,
                                            f'netD_transformer_B_{e}.h5')
            with nn.parameter_scope('netD_transformer'):
                with nn.parameter_scope('netD_A'):
                    nn.save_parameters(netD_A_save_path)
                with nn.parameter_scope('netD_B'):
                    nn.save_parameters(netD_B_save_path)
def call(self, memory, inputs=None):
    r"""Return mel-spectrogram and attention matrix.

    Autoregressive decoder: at each frame the previous output (or, when
    teacher-forcing with ``inputs``, the previous ground-truth frame) is fed
    through a prenet, an attention GRU, and a 2-layer residual GRU decoder.

    Args:
        memory(nn.Variable): A 3D tensor of shape (T, B, C).
        inputs(nn.Variable, optional): A 3D tensor with shape of
            [B, T/r, n_mels(*r)]. Shifted log melspectrogram of sound files.
            Defaults to None (free-running inference).

    Returns:
        nn.Variable: The synthetic mel-spectrograms of shape
            (B, Ty/r, r*n_mels).
        nn.Variable: The attention matrix of shape (B, Tx, Ty).

    References:
        - https://github.com/Kyubyong/tacotron/
    """
    hp = self._hparams
    bz, mel_shape = hp.batch_size, hp.n_mels * hp.r
    encoder_dim = hp.encoder_embedding_dim

    # initialize input tensor (all-zero <GO> frame)
    input = F.constant(shape=(bz, 1, mel_shape))

    # initialize hidden states
    context = F.constant(shape=(bz, 1, hp.attention_dim))
    hidden = F.constant(shape=(1, 1, bz, encoder_dim))
    h_gru = [
        F.constant(shape=(1, 1, bz, encoder_dim)),
        F.constant(shape=(1, 1, bz, encoder_dim))
    ]

    outputs, attends = [], []

    for i in range(hp.n_frames):
        if i > 0:
            # Feed back the previous prediction, or the previous target
            # frame when teacher forcing.
            input = (outputs[-1] if inputs is None
                     else inputs[:, i - 1:i, :])

        # feed a prenet to the input
        input = prenet(input,
                       layer_sizes=hp.prenet_channels,
                       is_training=self.training,
                       scope='prenet_decoder')  # (bz, 1, C)

        # concat the input and context vector
        input = F.concatenate(input, context)  # (bz, 1, 384)

        with nn.parameter_scope('rnn_attention'):
            # calculate the output
            output, hidden = PF.gru(
                input.reshape((1, bz, -1)),
                hidden,
                training=self.training,
                bidirectional=False)  # (1, bz, 256), (1, 1, bz, 256)

        # compute the context and attention vectors
        context, attend = Bahdanau_attention(
            F.transpose(hidden[0], (1, 0, 2)),
            memory,
            out_features=hp.attention_dim,
            scope='Bahdanau_attention')  # (bz, 1, 256), (bz, 1, T)

        with nn.parameter_scope('rnn_decoder'):
            # concat RNN output and attention context vector
            with nn.parameter_scope('project_to_decoder'):
                output = F.concatenate(output,
                                       F.transpose(context, (1, 0, 2)),
                                       axis=2)
                output = PF.affine(output, encoder_dim,
                                   base_axis=2)  # (1, bz, 256)

            # decoder RNN with residual connection
            for j in range(2):
                with nn.parameter_scope(f'gru_resisidual_{j}'):
                    out, h_gru[j] = PF.gru(output,
                                           h_gru[j],
                                           training=self.training,
                                           bidirectional=False)
                    output += out  # (1, bz, 256)

            # projector to mels
            with nn.parameter_scope('project_to_mel'):
                output = F.transpose(output, (1, 0, 2))
                # (bz, 1, n_mels*r)
                output = PF.affine(output, mel_shape, base_axis=2)

        outputs.append(output)
        attends.append(attend)

    # Stack per-frame results along the time axis.
    outputs = F.concatenate(*outputs, axis=1)  # (B, T2, C2)
    attends = F.concatenate(*attends, axis=1)  # (B, T2, T1)

    return outputs, attends
def train(args):
    """
    Main script.

    Trains a DCGAN on MNIST: builds generator/discriminator graphs, runs the
    alternating update loop, then exports the trained networks to an .nnp
    archive and checks C++ forward consistency.
    """
    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    # TRAIN
    # Fake path
    z = nn.Variable([args.batch_size, 100, 1, 1])
    fake = generator(z)
    fake.persistent = True  # Not to clear at backward
    pred_fake = discriminator(fake)
    loss_gen = F.mean(F.sigmoid_cross_entropy(
        pred_fake, F.constant(1, pred_fake.shape)))
    # Unlinked copy so the discriminator update does not backprop into the
    # generator.
    fake_dis = fake.unlinked()
    pred_fake_dis = discriminator(fake_dis)
    loss_dis = F.mean(F.sigmoid_cross_entropy(
        pred_fake_dis, F.constant(0, pred_fake_dis.shape)))

    # Real path
    x = nn.Variable([args.batch_size, 1, 28, 28])
    pred_real = discriminator(x)
    loss_dis += F.mean(F.sigmoid_cross_entropy(pred_real,
                                               F.constant(1, pred_real.shape)))

    # Create Solver.
    solver_gen = S.Adam(args.learning_rate, beta1=0.5)
    solver_dis = S.Adam(args.learning_rate, beta1=0.5)
    with nn.parameter_scope("gen"):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10)
    monitor_loss_dis = M.MonitorSeries(
        "Discriminator loss", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)
    monitor_fake = M.MonitorImageTile(
        "Fake images", monitor, normalize_method=lambda x: x + 1 / 2.)

    data = data_iterator_mnist(args.batch_size, True)

    # Training loop.
    for i in range(args.max_iter):
        if i % args.model_save_interval == 0:
            with nn.parameter_scope("gen"):
                nn.save_parameters(os.path.join(
                    args.model_save_path, "generator_param_%06d.h5" % i))
            with nn.parameter_scope("dis"):
                nn.save_parameters(os.path.join(
                    args.model_save_path, "discriminator_param_%06d.h5" % i))

        # Training forward
        image, _ = data.next()
        x.d = image / 255. - 0.5  # [0, 255] -> [-0.5, 0.5]
        z.d = np.random.randn(*z.shape)

        # Generator update.
        solver_gen.zero_grad()
        loss_gen.forward(clear_no_need_grad=True)
        loss_gen.backward(clear_buffer=True)
        solver_gen.weight_decay(args.weight_decay)
        solver_gen.update()
        monitor_fake.add(i, fake)
        monitor_loss_gen.add(i, loss_gen.d.copy())

        # Discriminator update.
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        solver_dis.weight_decay(args.weight_decay)
        solver_dis.update()
        monitor_loss_dis.add(i, loss_dis.d.copy())
        monitor_time.add(i)

    # Export both networks and their executors into a single .nnp archive.
    nnp = os.path.join(
        args.model_save_path, 'dcgan_%06d.nnp' % args.max_iter)
    runtime_contents = {
        'networks': [
            {'name': 'Generator',
             'batch_size': args.batch_size,
             'outputs': {'G': fake},
             'names': {'z': z}},
            {'name': 'Discriminator',
             'batch_size': args.batch_size,
             'outputs': {'D': pred_real},
             'names': {'x': x}}],
        'executors': [
            {'name': 'Generator',
             'network': 'Generator',
             'data': ['z'],
             'output': ['G']},
            {'name': 'Discriminator',
             'network': 'Discriminator',
             'data': ['x'],
             'output': ['D']}]}
    save.save(nnp, runtime_contents)

    # Verify the exported generator matches the Python forward pass.
    from cpp_forward_check import check_cpp_forward
    check_cpp_forward(args.model_save_path, [z.d], [z], fake, nnp, "Generator")
def _loss_minus(self, dout):
    """Element-wise squared error between ``dout`` and a zero tensor."""
    zeros = F.constant(0., shape=dout.shape)
    return F.squared_error(dout, zeros)
def cbhg(inputs, K, projections, depth, is_training, scope):
    r"""Returns the 1D Convolution Bank Highwaynet bindirectional GRU (CBHG)
    module.

    Args:
        inputs (nn.Variable): NNabla Variable of shape (B, C, T).
        K (int): Maximum kernel size.
        projections (list of int): A list of channels.
        depth (int): A depth. This should be an even number.
        is_training (bool): Whether training mode is activated.
        scope (str): The parameter scope name.

    Returns:
        nn.Variable: Output variable.
    """
    with nn.parameter_scope(scope):
        # Convolution bank: concatenate channels from all 1D convolutions
        with nn.parameter_scope('conv_bank'):
            conv = partial(conv1d, inputs, channels=128, activation=F.relu,
                           is_training=is_training)
            conv_outputs = [conv(kernel_size=k, scope=f'conv1d_{k}')
                            for k in range(1, K+1)]
            conv_outputs = F.concatenate(*conv_outputs, axis=1)

        # make sure a valid input to max_pooling
        x = F.pad(conv_outputs, (0,)*5+(1,), mode='constant')

        # Maxpooling: reshape is needed because nnabla does support 1D pooling
        maxpool_output = F.max_pooling(
            x.reshape(x.shape + (1,)), kernel=(2, 1), stride=(1, 1)
        ).reshape(conv_outputs.shape)

        # Two projection layers:
        proj1_output = conv1d(
            maxpool_output, kernel_size=3, channels=projections[0],
            activation=F.relu, is_training=is_training, scope='proj_1'
        )
        proj2_output = conv1d(
            proj1_output, kernel_size=3, channels=projections[1],
            activation=None, is_training=is_training, scope='proj_2'
        )

        # Residual connection:
        # NOTE(review): assumes projections[1] equals the channel count of
        # `inputs` — confirm against callers.
        highway_input = proj2_output + inputs

        assert depth % 2 == 0
        half_depth = depth // 2

        with nn.parameter_scope('highwaynet'):
            # transposing to shape (B, T, C)
            highway_input = F.transpose(highway_input, (0, 2, 1))

            # Handle dimensionality mismatch:
            if highway_input.shape[2] != half_depth:
                highway_input = PF.affine(
                    highway_input, half_depth, base_axis=2, name='adjust_dim'
                )

            # 4-layer HighwayNet:
            for i in range(4):
                highway_input = highwaynet(
                    highway_input, half_depth, scope=f'highway_{i+1}'
                )

        with nn.parameter_scope('rnn_net'):
            # transpose to shape (T, B, C)
            rnn_input = F.transpose(highway_input, (1, 0, 2))
            # Bidirectional GRU with a zero-initialized hidden state.
            outputs, _ = PF.gru(
                rnn_input,
                F.constant(shape=(2, 2, rnn_input.shape[1], half_depth)),
                training=is_training, bidirectional=True
            )  # (T, B, C)

    return outputs
def SquaredError_Scalor(x, val=1):
    """Squared error between ``x`` and a constant tensor filled with ``val``."""
    target = F.constant(val, x.shape)
    return F.squared_error(x, target)
def backward_impl(self, inputs, outputs, prop_down, accum):
    """Double-backward of convolution.

    Computes gradients of the convolution-backward function w.r.t. its
    inputs (x, w, [b], dy) by reusing the forward function's backward pass
    with re-aliased buffers.

    Args:
        inputs: Function inputs; ``[x, w, (b,) dy]`` (see layout note below).
        outputs: Function outputs; ``[dx, dw, (db)]``.
        prop_down (list of bool): Which inputs require gradient propagation.
        accum (list of bool): Whether to accumulate into existing gradients.
    """
    # inputs: [inputs_fwd_graph] + [inputs_bwd_graph] or
    # [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph]

    # Args
    # 4 inputs means (x, w, b, dy); 3 means no bias.
    with_bias = True if len(inputs) == 4 else False
    base_axis = self.forward_func.info.args["base_axis"]
    pad = self.forward_func.info.args["pad"]
    stride = self.forward_func.info.args["stride"]
    dilation = self.forward_func.info.args["dilation"]
    group = self.forward_func.info.args["group"]
    channel_last = self.forward_func.info.args["channel_last"]
    # TODO: BHWC
    assert channel_last == False, "`channel_last = False` is only supported now."

    # Inputs
    x0 = inputs[0].data
    w0 = inputs[1].data
    b0 = inputs[2].data if with_bias else None
    dy = inputs[3].data if with_bias else inputs[2].data
    # Outputs
    dx0 = outputs[0].data
    dw0 = outputs[1].data
    db0 = outputs[2].data if with_bias else None
    # Grads of inputs
    g_x0 = inputs[0].grad
    g_w0 = inputs[1].grad
    g_b0 = inputs[2].grad if with_bias else None
    g_dy = inputs[3].grad if with_bias else inputs[2].grad
    # Grads of outputs
    g_dx0 = outputs[0].grad
    g_dw0 = outputs[1].grad
    g_db0 = outputs[2].grad if with_bias else None

    # Computation
    ## w.r.t. x or w.r.t. w
    if prop_down[0] or prop_down[1]:
        # we can re-use the backward of the forward with different inputs:
        # alias the second-order grads in as data so the existing backward
        # writes the wanted results into g_x0 / g_w0.
        inp_x = nn.Variable(x0.shape).apply(data=g_dx0, grad=g_x0,
                                            need_grad=prop_down[0])
        inp_w = nn.Variable(w0.shape).apply(data=g_dw0, grad=g_w0,
                                            need_grad=prop_down[1])
        out_y = nn.Variable(dy.shape).apply(grad=dy)
        inputs = [inp_x, inp_w]
        outputs = [out_y]
        if with_bias:
            inp_b = nn.Variable(b0.shape).apply(need_grad=False)
            inputs += [inp_b]
        self.forward_func.backward(inputs, outputs, accum)
    ## w.r.t. b
    # The bias receives no second-order contribution; write zeros unless
    # accumulating.
    if with_bias and prop_down[2] and not accum[2]:
        zeros = F.constant(0, b0.shape)
        if not nn.get_auto_forward():
            zeros.forward()
        g_b0.copy_from(zeros.data)
    ## w.r.t. dy
    if (not with_bias and prop_down[2]) or (with_bias and prop_down[3]):
        accum_dy = accum[3] if with_bias else accum[2]
        # d(dy) = conv(g_dx, w) + conv(x, g_dw) (+ broadcast g_db).
        g_dy_ = F.convolution(g_dx0, w0, None, base_axis, pad, stride,
                              dilation, group, channel_last) \
            + F.convolution(x0, g_dw0, None, base_axis, pad, stride,
                            dilation, group, channel_last)
        if with_bias:
            # Reshape the bias grad for broadcasting over dy's layout.
            g_db0 = F.reshape(g_db0, [
                1 if i != base_axis else g_db0.shape[0]
                for i in range(g_dy.ndim)
            ])
            g_dy_ += g_db0
        if accum_dy:
            g_dy += g_dy_
        else:
            g_dy.copy_from(g_dy_)
def train(args): """ Main script. """ # Get context. from nnabla.ext_utils import get_extension_context logger.info("Running in %s" % args.context) ctx = get_extension_context(args.context, device_id=args.device_id, type_config=args.type_config) nn.set_default_context(ctx) # Create CNN network for both training and testing. # TRAIN # Fake path z = nn.Variable([args.batch_size, 100, 1, 1]) fake = generator(z) fake.persistent = True # Not to clear at backward pred_fake = discriminator(fake) loss_gen = F.mean( F.sigmoid_cross_entropy(pred_fake, F.constant(1, pred_fake.shape))) fake_dis = fake.get_unlinked_variable(need_grad=True) fake_dis.need_grad = True # TODO: Workaround until v1.0.2 pred_fake_dis = discriminator(fake_dis) loss_dis = F.mean( F.sigmoid_cross_entropy(pred_fake_dis, F.constant(0, pred_fake_dis.shape))) # Real path x = nn.Variable([args.batch_size, 1, 28, 28]) pred_real = discriminator(x) loss_dis += F.mean( F.sigmoid_cross_entropy(pred_real, F.constant(1, pred_real.shape))) # Create Solver. solver_gen = S.Adam(args.learning_rate, beta1=0.5) solver_dis = S.Adam(args.learning_rate, beta1=0.5) with nn.parameter_scope("gen"): solver_gen.set_parameters(nn.get_parameters()) with nn.parameter_scope("dis"): solver_dis.set_parameters(nn.get_parameters()) start_point = 0 if args.checkpoint is not None: # load weights and solver state info from specified checkpoint files. start_point = load_checkpoint(args.checkpoint, { "gen": solver_gen, "dis": solver_dis }) # Create monitor. import nnabla.monitor as M monitor = M.Monitor(args.monitor_path) monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10) monitor_loss_dis = M.MonitorSeries("Discriminator loss", monitor, interval=10) monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100) monitor_fake = M.MonitorImageTile("Fake images", monitor, normalize_method=lambda x: (x + 1) / 2.) 
data = data_iterator_mnist(args.batch_size, True) # Save_nnp contents = save_nnp({'x': z}, {'y': fake}, args.batch_size) save.save( os.path.join(args.model_save_path, 'Generator_result_epoch0.nnp'), contents) contents = save_nnp({'x': x}, {'y': pred_real}, args.batch_size) save.save( os.path.join(args.model_save_path, 'Discriminator_result_epoch0.nnp'), contents) # Training loop. for i in range(start_point, args.max_iter): if i % args.model_save_interval == 0: save_checkpoint(args.model_save_path, i, { "gen": solver_gen, "dis": solver_dis }) # Training forward image, _ = data.next() x.d = image / 255. - 0.5 # [0, 255] to [-1, 1] z.d = np.random.randn(*z.shape) # Generator update. solver_gen.zero_grad() loss_gen.forward(clear_no_need_grad=True) loss_gen.backward(clear_buffer=True) solver_gen.weight_decay(args.weight_decay) solver_gen.update() monitor_fake.add(i, fake) monitor_loss_gen.add(i, loss_gen.d.copy()) # Discriminator update. solver_dis.zero_grad() loss_dis.forward(clear_no_need_grad=True) loss_dis.backward(clear_buffer=True) solver_dis.weight_decay(args.weight_decay) solver_dis.update() monitor_loss_dis.add(i, loss_dis.d.copy()) monitor_time.add(i) with nn.parameter_scope("gen"): nn.save_parameters( os.path.join(args.model_save_path, "generator_param_%06d.h5" % i)) with nn.parameter_scope("dis"): nn.save_parameters( os.path.join(args.model_save_path, "discriminator_param_%06d.h5" % i)) # Save_nnp contents = save_nnp({'x': z}, {'y': fake}, args.batch_size) save.save(os.path.join(args.model_save_path, 'Generator_result.nnp'), contents) contents = save_nnp({'x': x}, {'y': pred_real}, args.batch_size) save.save(os.path.join(args.model_save_path, 'Discriminator_result.nnp'), contents)
def __call__(self, outputs, inputs, grad_outputs=None, persistent_outputs=[], bind_grad_output=False):
    """
    Build and return the gradient graph of ``outputs`` w.r.t. ``inputs``.

    The logic of this method is almost same as one in
    visit_function_backward in C++ layer.

    Args:
        outputs: A Variable or list of Variables; the graph outputs whose
            gradients are requested.
        inputs: A Variable or list of Variables to differentiate w.r.t.
        grad_outputs: Optional seed gradients for each output. Either None
            (seed of the output itself), a scalar/ndarray/NdArray applied to
            all outputs, or a list matching ``outputs`` element-wise.
        persistent_outputs: Per-output persistent flags; empty list means
            "all persistent" (outputs are typically monitored losses).
        bind_grad_output: If True, binds each computed grad's data to the
            corresponding input's ``.grad`` array at the leaves.

    Returns:
        list of Variables: one gradient Variable per element of ``inputs``
        (a zero constant where no gradient path exists).
    """
    # TODO: address auto_forward is very slow. It may be python overhead since small diff when BS is large.
    # TODO: address auto_forward consumes lots of memory, need to call v.get_unlinked_variable()?
    # TODO: address auto_forward consumes lots of memory, need to use NdArray as inputs?

    # Check outputs/inputs
    outputs = self._force_list(outputs)
    if not all([isinstance(o, nn.Variable) for o in outputs]):
        raise ValueError("Element of outputs must be `nnabla.Variable`.")
    inputs = self._force_list(inputs)
    if not all([isinstance(i, nn.Variable) for i in inputs]):
        raise ValueError("Element of inputs must be `nnabla.Variable`.")

    # Check grad_outputs: normalize/validate the seed gradients.
    if grad_outputs is None:
        grad_outputs = [None] * len(outputs)
    elif isinstance(grad_outputs, (int, float, np.ndarray, nn.NdArray)):
        grad_outputs = self._force_list(grad_outputs)
    elif isinstance(grad_outputs, list):
        if len(outputs) != len(grad_outputs):
            raise ValueError(
                "Length of `grad_outputs` and length of `outputs` must be the same."
            )
        for i in range(len(outputs)):
            o = outputs[i]
            go = grad_outputs[i]
            if not isinstance(go, (type(None), int, float, np.ndarray,
                                   nn.NdArray, nn.Variable)):
                raise ValueError(
                    "Element of `grad_outputs` must be "
                    "in (`None`, `int`, `float`, `numpy.ndarray`, "
                    "`nnabla.NdArray`, `nnabla.Variable`) or "
                    "list of (`None`, `int`, `float`, `numpy.ndarray`, "
                    "`nnabla.NdArray`, `nnabla.Variable`)\n"
                    "type(grad_outputs[{}] = {}".format(i, type(go)))
            elif isinstance(go, (np.ndarray, nn.NdArray, nn.Variable)) and go.shape != o.shape:
                raise ValueError(
                    "Shape of each of outputs and grad_outputs must be same.\n"
                    "output[{}]({}) != grad_output[{}]({})".format(
                        i, o.shape, i, go.shape))

    # Check persistent_outputs
    if len(persistent_outputs) != 0 and len(outputs) != len(persistent_outputs):
        raise ValueError(
            "Length of outputs and persistent_outputs "
            "must be the same except for "
            "the case that the length of the persistent_outputs is 0.")

    # Persistent outputs since outputs are basically losses to be monitored
    persistent_outputs = [True] * len(
        outputs) if persistent_outputs == [] else persistent_outputs
    for o, p in zip(outputs, persistent_outputs):
        o.persistent = p

    # Open list of next search candidate
    # ids assigns a stable per-function id, used as a tie-breaker so the
    # (rank, id, func) tuples below are orderable even when ranks collide.
    ids = {}

    def get_id(func):
        if func not in ids.keys():
            size = len(ids)
            ids[func] = size
            return size
        return ids[func]

    # NOTE(review): `open` shadows the builtin; kept as-is for byte-fidelity.
    open = set()

    # Map for grad_variables consumed on the gradient graph.
    # End is the special case where d_o = end_f(o) and map[end_f] = {o: [d_o]}
    grad_vars = OrderedDict()  # {F_fwd: {VO_fwd: [VI_bwd]}}

    # Set grad_outputs: fold each seed into the graph as `o * go` (or use
    # `o` directly when no seed given), then register the terminal grads.
    for i in range(len(outputs)):
        o = outputs[i]
        go = grad_outputs[i]
        if go is None:
            output = o
        elif isinstance(go, (int, float)):
            go = nn.Variable(o.shape).apply(d=go, need_grad=False)
            output = o * go
        elif isinstance(go, np.ndarray):
            go = nn.Variable(o.shape).apply(d=go, need_grad=False)
            output = o * go
        elif isinstance(go, nn.NdArray):
            go = nn.Variable(o.shape).apply(data=go, need_grad=False)
            output = o * go
        elif isinstance(go, nn.Variable):
            output = o * go
        func = output.parent
        # Negative rank: pop highest-rank (closest-to-output) function first.
        open.add((-output.rank, get_id(func), func))
        # Connect the graph and its gradient graph
        grad_output = GradEndFunction()(output).apply(need_grad=False)
        grad_vars[func] = OrderedDict({output: [grad_output]})

    # Return grads but
    # replace inputs params with the vars connected with the given graph
    wrt_inputs = self._get_corresponding_vars_on_graph(inputs, outputs)
    grads = [None] * len(wrt_inputs)
    child_nodes = self._get_children(wrt_inputs)
    # Placeholder Variables for inputs that are not on the graph at all.
    wrt_inputs = [nn.Variable() if x is None else x for x in wrt_inputs]

    # Expand the graph to its gradient graph
    while len(open) != 0:
        open = sorted(open)  # python set is NOT sorted set.
        rank_func = open.pop(0)  # 0 is necessary
        open = set(open)
        f = rank_func[2]
        if not f.need_grad:
            continue
        # Connect variables on the gradient graph
        grad_outputs = self._connect_on_gradient_graph(grad_vars, f)

        # Check grads w.r.t. inputs: accumulate grads arriving at requested
        # leaf inputs (an input may be reached via several paths).
        for inp, grad_out in zip(f.inputs, grad_outputs):
            if inp not in wrt_inputs or inp.need_grad == False or grad_out is None:
                continue
            idx = wrt_inputs.index(inp)
            if grads[idx] is None:
                grads[idx] = grad_out
            else:
                grads[idx] = grads[idx] + grad_out  # accum at leaf
            if bind_grad_output:
                inp.grad = grads[idx].data

        # Propagate down: enqueue parent functions that still lead to a
        # requested input (child_nodes) and carry a gradient.
        for inp, grad_out in zip(f.inputs, grad_outputs):
            if inp not in child_nodes or not inp.need_grad or grad_out is None:
                continue
            p_i = inp.parent
            if not p_i:
                continue
            open.add((-p_i.rank, get_id(p_i), p_i))

    # If the final grads has None, then None becomes zero Variable(s).
    for i in range(len(grads)):
        if grads[i]:
            continue
        grads[i] = F.constant(0, wrt_inputs[i].shape)
    return grads
def capsule_layer(u, num_j=10, out_channels=16, num_routing_iter=3, grad_dynamic_routing=False, fix_parameters=False):
    '''
    Takes PrimaryCapules output and produces DigitsCapsules.

    Args:
        u (nnabla.Variable): A shape of [B, in_capsules, in_channels]
        num_j (int): Number of output capsules.
        out_channels (int): Number of units in each capsule of the output.
        num_routing_iter (int): Dynamic routing iterations.
        grad_dynamic_routing (bool): If False, it doesn't compute gradients of
            dynamic routing coefficients as if they are given as
            hyperparameters.
        fix_parameters (bool): Fix parameters (Set need_grad=False).

    Returns:
        nn.Variable: u_hat, the per-pair prediction vectors
            [B, num_j, num_i, out_channels].
        nn.Variable: v, the squashed output capsules
            [B, num_j, out_channels].
    '''
    assert num_routing_iter > 0
    batch_size = u.shape[0]
    num_i = u.shape[1]  # 32 * 6 * 6
    in_channels = u.shape[2]

    # Routing u_hat = W u in eq 2.
    # Implementing with broadcast and batch_matmul. Maybe not efficient.

    # Create a parameter tensor
    # Note: Consider num input channels multiplied by num input capsules
    from nnabla.initializer import UniformInitializer, calc_uniform_lim_glorot
    from nnabla.parameter import get_parameter_or_create
    w_init = UniformInitializer(
        calc_uniform_lim_glorot(num_i * in_channels, out_channels))
    w_ij = get_parameter_or_create(
        "W", (1, num_j, num_i, in_channels, out_channels), w_init,
        not fix_parameters)
    # Tileing w_ij to [batch_size, num_j, num_i, in_channels, out_channels].
    w_ij_tiled = F.broadcast(w_ij, (batch_size,) + w_ij.shape[1:])
    # Tileing u to [batch_size, num_j, num_i, 1, in_channels].
    u = u.reshape((batch_size, 1, num_i, 1, in_channels))
    u_tiled = F.broadcast(u, (batch_size, num_j, num_i, 1, in_channels))
    # Apply batched matrix multiplication:
    # [1, in_channels] * [in_channels, out_channels] --> [1, out_channels]
    # u_hat shape: [batch_size, num_j, num_i, out_channels]
    u_hat = F.batch_matmul(u_tiled, w_ij_tiled).reshape(
        (batch_size, num_j, num_i, out_channels))

    # Dynamic Routing iteration doesn't compute gradients.
    # u_hat only used at the final step of computation of s.
    u_hat_no_grad = u_hat
    if not grad_dynamic_routing:
        # Grad-stopped copy used for all but the last routing iteration.
        u_hat_no_grad = F.identity(u_hat)
        u_hat_no_grad.need_grad = False

    # Dynamic routing described in Procedure 1.
    # b holds the (unnormalized) routing logits; starts at 0.
    b = F.constant(0, (batch_size, num_j, num_i, 1))
    for r in range(num_routing_iter):
        # u_hat is only used in the last step.
        uh = u_hat_no_grad
        if r == num_routing_iter - 1:
            uh = u_hat
        # 4: Softmax in eq 3
        c = F.softmax(b, axis=1)
        # 5: Left of eq 2. s shape: [B, num_j, out_channels]
        s = F.sum(c * uh, axis=2)
        # 6: eq 1
        v = squash(s, axis=2)
        if r == num_routing_iter - 1:
            return u_hat, v
        # 7: Update by agreement
        b = b + F.sum(v.reshape((batch_size, num_j, 1, out_channels)) * uh,
                      axis=3, keepdims=True)
def call(self, memory, decoder_inputs=None):
    r"""Return mel-spectrograms, gate outputs and an attention matrix.

    Runs the autoregressive decoder for ``hp.mel_len`` steps. When
    ``decoder_inputs`` is given, teacher forcing is used (the shifted
    ground-truth frame feeds each step); otherwise the previous predicted
    frame is fed back.

    Args:
        memory (nn.Variable): A 3D tensor of shape (B, T, C).
        decoder_inputs (nn.Variable, optional): A 3D tensor with shape of
            (B, T/r, r*n_mels). Shifted log melspectrogram of sound files.
            Defaults to None.

    Returns:
        nn.Variable: The synthetic mel-spectrograms of shape (B, Ty/r, r*n_mels).
        nn.Variable: The gate outputs of shape (B, Ty).
        nn.Variable: The attention matrix of shape (B, Tx, Ty).
    """
    hp = self._hparams
    mel_shape = hp.n_mels * hp.r

    # initialize decoder states (zeros via F.constant)
    decoder_input = F.constant(shape=(hp.batch_size, 1, mel_shape))
    decoder_hidden = F.constant(shape=(1, 1, hp.batch_size, hp.decoder_rnn_dim))
    decoder_cell = F.constant(shape=(1, 1, hp.batch_size, hp.decoder_rnn_dim))

    # initialize attention states
    attention_weights = F.constant(shape=(hp.batch_size, 1, hp.text_len))
    attention_weights_cum = F.constant(shape=(hp.batch_size, 1, hp.text_len))
    attention_context = F.constant(shape=(hp.batch_size, 1, hp.encoder_embedding_dim))
    attention_hidden = F.constant(shape=(1, 1, hp.batch_size, hp.attention_rnn_dim))
    attention_cell = F.constant(shape=(1, 1, hp.batch_size, hp.attention_rnn_dim))

    # store outputs (one entry per decoder step)
    mel_outputs, gate_outputs, alignments = [], [], []

    for i in range(hp.mel_len):
        if i > 0:
            # Teacher forcing when decoder_inputs is provided; otherwise
            # feed back the previous prediction.
            decoder_input = (mel_outputs[-1] if decoder_inputs is None
                             else decoder_inputs[:, i - 1:i, :])
            if decoder_inputs is None:
                # Re-add the step axis dropped by the projection output.
                decoder_input = decoder_input[None, ...]
        # decoder of shape (B, 1, prenet_channels=256)
        decoder_input = prenet(decoder_input, hp.prenet_channels,
                               is_training=self.training, scope='prenet')

        with nn.parameter_scope('attention_rnn'):
            # cell_input of shape (B, 1, prenet_channels[-1] + C=768)
            cell_input = F.concatenate(decoder_input, attention_context, axis=2)
            _, attention_hidden, attention_cell = PF.lstm(
                F.transpose(cell_input, (1, 0, 2)),
                attention_hidden, attention_cell,
                training=self.training, name='lstm_attention'
            )  # (1, 1, B, attention_hidden), (1, 1, B, attention_hidden)
            if self.training:
                attention_hidden = F.dropout(attention_hidden,
                                             hp.p_attention_dropout)

        with nn.parameter_scope('location_attention'):
            # Stack current and cumulative weights as location features.
            attention_weights_cat = F.concatenate(attention_weights,
                                                  attention_weights_cum,
                                                  axis=1)
            attention_context, attention_weights = location_sensitive_attention(
                F.transpose(attention_hidden[0], (1, 0, 2)), memory,
                attention_weights_cat,
                attention_location_kernel_size=hp.attention_location_kernel_size,
                attention_n_filters=hp.attention_location_n_filters,
                attention_dim=hp.attention_dim, is_training=self.training,
                scope='ls_attention')
            attention_weights_cum += attention_weights
            alignments.append(attention_weights)

        with nn.parameter_scope('decoder_rnn'):
            # (1, B, attention_rnn_dim + encoder_embedding_dim)
            inp_decoder = F.concatenate(attention_hidden[0],
                                        F.transpose(attention_context,
                                                    (1, 0, 2)),
                                        axis=2)
            _, decoder_hidden, decoder_cell = PF.lstm(
                inp_decoder, decoder_hidden, decoder_cell,
                training=self.training, name='lstm_decoder')
            if self.training:
                decoder_hidden = F.dropout(decoder_hidden,
                                           hp.p_decoder_dropout)

        with nn.parameter_scope('projection'):
            proj_input = F.concatenate(
                decoder_hidden[0, 0],
                F.reshape(attention_context, (hp.batch_size, -1),
                          inplace=False),
                axis=1)  # (B, decoder_rnn_dim + encoder_embedding_dim)
            decoder_output = affine_norm(proj_input, mel_shape, base_axis=1,
                                         with_bias=True,
                                         w_init_gain='affine',
                                         scope='affine')
            mel_outputs.append(decoder_output)

        with nn.parameter_scope('gate_prediction'):
            # Scalar stop-token logit per batch element.
            gate_prediction = affine_norm(proj_input, 1, base_axis=1,
                                          with_bias=True,
                                          w_init_gain='sigmoid',
                                          scope='affine')
            gate_outputs.append(gate_prediction)

    # (B, T2, n_mels*r)
    mel_outputs = F.stack(*mel_outputs, axis=1)
    gate_outputs = F.concatenate(*gate_outputs, axis=1)  # (B, T2)
    alignments = F.concatenate(*alignments, axis=1)  # (B, T1, T2)

    return mel_outputs, gate_outputs, alignments
def main(args):
    """Semi-supervised CIFAR-10 training.

    Builds a supervised loss (uncertainty-aware cross-entropy + entropy
    regularization + sigma regularization) and an unsupervised stochastic
    regularization loss over two augmented views of unlabeled data, trains
    both with a single Adam solver, and evaluates once per epoch.

    Args:
        args: Parsed CLI namespace providing device_id, batch_size,
            batch_size_eval, context and lambda_.
    """
    # Settings
    device_id = args.device_id
    batch_size = args.batch_size
    batch_size_eval = args.batch_size_eval
    n_l_train_data = 4000
    n_train_data = 50000
    n_cls = 10
    learning_rate = 1. * 1e-3
    n_epoch = 300
    # BUG FIX: use floor division so iter_epoch and n_iter stay integers.
    # Under Python 3, `/` yields a float and `range(n_iter)` below raises
    # TypeError; `//` reproduces the original Python-2 integer semantics.
    iter_epoch = n_train_data // batch_size
    n_iter = n_epoch * iter_epoch
    extension_module = args.context
    lambda_ = args.lambda_

    # Model
    ## supervised
    batch_size, m, h, w = batch_size, 3, 32, 32
    ctx = extension_context(extension_module, device_id=device_id)
    x_l = nn.Variable((batch_size, m, h, w))
    y_l = nn.Variable((batch_size, 1))
    pred, log_var = cnn_model_003(ctx, x_l)
    one = F.constant(1., log_var.shape)
    loss_ce = ce_loss_with_uncertainty(ctx, pred, y_l, log_var)
    reg_sigma = sigma_regularization(ctx, log_var, one)
    loss_supervised = loss_ce + er_loss(ctx, pred) + lambda_ * reg_sigma

    ## stochastic regularization (two views of the same unlabeled batch)
    x_u0 = nn.Variable((batch_size, m, h, w))
    x_u1 = nn.Variable((batch_size, m, h, w))
    pred_x_u0, log_var0 = cnn_model_003(ctx, x_u0)
    pred_x_u1, log_var1 = cnn_model_003(ctx, x_u1)
    loss_sr = sr_loss_with_uncertainty(ctx,
                                       pred_x_u0, pred_x_u1,
                                       log_var0, log_var1)
    reg_sigma0 = sigma_regularization(ctx, log_var0, one)
    reg_sigma1 = sigma_regularization(ctx, log_var1, one)
    reg_sigmas = sigmas_regularization(ctx, log_var0, log_var1)
    loss_unsupervised = loss_sr + er_loss(ctx, pred_x_u0) + er_loss(ctx, pred_x_u1) \
        + lambda_ * (reg_sigma0 + reg_sigma1) + lambda_ * reg_sigmas

    ## evaluate (test-mode graph, parameters shared with training graphs)
    batch_size_eval, m, h, w = batch_size, 3, 32, 32
    x_eval = nn.Variable((batch_size_eval, m, h, w))
    pred_eval, _ = cnn_model_003(ctx, x_eval, test=True)

    # Solver
    with nn.context_scope(ctx):
        solver = S.Adam(alpha=learning_rate)
        solver.set_parameters(nn.get_parameters())

    # Dataset
    ## separate dataset into labeled/unlabeled parts, then build the reader
    home = os.environ.get("HOME")
    fpath = os.path.join(home, "datasets/cifar10/cifar-10.npz")
    separator = Separator(n_l_train_data)
    separator.separate_then_save(fpath)
    l_train_path = os.path.join(home, "datasets/cifar10/l_cifar-10.npz")
    u_train_path = os.path.join(home, "datasets/cifar10/cifar-10.npz")
    test_path = os.path.join(home, "datasets/cifar10/cifar-10.npz")

    # data reader
    data_reader = Cifar10DataReader(l_train_path, u_train_path, test_path,
                                    batch_size=batch_size,
                                    n_cls=n_cls,
                                    da=True,
                                    shape=True)

    # Training loop
    print("# Training loop")
    epoch = 1
    st = time.time()
    for i in range(n_iter):
        # Get data and set it to the variables
        x_l0_data, x_l1_data, y_l_data = data_reader.get_l_train_batch()
        x_u0_data, x_u1_data, y_u_data = data_reader.get_u_train_batch()
        x_l.d, _, y_l.d = x_l0_data, x_l1_data, y_l_data
        x_u0.d, x_u1.d = x_u0_data, x_u1_data

        # Train: accumulate gradients of both losses, then one update.
        loss_supervised.forward(clear_no_need_grad=True)
        loss_unsupervised.forward(clear_no_need_grad=True)
        solver.zero_grad()
        loss_supervised.backward(clear_buffer=True)
        loss_unsupervised.backward(clear_buffer=True)
        solver.update()

        # Evaluate once per epoch.
        if (i + 1) % iter_epoch == 0:
            # Get data and set it to the variables
            x_data, y_data = data_reader.get_test_batch()

            # Evaluation loop over the test set in eval-sized chunks.
            ve = 0.
            iter_val = 0
            for k in range(0, len(x_data), batch_size_eval):
                x_eval.d = get_test_data(x_data, k, batch_size_eval)
                label = get_test_data(y_data, k, batch_size_eval)
                pred_eval.forward(clear_buffer=True)
                ve += categorical_error(pred_eval.d, label)
                iter_val += 1
            msg = "Epoch:{},ElapsedTime:{},Acc:{:02f}".format(
                epoch,
                time.time() - st,
                (1. - ve / iter_val) * 100)
            print(msg)
            st = time.time()
            epoch += 1
def train(batch_size, X_train, max_iter):
    """Train a simple GAN on flattened 28x28 images.

    Args:
        batch_size (int): Samples per update.
        X_train (numpy.ndarray): Training images, indexed along axis 0;
            each sample is fed to the discriminator as a 784-vector.
        max_iter (int): Number of training iterations.

    Side effects: prints losses each iteration and saves generator /
    discriminator parameters every 100 iterations.
    """
    from nnabla.ext_utils import get_extension_context
    context = "cpu"
    ctx = get_extension_context(context, device_id="0", type_config="float")
    nn.set_default_context(ctx)

    # Generator graph and its loss: G wants D(G(z)) to be classified as 1.
    z = nn.Variable([batch_size, 100, 1, 1])
    fake = generator(z)
    fake.persistent = True  # keep the buffer; it is reused by the D branch
    pred_fake = discriminator(fake)
    labels = func.constant(1, pred_fake.shape)
    loss_gen = func.mean(func.sigmoid_cross_entropy(pred_fake, labels))

    # Discriminator fake branch (label 0); unlinked so G gets no gradient
    # from the discriminator loss.
    fake_disc = fake.get_unlinked_variable(need_grad=True)
    pred_fake_disc = discriminator(fake_disc)
    disc_fake_label = func.constant(0, pred_fake_disc.shape)
    loss_disc_fake = func.mean(
        func.sigmoid_cross_entropy(pred_fake_disc, disc_fake_label))

    # Discriminator real branch.
    r = nn.Variable([batch_size, 784])
    real_pred = discriminator(r)
    # BUG FIX: real samples must be labeled 1 (was 0), and the real-branch
    # loss must be computed on real_pred (was pred_fake_disc, which made
    # the real data completely unused by the discriminator loss).
    disc_real_label = func.constant(1, real_pred.shape)
    loss_disc_real = func.mean(
        func.sigmoid_cross_entropy(real_pred, disc_real_label))
    loss_disc = loss_disc_real + loss_disc_fake

    solver_gen = sol.Adam(0.0002, beta1=0.5)
    solver_disc = sol.Adam(0.0002, beta1=0.5)
    # BUG FIX: the solvers were never given any parameters, so update() was
    # a no-op and nothing trained. Bind each solver to its sub-network.
    # NOTE(review): assumes generator()/discriminator() create parameters
    # under the "generator"/"discriminator" scopes used by the save code
    # below -- confirm against their definitions.
    with nn.parameter_scope("generator"):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope("discriminator"):
        solver_disc.set_parameters(nn.get_parameters())

    for i in range(0, max_iter):
        # Sample a random real batch and a fresh latent batch.
        index = np.random.randint(0, X_train.shape[0], size=batch_size)
        input_image = X_train[index]
        r.d = input_image
        z.d = np.random.randn(*z.shape)

        # Generator update.
        solver_gen.zero_grad()
        loss_gen.forward(clear_no_need_grad=True)
        loss_gen.backward(clear_buffer=True)
        solver_gen.weight_decay(0.0001)
        solver_gen.update()

        # Discriminator update.
        solver_disc.zero_grad()
        loss_disc.forward(clear_no_need_grad=True)
        loss_disc.backward(clear_buffer=True)
        solver_disc.weight_decay(0.0001)
        solver_disc.update()

        print(
            "epoch-->[%d]-------loss_generator-->[%f]-------loss_discriminator-->[%f]"
            % (i, loss_gen.d, loss_disc.d))

        if i % 100 == 0:
            with nn.parameter_scope("generator"):
                nn.save_parameters(
                    "/home/vaibhav/deep_learning/gan/code/gen_weights/epoch_%d.h5"
                    % i)
            with nn.parameter_scope("discriminator"):
                nn.save_parameters(
                    "/home/vaibhav/deep_learning/gan/code/disc_weights/epoch_%d.h5"
                    % i)
def main(args):
    """Semi-supervised SVHN training.

    Builds a supervised loss (cross-entropy + entropy regularization +
    sigma regularization) and an unsupervised stochastic regularization
    loss over two views of unlabeled data, trains both with one Adam
    solver, evaluates once per epoch, and keeps only the best checkpoint
    (by validation error) on disk.

    Args:
        args: Parsed CLI namespace providing device_id, batch_size,
            batch_size_eval, n_label, epoch, context, lambda_ and
            model_save_path.
    """
    # Settings
    device_id = args.device_id
    batch_size = args.batch_size
    batch_size_eval = args.batch_size_eval
    n_l_train_data = args.n_label
    n_train_data = 73257
    n_cls = 10
    learning_rate = 1. * 1e-3
    n_epoch = args.epoch
    # BUG FIX: floor division keeps iter_epoch/n_iter integral. Under
    # Python 3, `/` returns a float and `range(n_iter)` below raises
    # TypeError. This also makes the former `int(...)` wrapper on the
    # per-epoch test unnecessary.
    iter_epoch = n_train_data // batch_size
    n_iter = n_epoch * iter_epoch
    extension_module = args.context
    lambda_ = args.lambda_

    # Model
    ## supervised
    batch_size, m, h, w = batch_size, 3, 32, 32
    ctx = extension_context(extension_module, device_id=device_id)
    x_l = nn.Variable((batch_size, m, h, w))
    y_l = nn.Variable((batch_size, 1))
    pred, log_var = cnn_model_003(ctx, x_l)
    one = F.constant(1., log_var.shape)
    loss_ce = ce_loss(ctx, pred, y_l)
    reg_sigma = sigma_regularization(ctx, log_var, one)
    loss_supervised = loss_ce + er_loss(ctx, pred) + lambda_ * reg_sigma

    ## stochastic regularization (two views of the same unlabeled batch)
    x_u0 = nn.Variable((batch_size, m, h, w))
    x_u1 = nn.Variable((batch_size, m, h, w))
    pred_x_u0, log_var0 = cnn_model_003(ctx, x_u0)
    pred_x_u1, log_var1 = cnn_model_003(ctx, x_u1)
    loss_sr = sr_loss_with_uncertainty(ctx,
                                       pred_x_u0, pred_x_u1,
                                       log_var0, log_var1)
    reg_sigma0 = sigma_regularization(ctx, log_var0, one)
    reg_sigma1 = sigma_regularization(ctx, log_var1, one)
    reg_sigmas = sigmas_regularization(ctx, log_var0, log_var1)
    loss_unsupervised = loss_sr + er_loss(ctx, pred_x_u0) + er_loss(ctx, pred_x_u1) \
        + lambda_ * (reg_sigma0 + reg_sigma1) + lambda_ * reg_sigmas

    ## evaluate (test-mode graph, parameters shared with training graphs)
    batch_size_eval, m, h, w = batch_size, 3, 32, 32
    x_eval = nn.Variable((batch_size_eval, m, h, w))
    pred_eval, _ = cnn_model_003(ctx, x_eval, test=True)

    # Solver
    with nn.context_scope(ctx):
        solver = S.Adam(alpha=learning_rate)
        solver.set_parameters(nn.get_parameters())

    # Dataset
    ## separate dataset into labeled/unlabeled parts, then build the reader
    home = os.environ.get("HOME")
    fpath = os.path.join(home, "datasets/svhn/train.mat")
    separator = Separator(n_l_train_data)
    separator.separate_then_save(fpath)
    l_train_path = os.path.join(home, "datasets/svhn/l_train.mat")
    u_train_path = os.path.join(home, "datasets/svhn/u_train.mat")
    test_path = os.path.join(home, "datasets/svhn/test.mat")

    # data reader
    data_reader = SVHNDataReader(l_train_path, u_train_path, test_path,
                                 batch_size=batch_size,
                                 n_cls=n_cls,
                                 da=False,
                                 shape=True)

    # Training loop
    print("# Training loop")
    epoch = 1
    st = time.time()
    ve_best = 1.
    save_path_prev = ""
    for i in range(n_iter):
        # Get data and set it to the variables
        x_l0_data, x_l1_data, y_l_data = data_reader.get_l_train_batch()
        x_u0_data, x_u1_data, y_u_data = data_reader.get_u_train_batch()
        x_l.d, _, y_l.d = x_l0_data, x_l1_data, y_l_data
        x_u0.d, x_u1.d = x_u0_data, x_u1_data

        # Train: accumulate gradients of both losses, then one update.
        loss_supervised.forward(clear_no_need_grad=True)
        loss_unsupervised.forward(clear_no_need_grad=True)
        solver.zero_grad()
        loss_supervised.backward(clear_buffer=True)
        loss_unsupervised.backward(clear_buffer=True)
        solver.update()

        # Evaluate once per epoch.
        if (i + 1) % iter_epoch == 0:
            # Get data and set it to the variables
            x_data, y_data = data_reader.get_test_batch()

            # Evaluation loop over the test set in eval-sized chunks.
            ve = 0.
            iter_val = 0
            for k in range(0, len(x_data), batch_size_eval):
                x_eval.d = get_test_data(x_data, k, batch_size_eval)
                label = get_test_data(y_data, k, batch_size_eval)
                pred_eval.forward(clear_buffer=True)
                ve += categorical_error(pred_eval.d, label)
                iter_val += 1
            ve /= iter_val
            msg = "Epoch:{},ElapsedTime:{},Acc:{:02f}".format(
                epoch,
                time.time() - st,
                (1. - ve) * 100)
            print(msg)

            # Keep only the best checkpoint on disk.
            if ve < ve_best:
                if not os.path.exists(args.model_save_path):
                    os.makedirs(args.model_save_path)
                if save_path_prev != "":
                    os.remove(save_path_prev)
                save_path = os.path.join(
                    args.model_save_path, 'params_%06d.h5' % epoch)
                nn.save_parameters(save_path)
                save_path_prev = save_path
                ve_best = ve
            st = time.time()
            epoch += 1
def train(args):
    """
    Main script: DCGAN training on MNIST (legacy contrib-context variant).

    Builds generator/discriminator loss graphs, binds one Adam solver per
    sub-network, then alternates generator and discriminator updates for
    ``args.max_iter`` iterations, periodically saving parameters and
    monitoring losses/images.
    """
    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    # TRAIN
    # Fake path: G(z) -> D(G(z)); generator wants D to output 1 on fakes.
    z = nn.Variable([args.batch_size, 100, 1, 1])
    fake = generator(z)
    fake.persistent = True  # Not to clear at backward
    pred_fake = discriminator(fake)
    loss_gen = F.mean(
        F.sigmoid_cross_entropy(pred_fake, F.constant(1, pred_fake.shape)))
    # Unlink so generator params get no gradient from the discriminator loss.
    fake_dis = fake.unlinked()
    pred_fake_dis = discriminator(fake_dis)
    loss_dis = F.mean(
        F.sigmoid_cross_entropy(pred_fake_dis, F.constant(0, pred_fake_dis.shape)))

    # Real path: discriminator should output 1 on real images.
    x = nn.Variable([args.batch_size, 1, 28, 28])
    pred_real = discriminator(x)
    loss_dis += F.mean(
        F.sigmoid_cross_entropy(pred_real, F.constant(1, pred_real.shape)))

    # Create Solver. One Adam per sub-network, bound via parameter scopes.
    solver_gen = S.Adam(args.learning_rate, beta1=0.5)
    solver_dis = S.Adam(args.learning_rate, beta1=0.5)
    with nn.parameter_scope("gen"):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10)
    monitor_loss_dis = M.MonitorSeries(
        "Discriminator loss", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)
    # BUG FIX: `lambda x: x + 1 / 2.` parsed as `x + 0.5` due to operator
    # precedence; `(x + 1) / 2.` maps [-1, 1] to [0, 1] as intended (and
    # matches the sibling training script in this file).
    monitor_fake = M.MonitorImageTile(
        "Fake images", monitor, normalize_method=lambda x: (x + 1) / 2.)

    data = data_iterator_mnist(args.batch_size, True)

    # Training loop.
    for i in range(args.max_iter):
        if i % args.model_save_interval == 0:
            with nn.parameter_scope("gen"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "generator_param_%06d.h5" % i))
            with nn.parameter_scope("dis"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "discriminator_param_%06d.h5" % i))

        # Training forward
        image, _ = data.next()
        # [0, 255] -> [-0.5, 0.5] (the old comment claimed [-1, 1]).
        x.d = image / 255. - 0.5
        z.d = np.random.randn(*z.shape)

        # Generator update.
        solver_gen.zero_grad()
        loss_gen.forward(clear_no_need_grad=True)
        loss_gen.backward(clear_buffer=True)
        solver_gen.weight_decay(args.weight_decay)
        solver_gen.update()
        monitor_fake.add(i, fake)
        monitor_loss_gen.add(i, loss_gen.d.copy())

        # Discriminator update.
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        solver_dis.weight_decay(args.weight_decay)
        solver_dis.update()
        monitor_loss_dis.add(i, loss_dis.d.copy())
        monitor_time.add(i)

    # Final save after the loop completes.
    with nn.parameter_scope("gen"):
        nn.save_parameters(
            os.path.join(args.model_save_path,
                         "generator_param_%06d.h5" % args.max_iter))
    with nn.parameter_scope("dis"):
        nn.save_parameters(
            os.path.join(args.model_save_path,
                         "discriminator_param_%06d.h5" % args.max_iter))
def main(args):
    """Semi-supervised CIFAR-10 training (two-solver variant).

    Uses separate Adam solvers over the same parameter set: one stepping on
    the supervised (uncertainty-aware CE + sigma regularization) loss and
    one on the unsupervised (stochastic regularization + entropy + sigma)
    loss. Evaluates once per epoch.

    Args:
        args: Parsed CLI namespace providing device_id, batch_size,
            batch_size_eval and context.
    """
    # Settings
    device_id = args.device_id
    batch_size = args.batch_size
    batch_size_eval = args.batch_size_eval
    n_l_train_data = 4000
    n_train_data = 50000
    n_cls = 10
    learning_rate = 1. * 1e-3
    n_epoch = 300
    # BUG FIX: use floor division so iter_epoch and n_iter stay integers.
    # Under Python 3, `/` yields a float and `range(n_iter)` below raises
    # TypeError; `//` reproduces the original Python-2 integer semantics.
    iter_epoch = n_train_data // batch_size
    n_iter = n_epoch * iter_epoch
    extension_module = args.context

    # Model
    ## supervised
    batch_size, m, h, w = batch_size, 3, 32, 32
    ctx = extension_context(extension_module, device_id=device_id)
    x_l = nn.Variable((batch_size, m, h, w))
    y_l = nn.Variable((batch_size, 1))
    pred, log_var = cnn_model_003(ctx, x_l)
    one = F.constant(1., log_var.shape)
    loss_ce = ce_loss_with_uncertainty(ctx, pred, y_l, log_var)
    reg_sigma = sigma_regularization(ctx, log_var, one)
    loss_supervised = loss_ce + reg_sigma

    ## stochastic regularization (two views of the same unlabeled batch)
    x_u0 = nn.Variable((batch_size, m, h, w))
    x_u1 = nn.Variable((batch_size, m, h, w))
    pred_x_u0, log_var0 = cnn_model_003(ctx, x_u0)
    pred_x_u1, log_var1 = cnn_model_003(ctx, x_u1)
    loss_sr = sr_loss_with_uncertainty(ctx,
                                       pred_x_u0, pred_x_u1,
                                       log_var0, log_var1)
    loss_er0 = er_loss(ctx, pred_x_u0)
    loss_er1 = er_loss(ctx, pred_x_u1)
    reg_sigma0 = sigma_regularization(ctx, log_var0, one)
    reg_sigma1 = sigma_regularization(ctx, log_var1, one)
    loss_unsupervised = loss_sr + loss_er0 + loss_er1 \
        + reg_sigma0 + reg_sigma1

    ## evaluate (test-mode graph, parameters shared with training graphs)
    batch_size_eval, m, h, w = batch_size, 3, 32, 32
    x_eval = nn.Variable((batch_size_eval, m, h, w))
    pred_eval, _ = cnn_model_003(ctx, x_eval, test=True)

    # Solver: two Adam instances over the same parameters, one per loss.
    with nn.context_scope(ctx):
        solver_l = S.Adam(alpha=learning_rate)
        solver_l.set_parameters(nn.get_parameters())
        solver_u = S.Adam(alpha=learning_rate)
        solver_u.set_parameters(nn.get_parameters())

    # Dataset
    ## separate dataset into labeled/unlabeled parts, then build the reader
    home = os.environ.get("HOME")
    fpath = os.path.join(home, "datasets/cifar10/cifar-10.npz")
    separator = Separator(n_l_train_data)
    separator.separate_then_save(fpath)
    l_train_path = os.path.join(home, "datasets/cifar10/l_cifar-10.npz")
    u_train_path = os.path.join(home, "datasets/cifar10/cifar-10.npz")
    test_path = os.path.join(home, "datasets/cifar10/cifar-10.npz")

    # data reader
    data_reader = Cifar10DataReader(l_train_path, u_train_path, test_path,
                                    batch_size=batch_size,
                                    n_cls=n_cls,
                                    da=True,
                                    shape=True)

    # Training loop
    print("# Training loop")
    epoch = 1
    st = time.time()
    for i in range(n_iter):
        # Get data and set it to the variables
        x_l0_data, x_l1_data, y_l_data = data_reader.get_l_train_batch()
        x_u0_data, x_u1_data, y_u_data = data_reader.get_u_train_batch()
        x_l.d, _, y_l.d = x_l0_data, x_l1_data, y_l_data
        x_u0.d, x_u1.d = x_u0_data, x_u1_data

        # Train
        ## supervised step
        loss_supervised.forward(clear_no_need_grad=True)
        solver_l.zero_grad()
        loss_supervised.backward(clear_buffer=True)
        solver_l.update()
        ## unsupervised step
        loss_unsupervised.forward(clear_no_need_grad=True)
        solver_u.zero_grad()
        loss_unsupervised.backward(clear_buffer=True)
        solver_u.update()

        # Evaluate once per epoch.
        if (i + 1) % iter_epoch == 0:
            # Get data and set it to the variables
            x_data, y_data = data_reader.get_test_batch()

            # Evaluation loop over the test set in eval-sized chunks.
            ve = 0.
            iter_val = 0
            for k in range(0, len(x_data), batch_size_eval):
                x_eval.d = get_test_data(x_data, k, batch_size_eval)
                label = get_test_data(y_data, k, batch_size_eval)
                pred_eval.forward(clear_buffer=True)
                ve += categorical_error(pred_eval.d, label)
                iter_val += 1
            msg = "Epoch:{},ElapsedTime:{},Acc:{:02f}".format(
                epoch,
                time.time() - st,
                (1. - ve / iter_val) * 100)
            print(msg)
            st = time.time()
            epoch += 1
def test_nan_inf_tracer(batch_size, n_class, ext_name, trace_nan, trace_inf):
    """Exercise NanInfTracer: a clean graph must pass, while graphs that
    deliberately produce inf (y / 0) and nan (inf / inf) must raise
    ValueError according to the trace_nan/trace_inf flags."""
    nn.clear_parameters()
    ctx = get_extension_context(ext_name)
    nn.set_default_context(ctx)

    x = nn.Variable.from_numpy_array(
        np.random.normal(size=(batch_size, 3, 16, 16)))
    t = nn.Variable.from_numpy_array(
        np.random.randint(low=0, high=n_class, size=(batch_size, 1)))
    y = simple_cnn(x, t, n_class)
    # Deliberately broken graphs: division by zero yields inf, inf/inf
    # yields nan.
    must_be_inf = y / F.constant(0, shape=y.shape)
    must_be_nan = must_be_inf / must_be_inf

    # Refresh all arrays once so as to ensure all grad values are 0.
    must_be_nan.visit(_refresh_inputs_grad)

    nit = NanInfTracer(trace_nan=trace_nan, trace_inf=trace_inf)

    # can be run at any cases without exception.
    with nit.trace():
        y.forward(clear_no_need_grad=True,
                  function_post_hook=nit.forward_post_hook)
        y.backward(clear_buffer=True,
                   function_post_hook=nit.backward_post_hook)

    nit.check()  # this call can also work without exception.

    # check nan
    if trace_nan:
        # The context-manager form must raise inside the with-block...
        with pytest.raises(ValueError):
            with nit.trace():
                must_be_nan.forward(clear_buffer=True,
                                    function_post_hook=nit.forward_post_hook)
        with pytest.raises(ValueError):
            with nit.trace():
                must_be_nan.backward(clear_buffer=True,
                                     function_post_hook=nit.backward_post_hook)
        # ...while the explicit check() form raises only when check() is
        # called after the traced pass.
        must_be_nan.forward(clear_buffer=True,
                            function_post_hook=nit.forward_post_hook)
        with pytest.raises(ValueError):
            nit.check()
        must_be_nan.backward(clear_buffer=True,
                             function_post_hook=nit.backward_post_hook)
        with pytest.raises(ValueError):
            nit.check()

    # check inf
    if trace_inf:
        with pytest.raises(ValueError):
            with nit.trace():
                must_be_inf.forward(clear_buffer=True,
                                    function_post_hook=nit.forward_post_hook)
        must_be_inf.forward(clear_buffer=True,
                            function_post_hook=nit.forward_post_hook)
        with pytest.raises(ValueError):
            nit.check()