def train(): """training""" image_pool = ImagePool(pool_size) metric = mx.metric.CustomMetric(facc) stamp = datetime.now().strftime('%Y_%m_%d-%H_%M') logging.basicConfig(level=logging.DEBUG) # define a summary writer that logs data and flushes to the file every 5 seconds sw = SummaryWriter(logdir='%s' % dir_out_sw, flush_secs=5, verbose=False) global_step = 0 for epoch in range(epochs): if epoch == 0: netG.hybridize() netD.hybridize() # sw.add_graph(netG) # sw.add_graph(netD) tic = time.time() btic = time.time() train_data.reset() val_data.reset() iter = 0 for local_step, batch in enumerate(train_data): ############################ # (1) Update D network: maximize log(D(x, y)) + log(1 - D(x, G(x, z))) ########################### tmp = mx.nd.concat(batch.data[0], batch.data[1], batch.data[2], dim=1) tmp = augmenter(tmp, patch_size=128, offset=offset, aug_type=1, aug_methods=aug_methods, random_crop=False) real_in = tmp[:, :1].as_in_context(ctx) real_out = tmp[:, 1:2].as_in_context(ctx) m = tmp[:, 2:3].as_in_context(ctx) # mask fake_out = netG(real_in) * m # loss weight based on mask, applied on L1 loss if no_loss_weights: loss_weight = m else: loss_weight = m.asnumpy() loss_weight[loss_weight == 0] = .1 loss_weight = mx.nd.array(loss_weight, ctx=m.context) fake_concat = image_pool.query(nd.concat(real_in, fake_out, dim=1)) with autograd.record(): # Train with fake image # Use image pooling to utilize history images output = netD(fake_concat) fake_label = nd.zeros(output.shape, ctx=ctx) errD_fake = GAN_loss(output, fake_label) metric.update([ fake_label, ], [ output, ]) # Train with real image real_concat = nd.concat(real_in, real_out, dim=1) output = netD(real_concat) real_label = nd.ones(output.shape, ctx=ctx) errD_real = GAN_loss(output, real_label) errD = (errD_real + errD_fake) * 0.5 errD.backward() metric.update([ real_label, ], [ output, ]) trainerD.step(batch.data[0].shape[0]) ############################ # (2) Update G network: maximize log(D(x, G(x, z))) - lambda1 * L1(y, G(x, z)) ########################### with autograd.record(): fake_out = netG(real_in) fake_concat = nd.concat(real_in, fake_out, dim=1) output = netD(fake_concat) real_label = nd.ones(output.shape, ctx=ctx) errG = GAN_loss(output, real_label) + loss_2nd( real_out, fake_out, loss_weight) * lambda1 errG.backward() trainerG.step(batch.data[0].shape[0]) sw.add_scalar(tag='loss', value=('d_loss', errD.mean().asscalar()), global_step=global_step) sw.add_scalar(tag='loss', value=('g_loss', errG.mean().asscalar()), global_step=global_step) global_step += 1 if epoch + local_step == 0: sw.add_graph((netG)) img_in_list, img_out_list, m_val = val_data.next().data m_val = m_val.as_in_context(ctx) sw.add_image('first_minibatch_train_real', norm3(real_out)) sw.add_image('first_minibatch_val_real', norm3(img_out_list.as_in_context(ctx))) netG.export('%snetG' % dir_out_checkpoints) if local_step == 0: # Log the first batch of images of each epoch (training) sw.add_image('first_minibatch_train_fake', norm3(fake_out * m) * m, epoch) sw.add_image( 'first_minibatch_val_fake', norm3(netG(img_in_list.as_in_context(ctx)) * m_val) * m_val, epoch) # norm3(netG(img_in_list.as_in_context(ctx)) * m_val.as_in_context(ctx)), epoch) if (iter + 1) % 10 == 0: name, acc = metric.get() logging.info('speed: {} samples/s'.format( batch_size / (time.time() - btic))) logging.info( 'discriminator loss = %f, generator loss = %f, binary training acc = %f at iter %d epoch %d' % (nd.mean(errD).asscalar(), nd.mean(errG).asscalar(), acc, iter, epoch)) iter += 
            btic = time.time()

        # end-of-epoch logging: read the accumulated accuracy once, then reset
        name, acc = metric.get()
        metric.reset()
        sw.add_scalar(tag='binary_training_acc', value=('acc', acc),
                      global_step=epoch)

        # weighted L1 validation loss on one held-out batch
        fake_val = netG(val_data.data[0][1].as_in_context(ctx))
        loss_val = loss_2nd(val_data.data[1][1].as_in_context(ctx), fake_val,
                            val_data.data[2][1].as_in_context(ctx)) * lambda1
        sw.add_scalar(tag='loss_val', value=('g_loss', loss_val.mean().asscalar()),
                      global_step=epoch)

        if (epoch % check_point_interval == 0) or (epoch == epochs - 1):
            netD.save_params('%snetD-%04d' % (dir_out_checkpoints, epoch))
            netG.save_params('%snetG-%04d' % (dir_out_checkpoints, epoch))

        logging.info('\nbinary training acc at epoch %d: %s=%f' % (epoch, name, acc))
        logging.info('time: %f' % (time.time() - tic))

    sw.export_scalars('scalar_dict.json')
    sw.close()
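
# `facc` (fed to mx.metric.CustomMetric above) and `norm3` (used when logging
# images) are defined elsewhere in this repo. As hedged sketches only: `facc`
# is the usual binary accuracy for a sigmoid discriminator, and `norm3` is
# assumed to min-max rescale a batch into [0, 1] for SummaryWriter.add_image.

def facc(label, pred):
    """Binary accuracy: threshold predictions at 0.5 and compare to labels."""
    pred = pred.ravel()
    label = label.ravel()
    return ((pred > 0.5) == label).mean()


def norm3(img):
    """Min-max rescale an image batch to [0, 1] (assumed behavior)."""
    lo = img.min()
    hi = img.max()
    return (img - lo) / (hi - lo + 1e-12)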
def train(epochs, ctx):
    """Train the MNIST classifier and log scalars, images, graph, and gradients to MXBoard."""
    # Collect all parameters from net and its children, then initialize them.
    net.initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)
    net.hybridize()
    # Trainer is for updating parameters with gradient.
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': opt.lr, 'momentum': opt.momentum})
    metric = mx.metric.Accuracy()
    loss = gluon.loss.SoftmaxCrossEntropyLoss()

    # collect parameter names for logging the gradients of parameters in each epoch
    params = net.collect_params()
    param_names = params.keys()

    # define a summary writer that logs data and flushes to the file every 5 seconds
    sw = SummaryWriter(logdir='./logs', flush_secs=5)

    global_step = 0
    for epoch in range(epochs):
        # reset data iterator and metric at beginning of epoch.
        metric.reset()
        for i, (data, label) in enumerate(train_data):
            # Copy data to ctx if necessary
            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx)
            # Start recording computation graph with record() section.
            # Recorded graphs can then be differentiated with backward.
            with autograd.record():
                output = net(data)
                L = loss(output, label)
            sw.add_scalar(tag='cross_entropy', value=L.mean().asscalar(),
                          global_step=global_step)
            global_step += 1
            L.backward()

            # take a gradient step with batch_size equal to data.shape[0]
            trainer.step(data.shape[0])
            # update metric at last.
            metric.update([label], [output])

            if i % opt.log_interval == 0 and i > 0:
                name, train_acc = metric.get()
                print('[Epoch %d Batch %d] Training: %s=%f' % (epoch, i, name, train_acc))

            # Log the first batch of images of each epoch
            if i == 0:
                sw.add_image('mnist_first_minibatch',
                             data.reshape((opt.batch_size, 1, 28, 28)), epoch)

        if epoch == 0:
            sw.add_graph(net)

        grads = [i.grad() for i in net.collect_params().values()]
        assert len(grads) == len(param_names)
        # logging the gradients of parameters for checking convergence
        for i, name in enumerate(param_names):
            sw.add_histogram(tag=name, values=grads[i], global_step=epoch, bins=1000)

        name, train_acc = metric.get()
        print('[Epoch %d] Training: %s=%f' % (epoch, name, train_acc))
        # logging training accuracy
        sw.add_scalar(tag='accuracy_curves', value=('train_acc', train_acc),
                      global_step=epoch)

        name, val_acc = test(ctx)
        print('[Epoch %d] Validation: %s=%f' % (epoch, name, val_acc))
        # logging the validation accuracy
        sw.add_scalar(tag='accuracy_curves', value=('valid_acc', val_acc),
                      global_step=epoch)

    sw.export_scalars('scalar_dict.json')
    sw.close()
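
# `test(ctx)` above is assumed to be the usual evaluation pass over the
# validation loader, returning the (name, accuracy) pair the caller unpacks.
# A minimal sketch under that assumption:

def test(ctx):
    metric = mx.metric.Accuracy()
    for data, label in val_data:
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        output = net(data)
        metric.update([label], [output])
    return metric.get()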
def train():
    """Train the pix2pix GAN and log losses, images, and the graph to MXBoard."""
    image_pool = ImagePool(pool_size)
    metric = mx.metric.CustomMetric(facc)

    stamp = datetime.now().strftime('%Y_%m_%d-%H_%M')
    logging.basicConfig(level=logging.DEBUG)

    # define a summary writer that logs data and flushes to the file every 5 seconds
    sw = SummaryWriter(logdir='./logs_', flush_secs=5)
    global_step = 0

    # paramsG = netG.collect_params()
    # param_namesG = paramsG.keys()
    # paramsD = netD.collect_params()
    # param_namesD = paramsD.keys()

    for epoch in range(epochs):
        if epoch == 0:
            netG.hybridize()
            netD.hybridize()
            # sw.add_graph(netG)
            # sw.add_graph(netD)

        tic = time.time()
        btic = time.time()
        train_data.reset()
        iter = 0
        for local_step, batch in enumerate(train_data):
            ############################
            # (1) Update D network: maximize log(D(x, y)) + log(1 - D(x, G(x, z)))
            ############################
            real_in = batch.data[0].as_in_context(ctx)
            real_out = batch.data[1].as_in_context(ctx)

            fake_out = netG(real_in)
            fake_concat = image_pool.query(nd.concat(real_in, fake_out, dim=1))
            with autograd.record():
                # Train with fake image
                # Use image pooling to utilize history images
                output = netD(fake_concat)
                fake_label = nd.zeros(output.shape, ctx=ctx)
                errD_fake = GAN_loss(output, fake_label)
                metric.update([fake_label], [output])

                # Train with real image
                real_concat = nd.concat(real_in, real_out, dim=1)
                output = netD(real_concat)
                real_label = nd.ones(output.shape, ctx=ctx)
                errD_real = GAN_loss(output, real_label)
                errD = (errD_real + errD_fake) * 0.5
                errD.backward()
                metric.update([real_label], [output])

            trainerD.step(batch.data[0].shape[0])

            # log the generator graph once, on the first iteration of the first epoch
            if epoch + local_step == 0:
                sw.add_graph(netG)

            ############################
            # (2) Update G network: maximize log(D(x, G(x, z))) - lambda1 * L1(y, G(x, z))
            ############################
            with autograd.record():
                fake_out = netG(real_in)
                fake_concat = nd.concat(real_in, fake_out, dim=1)
                output = netD(fake_concat)
                real_label = nd.ones(output.shape, ctx=ctx)
                errG = GAN_loss(output, real_label) + L1_loss(real_out, fake_out) * lambda1
                errG.backward()

            trainerG.step(batch.data[0].shape[0])

            sw.add_scalar(tag='loss', value=('d_loss', errD.mean().asscalar()),
                          global_step=global_step)
            sw.add_scalar(tag='loss', value=('g_loss', errG.mean().asscalar()),
                          global_step=global_step)
            global_step += 1

            # Log the first batch of generated images of each epoch
            if local_step == 0:
                fake_out = ((fake_out + 1) * 127.5) / 255  # map tanh output from [-1, 1] to [0, 1]
                sw.add_image('first_minibatch_fake', fake_out, epoch)

            if iter % 10 == 0:
                name, acc = metric.get()
                logging.info('speed: {} samples/s'.format(
                    batch_size / (time.time() - btic)))
                logging.info(
                    'discriminator loss = %f, generator loss = %f, '
                    'binary training acc = %f at iter %d epoch %d'
                    % (nd.mean(errD).asscalar(), nd.mean(errG).asscalar(),
                       acc, iter, epoch))
            iter = iter + 1
            btic = time.time()

        # end-of-epoch logging: read the accumulated accuracy once, then reset
        name, acc = metric.get()
        metric.reset()
        sw.add_scalar(tag='binary_training_acc', value=('acc', acc),
                      global_step=epoch)

        # gradsG = [i.grad() for i in netG.collect_params().values()]
        # gradsD = [i.grad() for i in netD.collect_params().values()]
        # # logging the gradients of parameters for checking convergence
        # for i, name in enumerate(param_namesG):
        #     sw.add_histogram(tag=name + 'G', values=gradsG[i], global_step=epoch, bins=1000)
        # for i, name in enumerate(param_namesD):
        #     sw.add_histogram(tag=name + 'D', values=gradsD[i], global_step=epoch, bins=1000)

        logging.info('\nbinary training acc at epoch %d: %s=%f' % (epoch, name, acc))
        logging.info('time: %f' % (time.time() - tic))

        # # Visualize one generated image for each epoch
        # fake_img = fake_out[0]
        # visualize(fake_img)
        # plt.show()

    sw.export_scalars('scalar_dict.json')
    sw.close()
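
# Both GAN variants above pass the discriminator's fake inputs through an
# `ImagePool`, the history buffer from the pix2pix/CycleGAN work: with
# probability 0.5 the discriminator sees a stored past generation instead of
# the current one, which damps training oscillation. A minimal sketch, assuming
# the repo's class follows the reference implementation (it may differ):

class ImagePool:
    def __init__(self, pool_size):
        self.pool_size = pool_size
        self.num_imgs = 0
        self.images = []

    def query(self, images):
        if self.pool_size == 0:  # pooling disabled: pass the batch through
            return images
        ret_imgs = []
        for i in range(images.shape[0]):
            image = nd.expand_dims(images[i], axis=0)
            if self.num_imgs < self.pool_size:
                # fill the pool first, returning the new image unchanged
                self.num_imgs += 1
                self.images.append(image)
                ret_imgs.append(image)
            elif nd.random.uniform(0, 1, shape=(1,)).asscalar() > 0.5:
                # swap the new image in for a randomly chosen stored one
                idx = int(nd.random.uniform(0, self.pool_size, shape=(1,)).asscalar())
                tmp = self.images[idx].copy()
                self.images[idx] = image
                ret_imgs.append(tmp)
            else:
                ret_imgs.append(image)
        return nd.concat(*ret_imgs, dim=0)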