Пример #1
0
def train(train_loader, model, reglog, criterion, optimizer, epoch):
    """Train the linear head (reglog) on top of a frozen backbone for one epoch.

    Args:
        train_loader: iterable yielding (input, target) batches.
        model: frozen feature extractor; kept in eval mode so batch-norm
            statistics are not updated.
        reglog: logistic-regression head trained on top of `model` features.
        criterion: classification loss (e.g. cross-entropy).
        optimizer: optimizer over the head's parameters.
        epoch: current epoch index, used for LR scheduling and logging.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # freeze also batch norm layers
    model.eval()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):

        # measure data loading time
        data_time.update(time.time() - end)

        # adjust learning rate once per iteration
        learning_rate_decay(optimizer, len(train_loader) * epoch + i, args.lr)

        # BUGFIX: `async=True` is a SyntaxError since Python 3.7 (`async` is a
        # keyword); the equivalent modern argument is `non_blocking=True`.
        target = target.cuda(non_blocking=True)
        # torch.autograd.Variable is a deprecated no-op wrapper; plain tensors
        # carry autograd state since PyTorch 0.4.
        input_var = input.cuda()
        target_var = target

        # compute output
        output = forward(input_var, model, reglog.conv)
        output = reglog(output)
        loss = criterion(output, target_var)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        # BUGFIX: `loss.data[0]` raises on 0-dim tensors in PyTorch >= 0.5;
        # `.item()` is the supported way to read a scalar loss.
        losses.update(loss.item(), input.size(0))
        top1.update(prec1[0], input.size(0))
        top5.update(prec5[0], input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if args.verbose and i % 100 == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      epoch,
                      i,
                      len(train_loader),
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      top1=top1,
                      top5=top5))
Пример #2
0
    def prepare_training(self):
        """Build the training scaffolding inside self.graph.

        Creates the global step, resolves the learning rate, selects the
        optimizer named by self._config.train.optimizer
        ('adam' | 'adam_decay' | 'sgd' | 'mom'), and creates the
        variance-scaling initializer used by the model.

        Raises:
            ValueError: if the configured optimizer name is unknown.
        """
        with self.graph.as_default():
            # Optimizer
            # Non-trainable step counter incremented by apply_gradients.
            self.global_step = tf.get_variable(
                name='global_step',
                dtype=tf.int64,
                shape=[],
                trainable=False,
                initializer=tf.zeros_initializer)

            self.learning_rate = tf.convert_to_tensor(
                self._config.train.learning_rate, dtype=tf.float32)
            optimizer_name = self._config.train.optimizer
            if optimizer_name == 'adam':
                self._optimizer = tf.train.AdamOptimizer(
                    learning_rate=self.learning_rate)
            elif optimizer_name == 'adam_decay':
                # Scale the base LR by a step-dependent decay factor.
                self.learning_rate *= learning_rate_decay(
                    self._config, self.global_step)
                self._optimizer = tf.train.AdamOptimizer(
                    learning_rate=self.learning_rate,
                    beta1=0.9,
                    beta2=0.98,
                    epsilon=1e-9)
            elif optimizer_name == 'sgd':
                self._optimizer = tf.train.GradientDescentOptimizer(
                    learning_rate=self.learning_rate)
            elif optimizer_name == 'mom':
                self._optimizer = tf.train.MomentumOptimizer(
                    self.learning_rate, momentum=0.9)
            else:
                # BUGFIX: an unrecognised name previously fell through
                # silently, leaving self._optimizer unset and causing a
                # confusing AttributeError much later; fail fast instead.
                raise ValueError(
                    'Unknown optimizer: %r' % optimizer_name)

            # Uniform scaling initializer.
            self._initializer = init_ops.variance_scaling_initializer(
                scale=1.0, mode='fan_avg', distribution='uniform')
Пример #3
0
def _to_one_hot(batch_ys, batch_size, num_classes):
    """Convert a (batch,) integer label tensor to (batch, num_classes) one-hot floats."""
    one_hot = torch.FloatTensor(batch_size, num_classes).zero_()
    batch_ys = batch_ys.unsqueeze_(1)
    one_hot.scatter_(1, batch_ys, 1.)
    return one_hot


def train(model, train_loader, test_loader, features, cfg):
    """Train a capsule-style model with a classification + reconstruction loss.

    Every 5 epochs the model is evaluated on `test_loader`, one batch of
    reconstructions is saved to disk, and progress is printed.

    Args:
        model: network returning (logits, reconstruction_2d); also exposes
            loss() and classification_loss().
        train_loader / test_loader: iterables of (inputs, labels) batches.
        features: dict with 'num_samples', 'num_classes', 'num_test_samples'.
        cfg: config with batch_size, epoch, use_cuda, and LR-decay settings.
    """
    global_step = 0
    lr = 0
    num_batch = int(features['num_samples'] / cfg.batch_size)

    # BUGFIX: the optimizer must exist before the loop -- previously it was
    # only created when `lr_decay_finished` was False, so `optimizer.step()`
    # raised NameError if decay was already finished on the very first batch.
    # Adam's default LR is used until the first decaying step replaces it.
    # NOTE(review): re-creating Adam on every LR change discards its moment
    # estimates; consider updating optimizer.param_groups instead.
    optimizer = optim.Adam(model.parameters())

    for epoch in range(cfg.epoch):
        losses = 0
        acces = 0

        for step, (batch_xs, batch_ys) in enumerate(tqdm(train_loader, total=num_batch, ncols=50, leave=False, unit='b')):
            if len(batch_ys.shape) <= 1:  # labels arrive as (bs,); expand to one-hot
                batch_ys = _to_one_hot(batch_ys, cfg.batch_size, features['num_classes'])
            batch_xs, batch_ys = Variable(batch_xs), Variable(batch_ys)

            if cfg.use_cuda:
                batch_xs, batch_ys = batch_xs.cuda(), batch_ys.cuda()

            lr, lr_decay_finished = learning_rate_decay(global_step, lr, cfg)
            if not lr_decay_finished:
                optimizer = optim.Adam(model.parameters(), lr=lr)

            out, reconstruction_2d = model(batch_xs, batch_ys)

            classification_loss, reconstruction_loss = model.loss(batch_xs, out, reconstruction_2d, batch_ys)
            loss = 0.5 * (classification_loss + reconstruction_loss)
            model.zero_grad()
            loss.backward()
            optimizer.step()
            # NOTE(review): `.data.numpy()[0]` is pre-0.4 PyTorch; on newer
            # versions this raises on 0-dim tensors -- use `loss.item()`.
            losses = losses + loss.cpu().data.numpy()[0]

            global_step += 1

        if epoch % 5 == 0:
            # Periodic evaluation on the test set.
            for i, (batch_xs, batch_ys) in enumerate(test_loader):
                if len(batch_ys.shape) <= 1:  # only assume shape is (bs,)
                    batch_ys = _to_one_hot(batch_ys, cfg.batch_size, features['num_classes'])
                batch_xs, batch_ys = Variable(batch_xs), Variable(batch_ys)
                if cfg.use_cuda:
                    batch_xs, batch_ys = batch_xs.cuda(), batch_ys.cuda()
                out, _ = model(batch_xs, batch_ys)
                acc = model.classification_loss(out, batch_ys, 1)
                acces = acces + acc.cpu().data.numpy()[0]

            # Reconstruct the last test batch for visualisation.
            _, reconstruction_2d = model(batch_xs, batch_ys)

            save_image(cfg, epoch, global_step, reconstruction_2d, batch_xs, features, idx=40)

            print('epoch is %d, training loss is %.4f, test acc is %.4f' % (epoch, losses, acces / features['num_test_samples']))
Пример #4
0
    def update(self):
        """Build the VAE generator's losses, learning rate, optimizer and
        clipped train op.

        Reads tensors previously attached to `self` (aim_out, aim_mel,
        aim_mu, aim_log_var); `hp` is presumably a module-level
        hyper-parameter object -- TODO confirm against the rest of the file.
        """
        self.global_step = tf.get_variable('global_step', initializer=0, dtype=tf.int32, trainable=False)
        self.generator_lr = learning_rate_decay(hp.G_LR, global_step=self.global_step)
        # self.discriminator_lr = learning_rate_decay(hp.D_LR, global_step=self.global_step)

        # Generator loss
        # self.reconstruction_loss = tf.reduce_mean(tf.abs(self.ori_out - self.ori_feat))  # ori_out: the sp produced by the generator
        # self.cycle_loss = tf.reduce_mean(tf.abs(self.cycle_ori_out - self.ori_feat))
        # The two losses above only exist in the GAN variant.

        self.construction_loss = tf.reduce_mean(tf.abs(self.aim_out - self.aim_mel))  # added separately: the conversion loss

        # self.ori_kl_loss = - 0.5 * tf.reduce_sum(1 + self.ori_log_var - tf.pow(self.ori_mu, 2) - tf.exp(self.ori_log_var))
        # No longer any need for the identity (self-to-self) conversion.

        # Standard VAE KL divergence between q(z|x) and the unit Gaussian.
        self.aim_kl_loss = - 0.5 * tf.reduce_sum(1 + self.aim_log_var - tf.pow(self.aim_mu, 2) - tf.exp(self.aim_log_var))

        # self.cycle_kl_loss = - 0.5 * tf.reduce_sum(1 + self.cycle_log_var - tf.pow(self.cycle_mu, 2) - tf.exp(self.cycle_log_var))
        # Step-dependent weight used to anneal the KL term.
        self.kl_loss_weight = control_weight(self.global_step)

        # self.kl_loss = self.kl_loss_weight * (self.ori_kl_loss + self.aim_kl_loss + self.cycle_kl_loss)
        self.kl_loss = self.kl_loss_weight * (self.aim_kl_loss)
        # Revised kl_loss (the two unused losses above were removed).

        # self.GAN_G_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=self.t_G, logits=self.predict_fake_P))
        # self.G_loss = self.reconstruction_loss + self.cycle_loss + self.kl_loss + self.GAN_G_loss
        # Revised: G now has a single VAE objective (its own kl_loss).
        self.G_loss = self.kl_loss + self.construction_loss  # revised: construction_loss added on top

        # Variables
        trainable_variables = tf.trainable_variables()  # all trainable vars in the default graph (original author was unsure -- verify)
        self.G_vars = [var for var in trainable_variables if 'generator' in var.name]
        # self.D_vars = [var for var in trainable_variables if 'discriminator' in var.name]

        # Optimizer
        self.G_optimizer = tf.train.AdamOptimizer(self.generator_lr)
        # self.D_optimizer = tf.train.AdamOptimizer(self.discriminator_lr)

        # Generator gradient clipping and update
        self.G_clipped = []
        self.G_gvs = self.G_optimizer.compute_gradients(self.G_loss, var_list=self.G_vars)
        """
        computer_gradients(loss, val_list)
        val_list是进行求偏导的变量的列表,默认为graph中收集的变量列表
        这里的操作是计算出各个变量的偏导数(梯度),是为了防止梯度爆炸和梯度消失。通过对gradient的修正,来进行避免。
        """
        # NOTE(review): tf.clip_by_norm raises if grad is None (a G_var not
        # connected to G_loss) -- confirm every generator variable actually
        # receives a gradient.
        for grad, var in self.G_gvs:
            grad = tf.clip_by_norm(grad, 5.)
            """
            tf.clip_by_norm(t,clip_norm,axes=None,name=None)
            指对梯度进行裁剪,通过控制梯度的最大范式,防止梯度爆炸的问题,是一种比较常用的梯度规约的方式。
            """
            self.G_clipped.append((grad, var))
        self.G_train_op = self.G_optimizer.apply_gradients(self.G_clipped, global_step=self.global_step)
        """
Пример #5
0
    def build_training_scheme(self):
        '''
        Build global step, (optionally decayed) learning rate, Adam optimizer
        and the clipped-gradient train op.

        hp.update_weights: list of strings of regular expressions used to match
        scope prefixes of variables with tf.get_collection. Only these will be updated
        by the graph's train_op: others will be frozen in training. TODO: this comment is now out of place...
        '''

        hp = self.hp
        self.global_step = tf.Variable(0, name='global_step', trainable=False)
        if hp.decay_lr:
            self.lr = learning_rate_decay(hp.lr, self.global_step)
        else:
            self.lr = hp.lr

        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr,
                                                beta1=hp.beta1,
                                                beta2=hp.beta2,
                                                epsilon=hp.epsilon)
        tf.summary.scalar("lr", self.lr)

        if self.hp.update_weights:
            train_variables = filter_variables_for_update(
                self.hp.update_weights)
            print('Subset of trainable variables chosen for finetuning.'
                  )  ## TODO: add to logging!
            print('Variables not in this list will remain frozen:')
            for variable in train_variables:
                print(variable.name)
        else:
            train_variables = None  ## default value -- everything included in compute_gradients

        ## gradient clipping
        self.gvs = self.optimizer.compute_gradients(
            self.loss, var_list=train_variables
        )  ## var_list: Optional list or tuple of tf.Variable to update to minimize loss
        self.clipped = []
        for grad, var in self.gvs:
            # BUGFIX: compute_gradients returns grad=None for variables that
            # do not influence the loss (likely here, since variables may be
            # filtered for finetuning); tf.clip_by_value raises on None, so
            # pass those pairs through unchanged (apply_gradients skips them).
            if grad is not None:
                grad = tf.clip_by_value(grad, -1., 1.)
            self.clipped.append((grad, var))
        # BUGFIX: apply_gradients was previously called INSIDE the loop,
        # creating one redundant apply op per gradient; build the train op
        # once, after all gradients have been clipped.
        self.train_op = self.optimizer.apply_gradients(
            self.clipped, global_step=self.global_step)

        # Summary
        self.merged = tf.summary.merge_all()
Пример #6
0
    def optimize(self):
        """
        Set up optimization: global step, decayed learning rate, Adam,
        and a train op with globally norm-clipped gradients.
        """
        # Step counter, incremented by apply_gradients below.
        self.global_step = tf.Variable(0, name='global_step', trainable=False)
        # Learning rate decays as a function of the global step.
        self.learning_rate = learning_rate_decay(global_step=self.global_step)
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate)

        # Split (grad, var) pairs, clip the gradients by global norm,
        # then re-pair them for the update op.
        grads_and_vars = self.optimizer.compute_gradients(self.loss)
        grads = tuple(gv[0] for gv in grads_and_vars)
        tvars = tuple(gv[1] for gv in grads_and_vars)
        self.gradients = grads
        clipped, _ = tf.clip_by_global_norm(grads, 1.0)
        self.opt_train = self.optimizer.apply_gradients(
            zip(clipped, tvars), global_step=self.global_step)
    
    ### create dataset
    train_dataset = datasets.MultiFramesDataset(opts, "train")

    
    ### start training
    while model.epoch < opts.epoch_max:

        model.epoch += 1

        ### re-generate train data loader for every epoch
        data_loader = utils.create_data_loader(train_dataset, opts, "train")

        ### update learning rate
        current_lr = utils.learning_rate_decay(opts, model.epoch)

        for param_group in optimizer.param_groups:
            param_group['lr'] = current_lr
        
        ## submodule
        flow_warping = Resample2d().to(device)
        downsampler = nn.AvgPool2d((2, 2), stride=2).to(device)


        ### criterion and loss recorder
        if opts.loss == 'L2':
            criterion = nn.MSELoss(size_average=True)
        elif opts.loss == 'L1':
            criterion = nn.L1Loss(size_average=True)
        else:
Пример #8
0
  if os.path.exists(args.continual):
    state_dict = torch.load(args.continual)
    initial_iter = state_dict['iter']
    model.encoder.load_state_dict(state_dict['encoder'])
    model.decoder.load_state_dict(state_dict['decoder'])
    optimizer.load_state_dict(state_dict['optimizer'])


# log writer
writer = SummaryWriter(log_dir=str(log_dir))

# for maximum iteration
model.to(device)
for i in tqdm(range(initial_iter, args.max_iter)):
  # adjust learning rate
  lr = learning_rate_decay(args.learning_rate, args.learning_rate_decay, i)
  for group in optimizer.param_groups:
    group['lr'] = lr

  # get images
  content_images = next(content_iter).to(device)
  style_images = next(style_iter).to(device)

  # calculate loss
  g, loss_content, loss_style = model(content_images, style_images)
  loss_content = args.content_weight * loss_content
  loss_style = args.style_weight * loss_style
  loss = loss_content + loss_style

  # optimize the network
  optimizer.zero_grad()  
    train_dataset = datasets_multiple.MultiFramesDataset(
        opts, "rain_removal", "train")
    train_haze_dataset = datasets_multiple_haze.MultiFramesHazeDataset(
        opts, "rain_removal_haze", "train")

    ### start training
    while multi_model_res.epoch < opts.epoch_max:
        multi_model_res.epoch += 1

        ### re-generate train data loader for every epoch
        data_loader = utils.create_data_loader(train_dataset, opts, "train")
        data_haze_loader = utils.create_data_loader(train_haze_dataset, opts,
                                                    "train")

        ### update learning rate
        current_lr = utils.learning_rate_decay(opts, multi_model_res.epoch)

        for param_group in optimizer.param_groups:
            param_group['lr'] = current_lr

        if opts.loss == 'L2':
            criterion = nn.MSELoss(size_average=True)
        elif opts.loss == 'L1':
            criterion = nn.L1Loss(size_average=True)
        else:
            raise Exception("Unsupported criterion %s" % opts.loss)

        criterion_ssim = SSIM()
        criterion_mse = nn.MSELoss(size_average=True)
        criterion_l1 = nn.L1Loss(size_average=True)
Пример #10
0
    def build(self):
        """Construct the training or inference graph.

        Training mode builds the Embedder / Generator / Discriminator
        networks, their losses, summaries and clipped-gradient train ops,
        with a separate fine-tuning variant that restores a pretrained
        embedder. Inference mode only builds the Embedder/Generator
        forward passes.
        """
        if self.training:
            # Training Scheme
            self.learning_rate_EG = learning_rate_decay(
                self.learning_rate_EG, self.global_step)
            self.optimizer_EG = tf.train.AdamOptimizer(
                learning_rate=self.learning_rate_EG)
            tf.summary.scalar("learning_rate_EG", self.learning_rate_EG)

            self.learning_rate_D = learning_rate_decay(self.learning_rate_D,
                                                       self.global_step)
            self.optimizer_D = tf.train.AdamOptimizer(
                learning_rate=self.learning_rate_D)
            tf.summary.scalar("learning_rate_D", self.learning_rate_D)

            if not self.fine_tune:
                (self.idx, self.x, self.y, self.tx, self.ty) = data()
            else:
                self.x = tf.placeholder(tf.float32,
                                        [self.K] + list(self.img_size),
                                        name='x')
                self.y = tf.placeholder(tf.float32,
                                        [self.K] + list(self.img_size),
                                        name='y')
                self.tx = tf.placeholder(tf.float32,
                                         list(self.img_size),
                                         name='tx')
                self.ty = tf.placeholder(tf.float32,
                                         list(self.img_size),
                                         name='ty')

            # Embedder
            # Calculate average encoding vector for video and AdaIn params input
            self.e_hat, self.psi_hat = self.Embedder(self.x,
                                                     self.y,
                                                     sn=True,
                                                     reuse=False)

            if not self.fine_tune:
                # Generator
                # Generate frame using landmarks from frame t
                self.x_hat = self.Generator(self.ty,
                                            psi_Pe=self.psi_hat,
                                            sn=True,
                                            reuse=False)
                # Discriminator
                # real score for fake image
                self.r_x_hat, self.D_act_hat = self.Discriminator(self.x_hat,
                                                                  self.ty,
                                                                  i=self.idx,
                                                                  e_new=None,
                                                                  sn=True,
                                                                  reuse=False)
                # real score for real image
                self.r_x, self.D_act = self.Discriminator(self.tx,
                                                          self.ty,
                                                          i=self.idx,
                                                          e_new=None,
                                                          sn=True,
                                                          reuse=True)
            else:
                x, y, _, _ = get_frame_data(self.frames)

                embedder_var_list = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, 'embedder')

                embedder_saver = tf.train.Saver(var_list=embedder_var_list)
                embedder_saver.restore(self.sess,
                                       tf.train.latest_checkpoint(self.logdir))

                # BUGFIX: tf.global_variables_initializer must be CALLED to
                # produce the init op; passing the function object itself to
                # sess.run raises a TypeError.
                # NOTE(review): running global init AFTER the saver restore
                # will re-initialize the restored embedder weights -- confirm
                # whether the restore should happen after this line instead.
                self.sess.run(tf.global_variables_initializer())

                # BUGFIX: the Session.run keyword is `feed_dict`, not
                # `feeddict` (which raised a TypeError).
                e_hat, psi_hat = self.sess.run([self.e_hat, self.psi_hat],
                                               feed_dict={
                                                   self.x: x,
                                                   self.y: y
                                               })

                # Generator
                # Generate frame using landmarks from frame t
                self.x_hat = self.Generator(self.ty,
                                            psi_Pe=None,
                                            psi_hat_init=psi_hat,
                                            sn=True,
                                            reuse=False)
                # Discriminator
                # real score for fake image
                self.r_x_hat, self.D_act_hat = self.Discriminator(self.x_hat,
                                                                  self.ty,
                                                                  i=None,
                                                                  e_new=e_hat,
                                                                  sn=True,
                                                                  reuse=False)
                # real score for real image
                self.r_x, self.D_act = self.Discriminator(self.tx,
                                                          self.ty,
                                                          i=None,
                                                          e_new=e_hat,
                                                          sn=True,
                                                          reuse=True)

            self.loss_CNT = self.loss_cnt(self.tx, self.x_hat)
            self.loss_ADV = self.loss_adv(self.r_x_hat, self.D_act,
                                          self.D_act_hat)

            if not self.fine_tune:
                self.loss_MCH = self.loss_mch(
                    self.e_hat,
                    tf.squeeze(tf.nn.embedding_lookup(self.W, self.idx),
                               axis=1))

                self.loss_EG = self.loss_CNT + self.loss_ADV + self.loss_MCH
            else:
                self.loss_EG = self.loss_CNT + self.loss_ADV

            self.loss_DSC = self.loss_dsc(self.r_x, self.r_x_hat)

            tf.summary.scalar("loss_CNT", self.loss_CNT)
            tf.summary.scalar("loss_ADV", self.loss_ADV)
            if not self.fine_tune:
                tf.summary.scalar("loss_MCH", self.loss_MCH)
            tf.summary.scalar("loss_EG", self.loss_EG)
            tf.summary.scalar("loss_DSC", self.loss_DSC)
            tf.summary.scalar("loss_r_x_hat", tf.reduce_mean(self.r_x_hat))
            tf.summary.scalar("loss_r_x", tf.reduce_mean(self.r_x))

            # Embedder & Generator Optimization
            EG_var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                            'generator')
            if not self.fine_tune:
                EG_var_list += tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, 'embedder')
            self.grads_EG = self.optimizer_EG.compute_gradients(
                self.loss_EG, var_list=EG_var_list)
            ## gradient clipping
            ## BUGFIX: use `grad is not None` -- `not grad == None` relies on
            ## tensor equality semantics and breaks under TF2; identity is the
            ## correct None check.
            self.clipped_EG = [
                (tf.clip_by_value(grad, -1., 1.) if grad is not None else grad,
                 var) for grad, var in self.grads_EG
            ]
            self.train_EG = self.optimizer_EG.apply_gradients(
                self.clipped_EG)  #, global_step=self.global_step)

            # Discriminator Optimization
            D_var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                           'discriminator')
            self.grads_D = self.optimizer_D.compute_gradients(
                self.loss_DSC, var_list=D_var_list)
            ## gradient clipping (same None check fix as above)
            self.clipped_D = [
                (tf.clip_by_value(grad, -1., 1.) if grad is not None else grad,
                 var) for grad, var in self.grads_D
            ]
            # Updating Global step only during EG optimization as two optimization happens during trainig, so second incrementation of global step.
            self.train_D = self.optimizer_D.apply_gradients(
                self.clipped_D)  #, global_step=self.global_step)

            # Global step increment
            self.global_step_increment = tf.assign_add(
                self.global_step, 1, name="global_step_increment")

            tf.summary.image('Generator/1/x', self.tx)
            tf.summary.image('Generator/2/y', self.ty)
            tf.summary.image('Generator/3/x_hat', self.x_hat)

            # Summary
            self.merged = tf.summary.merge_all()
        else:

            self.ty = tf.placeholder(tf.float32, [None] + list(self.img_size),
                                     name='ty')

            if not self.fine_tune:
                self.x = tf.placeholder(tf.float32,
                                        [None] + list(self.img_size),
                                        name='x')
                self.y = tf.placeholder(tf.float32,
                                        [None] + list(self.img_size),
                                        name='y')

                # Embedder
                # Calculate average encoding vector for video and AdaIn params input
                self.e_hat, self.psi_hat = self.Embedder(self.x,
                                                         self.y,
                                                         sn=True,
                                                         reuse=False)

                # Generator
                # Generate frame using landmarks from frame t
                self.x_hat = self.Generator(self.ty,
                                            psi_Pe=self.psi_hat,
                                            sn=True,
                                            reuse=False)
            else:
                # Generator
                # Generate frame using landmarks from frame t
                self.x_hat = self.Generator(self.ty,
                                            psi_Pe=None,
                                            psi_hat_init=None,
                                            sn=True,
                                            reuse=False)
    vgg = vgg(vgg_model)
    vgg.eval()

    fusion_model.train()
    three_dim_model.train()
    FlowNet.train()

    train_dataset = datasets_multiple.MultiFramesDataset(opts, "train")

    loss_fn = torch.nn.L1Loss(reduce=True, size_average=True)

    while three_dim_model.epoch < opts.epoch_max:
        three_dim_model.epoch += 1

        data_loader = utils.create_data_loader(train_dataset, opts, "train")
        current_lr = utils.learning_rate_decay(opts, three_dim_model.epoch)

        for param_group in optimizer.param_groups:
            param_group['lr'] = current_lr
        for param_group in optimizer_flow.param_groups:
            param_group['lr'] = current_lr * 0.001

        error_last = 1e8
        ts = datetime.now()

        for iteration, batch in enumerate(data_loader, 1):
            total_iter = (three_dim_model.epoch -
                          1) * opts.train_epoch_size + iteration
            cross_num = 1

            frame_i = []
Пример #12
0
def train(args):
    """Train a CycleGAN: generators g_AtoB/g_BtoA and discriminators d_A/d_B.

    Alternates generator and discriminator updates over paired image-domain
    data, periodically logs losses/sample translations to TensorBoard, and
    checkpoints all four networks after every epoch.

    Args:
        args: parsed command-line namespace providing at least
            `gpu` (CUDA device index, or None for CPU),
            `dataroot` (dataset root directory), and
            `use_identity` (bool — add the identity-mapping loss term).
    """
    # set the logger
    logger = Logger('./logs')

    # GPU enabling
    # Fix: `use_cuda` (and `dtype`) were previously assigned only inside the
    # GPU branch, so the checkpointing code at the end of each epoch raised
    # NameError when running without --gpu. Define both unconditionally.
    use_cuda = args.gpu is not None
    if use_cuda:
        dtype = torch.cuda.FloatTensor
        torch.cuda.set_device(args.gpu)
        print("Current device: %s" % torch.cuda.get_device_name(args.gpu))
    else:
        dtype = torch.FloatTensor

    # define networks
    g_AtoB = Generator().type(dtype)
    g_BtoA = Generator().type(dtype)
    d_A = Discriminator().type(dtype)
    d_B = Discriminator().type(dtype)

    # optimizers: one shared optimizer for both generators, one per discriminator
    optimizer_generators = Adam(
        list(g_AtoB.parameters()) + list(g_BtoA.parameters()), INITIAL_LR)
    optimizer_d_A = Adam(d_A.parameters(), INITIAL_LR)
    optimizer_d_B = Adam(d_B.parameters(), INITIAL_LR)

    # loss criterion: LSGAN-style MSE for adversarial terms, L1 for cycle/identity
    criterion_mse = torch.nn.MSELoss()
    criterion_l1 = torch.nn.L1Loss()

    # get training data
    dataset_transform = transforms.Compose([
        transforms.Resize(int(IMAGE_SIZE * 1),
                          Image.BICUBIC),  # scale shortest side to image_size
        transforms.RandomCrop(
            (IMAGE_SIZE, IMAGE_SIZE)),  # random center image_size out
        transforms.ToTensor(),  # turn image from [0-255] to [0-1]
        transforms.Normalize(mean=(0.5, 0.5, 0.5),
                             std=(0.5, 0.5, 0.5))  # normalize
    ])
    dataloader = DataLoader(ImgPairDataset(args.dataroot, dataset_transform,
                                           'train'),
                            batch_size=BATCH_SIZE,
                            shuffle=True)

    # get some test data to display periodically
    test_data_A = torch.tensor([]).type(dtype)
    test_data_B = torch.tensor([]).type(dtype)
    for i in range(NUM_TEST_SAMPLES):
        imgA = ImgPairDataset(args.dataroot, dataset_transform,
                              'test')[i]['A'].type(dtype).unsqueeze(0)
        imgB = ImgPairDataset(args.dataroot, dataset_transform,
                              'test')[i]['B'].type(dtype).unsqueeze(0)
        test_data_A = torch.cat((test_data_A, imgA), dim=0)
        test_data_B = torch.cat((test_data_B, imgB), dim=0)

        # per-sample output directories for the periodic visualizations
        fileStrA = 'visualization/test_%d/%s/' % (i, 'B_inStyleofA')
        fileStrB = 'visualization/test_%d/%s/' % (i, 'A_inStyleofB')
        if not os.path.exists(fileStrA):
            os.makedirs(fileStrA)
        if not os.path.exists(fileStrB):
            os.makedirs(fileStrB)

        # save the untranslated originals once, for reference
        fileStrA = 'visualization/test_original_%s_%04d.png' % ('A', i)
        fileStrB = 'visualization/test_original_%s_%04d.png' % ('B', i)
        utils.save_image(
            fileStrA,
            ImgPairDataset(args.dataroot, dataset_transform,
                           'test')[i]['A'].data)
        utils.save_image(
            fileStrB,
            ImgPairDataset(args.dataroot, dataset_transform,
                           'test')[i]['B'].data)

    # replay buffers: discriminators also see older generated fakes,
    # which stabilizes adversarial training
    replayBufferA = utils.ReplayBuffer(50)
    replayBufferB = utils.ReplayBuffer(50)

    # training loop
    step = 0
    for e in range(EPOCHS):
        startTime = time.time()
        for idx, batch in enumerate(dataloader):
            real_A = batch['A'].type(dtype)
            real_B = batch['B'].type(dtype)

            # some examples seem to have only 1 color channel instead of 3
            if (real_A.shape[1] != 3):
                continue
            if (real_B.shape[1] != 3):
                continue

            # -----------------
            #  train generators
            # -----------------
            optimizer_generators.zero_grad()
            utils.learning_rate_decay(INITIAL_LR, e, EPOCHS,
                                      optimizer_generators)

            # GAN loss
            fake_A = g_BtoA(real_B)
            disc_fake_A = d_A(fake_A)
            fake_B = g_AtoB(real_A)
            disc_fake_B = d_B(fake_B)

            # detach().clone() is the supported way to snapshot a tensor;
            # torch.tensor(existing_tensor) is deprecated for this purpose
            replayBufferA.push(fake_A.detach().clone())
            replayBufferB.push(fake_B.detach().clone())

            target_real = Variable(torch.ones_like(disc_fake_A)).type(dtype)
            target_fake = Variable(torch.zeros_like(disc_fake_A)).type(dtype)

            # generators try to make the discriminators output "real"
            loss_gan_AtoB = criterion_mse(disc_fake_B, target_real)
            loss_gan_BtoA = criterion_mse(disc_fake_A, target_real)
            loss_gan = loss_gan_AtoB + loss_gan_BtoA

            # cyclic reconstruction loss: A -> B -> A should recover A (and vice versa)
            cyclic_A = g_BtoA(fake_B)
            cyclic_B = g_AtoB(fake_A)
            loss_cyclic_AtoBtoA = criterion_l1(cyclic_A,
                                               real_A) * CYCLIC_WEIGHT
            loss_cyclic_BtoAtoB = criterion_l1(cyclic_B,
                                               real_B) * CYCLIC_WEIGHT
            loss_cyclic = loss_cyclic_AtoBtoA + loss_cyclic_BtoAtoB

            # identity loss: feeding a generator an image already in its
            # target domain should be (near) a no-op
            loss_identity = 0
            loss_identity_A = 0
            loss_identity_B = 0
            if args.use_identity:
                identity_A = g_BtoA(real_A)
                identity_B = g_AtoB(real_B)
                loss_identity_A = criterion_l1(identity_A,
                                               real_A) * 0.5 * CYCLIC_WEIGHT
                loss_identity_B = criterion_l1(identity_B,
                                               real_B) * 0.5 * CYCLIC_WEIGHT
                loss_identity = loss_identity_A + loss_identity_B

            loss_generators = loss_gan + loss_cyclic + loss_identity
            loss_generators.backward()
            optimizer_generators.step()

            # -----------------
            #  train discriminators
            # -----------------
            optimizer_d_A.zero_grad()
            utils.learning_rate_decay(INITIAL_LR, e, EPOCHS, optimizer_d_A)

            # train on a replayed fake (detached: no generator gradients)
            fake_A = replayBufferA.sample(1).detach()
            disc_fake_A = d_A(fake_A)
            disc_real_A = d_A(real_A)
            loss_d_A = 0.5 * (criterion_mse(disc_real_A, target_real) +
                              criterion_mse(disc_fake_A, target_fake))

            loss_d_A.backward()
            optimizer_d_A.step()

            optimizer_d_B.zero_grad()
            utils.learning_rate_decay(INITIAL_LR, e, EPOCHS, optimizer_d_B)

            fake_B = replayBufferB.sample(1).detach()
            disc_fake_B = d_B(fake_B)
            disc_real_B = d_B(real_B)
            loss_d_B = 0.5 * (criterion_mse(disc_real_B, target_real) +
                              criterion_mse(disc_fake_B, target_fake))

            loss_d_B.backward()
            optimizer_d_B.step()

            #log info and save sample images
            if ((idx % 250) == 0):
                # eval on some sample images
                g_AtoB.eval()
                g_BtoA.eval()

                test_B_hat = g_AtoB(test_data_A).cpu()
                test_A_hat = g_BtoA(test_data_B).cpu()

                fileBaseStr = 'test_%d_%d' % (e, idx)
                for i in range(NUM_TEST_SAMPLES):
                    fileStrA = 'visualization/test_%d/%s/%03d_%04d.png' % (
                        i, 'B_inStyleofA', e, idx)
                    fileStrB = 'visualization/test_%d/%s/%03d_%04d.png' % (
                        i, 'A_inStyleofB', e, idx)
                    utils.save_image(fileStrA, test_A_hat[i].data)
                    utils.save_image(fileStrB, test_B_hat[i].data)

                g_AtoB.train()
                g_BtoA.train()

                endTime = time.time()
                timeForIntervalIterations = endTime - startTime
                startTime = endTime

                print(
                    'Epoch [{:3d}/{:3d}], Training [{:4d}/{:4d}], Time Spent (s): [{:4.4f}], Losses: [G_GAN: {:4.4f}][G_CYC: {:4.4f}][G_IDT: {:4.4f}][D_A: {:4.4f}][D_B: {:4.4f}]'
                    .format(e, EPOCHS, idx, len(dataloader),
                            timeForIntervalIterations, loss_gan, loss_cyclic,
                            loss_identity, loss_d_A, loss_d_B))

                # tensorboard logging
                info = {
                    'loss_generators':
                    loss_generators.item(),
                    'loss_gan_AtoB':
                    loss_gan_AtoB.item(),
                    'loss_gan_BtoA':
                    loss_gan_BtoA.item(),
                    'loss_cyclic_AtoBtoA':
                    loss_cyclic_AtoBtoA.item(),
                    'loss_cyclic_BtoAtoB':
                    loss_cyclic_BtoAtoB.item(),
                    'loss_cyclic':
                    loss_cyclic.item(),
                    'loss_d_A':
                    loss_d_A.item(),
                    'loss_d_B':
                    loss_d_B.item(),
                    'lr_optimizer_generators':
                    optimizer_generators.param_groups[0]['lr'],
                    'lr_optimizer_d_A':
                    optimizer_d_A.param_groups[0]['lr'],
                    'lr_optimizer_d_B':
                    optimizer_d_B.param_groups[0]['lr'],
                }
                if args.use_identity:
                    info['loss_identity_A'] = loss_identity_A.item()
                    info['loss_identity_B'] = loss_identity_B.item()
                for tag, value in info.items():
                    logger.scalar_summary(tag, value, step)

                # image summaries expect NHWC, hence the transpose from NCHW
                info = {
                    'test_A_hat':
                    test_A_hat.data.numpy().transpose(0, 2, 3, 1),
                    'test_B_hat':
                    test_B_hat.data.numpy().transpose(0, 2, 3, 1),
                }
                for tag, images in info.items():
                    logger.image_summary(tag, images, step)

            step += 1

        # save after every epoch; move to CPU first so checkpoints are
        # loadable without a GPU
        g_AtoB.eval()
        g_BtoA.eval()
        d_A.eval()
        d_B.eval()

        if use_cuda:
            g_AtoB.cpu()
            g_BtoA.cpu()
            d_A.cpu()
            d_B.cpu()

        if not os.path.exists("models"):
            os.makedirs("models")
        filename_gAtoB = "models/" + str('g_AtoB') + "_epoch_" + str(
            e) + ".model"
        filename_gBtoA = "models/" + str('g_BtoA') + "_epoch_" + str(
            e) + ".model"
        filename_dA = "models/" + str('d_A') + "_epoch_" + str(e) + ".model"
        filename_dB = "models/" + str('d_B') + "_epoch_" + str(e) + ".model"
        torch.save(g_AtoB.state_dict(), filename_gAtoB)
        torch.save(g_BtoA.state_dict(), filename_gBtoA)
        torch.save(d_A.state_dict(), filename_dA)
        torch.save(d_B.state_dict(), filename_dB)

        if use_cuda:
            g_AtoB.cuda()
            g_BtoA.cuda()
            d_A.cuda()
            d_B.cuda()
Пример #13
0
    def update(self):
        """Build the TF1 training graph: losses, optimizers, and clipped
        train ops for the generator and discriminator variable groups.

        Populates self.* tensors/ops; returns nothing.
        """
        self.global_step = tf.get_variable('global_step',
                                           initializer=0,
                                           dtype=tf.int32,
                                           trainable=False)

        # Decayed learning rates, both driven by the shared global step.
        self.generator_lr = learning_rate_decay(hp.G_LR,
                                                global_step=self.global_step)
        self.discriminator_lr = learning_rate_decay(
            hp.D_LR, global_step=self.global_step)

        # --- Generator losses ---
        self.reconstruction_loss = tf.reduce_mean(
            tf.abs(self.ori_out - self.ori_feat))
        self.cycle_loss = tf.reduce_mean(
            tf.abs(self.cycle_ori_out - self.ori_feat))

        def kl_divergence(mu, log_var):
            # KL(N(mu, exp(log_var)) || N(0, 1)), summed over all elements.
            return -0.5 * tf.reduce_sum(1 + log_var - tf.pow(mu, 2) -
                                        tf.exp(log_var))

        self.ori_kl_loss = kl_divergence(self.ori_mu, self.ori_log_var)
        self.aim_kl_loss = kl_divergence(self.aim_mu, self.aim_log_var)
        self.cycle_kl_loss = kl_divergence(self.cycle_mu, self.cycle_log_var)

        # KL term is annealed via a step-dependent weight.
        self.kl_loss_weight = control_weight(self.global_step)
        self.kl_loss = self.kl_loss_weight * (
            self.ori_kl_loss + self.aim_kl_loss + self.cycle_kl_loss)

        xent = tf.nn.softmax_cross_entropy_with_logits_v2
        self.GAN_G_loss = tf.reduce_mean(
            xent(labels=self.t_G, logits=self.predict_fake_P))
        self.G_loss = self.reconstruction_loss + self.cycle_loss + self.kl_loss + self.GAN_G_loss

        # --- Discriminator losses ---
        # NOTE(review): D_real_loss is also fed `predict_fake_P`, exactly as
        # in the original code; presumably a real-sample logit tensor was
        # intended — confirm against the rest of the model before changing.
        self.D_fake_loss = tf.reduce_mean(
            xent(labels=self.t_D_fake, logits=self.predict_fake_P))
        self.D_real_loss = tf.reduce_mean(
            xent(labels=self.t_D_real, logits=self.predict_fake_P))
        self.GAN_D_loss = self.D_fake_loss + self.D_real_loss
        self.D_loss = self.GAN_D_loss

        # --- Variable partitioning by scope name ---
        all_vars = tf.trainable_variables()
        self.G_vars = [v for v in all_vars if 'generator' in v.name]
        self.D_vars = [v for v in all_vars if 'discriminator' in v.name]

        # --- Optimizers ---
        self.G_optimizer = tf.train.AdamOptimizer(self.generator_lr)
        self.D_optimizer = tf.train.AdamOptimizer(self.discriminator_lr)

        # --- Generator gradient clipping and update ---
        self.G_gvs = self.G_optimizer.compute_gradients(self.G_loss,
                                                        var_list=self.G_vars)
        self.G_clipped = [(tf.clip_by_norm(grad, 5.), var)
                          for grad, var in self.G_gvs]
        self.G_train_op = self.G_optimizer.apply_gradients(
            self.G_clipped, global_step=self.global_step)

        # --- Discriminator gradient clipping and update ---
        # global_step is advanced by both train ops, matching the original.
        self.D_gvs = self.D_optimizer.compute_gradients(self.D_loss,
                                                        var_list=self.D_vars)
        self.D_clipped = [(tf.clip_by_norm(grad, 5.), var)
                          for grad, var in self.D_gvs]
        self.D_train_op = self.D_optimizer.apply_gradients(
            self.D_clipped, global_step=self.global_step)
Пример #14
0
def run():
    """Entry point: parse CLI options, build the CIFAR-10 / ResNet-20 setup,
    and run a simulated multi-worker DANA-DC training loop.

    All shared state (loaders, model, optimizer, per-worker weight and
    momentum clones, warmup schedule) is stashed onto the parsed ``args``
    namespace and handed to learning_rate_decay/train/evaluate.
    """
    # Training settings
    parser = argparse.ArgumentParser(description="PyTorch Environment")

    train_parser = parser.add_argument_group("Train Parameters")
    # NOTE(review): help text says "default: 10" but the default is 160.
    train_parser.add_argument("--epochs",
                              type=int,
                              default=160,
                              metavar="E",
                              help="number of epochs to train (default: 10)")
    train_parser.add_argument(
        "--batch-size",
        type=int,
        default=128,
        metavar="B",
        help="input batch size for training (default: 128)")
    train_parser.add_argument(
        "--test-batch-size",
        type=int,
        default=128,
        metavar="BT",
        help="input batch size for testing (default: 128)")
    train_parser.add_argument("--lr_decay",
                              type=float,
                              default=0.1,
                              metavar="LD",
                              help="learning rate decay rate")
    train_parser.add_argument("--schedule",
                              type=int,
                              nargs="*",
                              default=[80, 120],
                              help="learning rate is decayed at these epochs")
    train_parser.add_argument("--warmup-epochs",
                              type=int,
                              default=5,
                              metavar="WE",
                              help="number of warmup epochs")
    train_parser.add_argument("--no-cuda",
                              action="store_true",
                              default=False,
                              help="disables CUDA training")
    train_parser.add_argument(
        "--seed",
        type=int,
        default=7186021514134990023,
        metavar="S",
        help="random seed (default: 7186021514134990023)")

    # Parameters of the simulated worker pool (size and the gamma
    # distribution governing per-worker delays).
    simulator_parser = parser.add_argument_group("Simulator Parameters")
    simulator_parser.add_argument("--sim-size",
                                  type=int,
                                  default=16,
                                  metavar="N",
                                  help="size of simulator")
    simulator_parser.add_argument("--sim-gamma-shape",
                                  type=float,
                                  default=100,
                                  metavar="GSH",
                                  help="gamma shape parameter")
    simulator_parser.add_argument("--sim-gamma-scale",
                                  type=float,
                                  default=1.28,
                                  metavar="GSC",
                                  help="gamma scale parameter")

    optimizer_parser = parser.add_argument_group("Optimizer Parameters")
    optimizer_parser.add_argument("--lr",
                                  type=float,
                                  default=0.1,
                                  metavar="LR",
                                  help="learning rate (default: 0.1)")
    optimizer_parser.add_argument("--momentum",
                                  type=float,
                                  default=0.9,
                                  metavar="M",
                                  help="SGD momentum (default: 0.9)")
    # NOTE(review): help text says "default: 0" but the default is 2.
    optimizer_parser.add_argument("--dc",
                                  type=float,
                                  default=2,
                                  metavar="DC",
                                  help="Delay Compensation (default: 0)")
    # NOTE(review): help text says "default: 0" but the default is 1e-4.
    optimizer_parser.add_argument("--weight-decay",
                                  type=float,
                                  default=1e-4,
                                  metavar="WD",
                                  help="SGD weight decay (default: 0)")

    args = parser.parse_args()

    # Seed every RNG we use so runs are reproducible.
    args.cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    random.seed(torch.initial_seed())

    print("*** Configuration ***")
    for k in vars(args):
        print(str(k), ":", str(getattr(args, k)))

    train_set, test_set = get_cifar_10_data_set(
    )  # get CIFAR-10 train and test set
    args.train_loader = data_loader(train_set, is_train=True, args=args)
    args.test_loader = data_loader(test_set, is_train=False, args=args)
    args.model = resnet20_cifar()  # get ResNet-20 Model
    if args.cuda:
        args.model = args.model.cuda()
    args.loss_fn = nn.CrossEntropyLoss()  # use cross-entropy loss

    # create optimizer
    args.optimizer = optim.SGD(args.model.parameters(),
                               lr=args.lr,
                               momentum=args.momentum,
                               weight_decay=args.weight_decay)

    # the warmup/decay logic below assumes a single parameter group
    assert len(args.optimizer.param_groups) == 1

    # initialize optimizer's momentum
    for p in args.model.parameters():
        args.optimizer.state[p]["momentum_buffer"] = torch.zeros_like(p.data)

    # clone weights for master
    args.master_weights = init_weights(args.model.parameters())

    # clone weights, one for each  worker
    args.worker_weights = [
        init_weights(args.model.parameters()) for _ in range(args.sim_size)
    ]

    # clone optimizer, one for each  worker
    args.worker_momentum = [
        init_momentum(args.model.parameters()) for _ in range(args.sim_size)
    ]

    # create the gamma distribution order
    args.worker_order = iter(GammaRandomWorkerSelection(args))

    # initialize dana
    # momentum sums are keyed by parameter identity (id(p))
    args.momentum_sum = {
        id(p): torch.zeros_like(p)
        for p in args.model.parameters()
    }

    # initialize warmup
    # linear ramp from lr/sim_size up to lr over warmup_epochs worth of batches
    args.warmup_lr = np.linspace(args.lr / args.sim_size, args.lr,
                                 len(args.train_loader) *
                                 args.warmup_epochs).tolist()

    print("*** Training with DANA-DC ***")

    for epoch in range(args.epochs):
        learning_rate_decay(epoch, args)
        train(epoch, args)
        evaluate(epoch, args)