Example #1
    def __init__(self, batch_loader, devices, optimizer, config):
        self.dic = {}
        self.loss = 0
        outputs = []
        self.config = config
        tower_grads = []
        n_gpus = len(devices)
        x, y = batch_loader.get_batch()
        # Split the batch so each device gets an equal slice
        inputs = tf.split(axis=0, num_or_size_splits=n_gpus, value=x)
        labels = tf.split(axis=0, num_or_size_splits=n_gpus, value=y)
        for i in range(n_gpus):
            with tf.device(devices[i]):
                with tf.variable_scope('UNet'):
                    print(devices[i])
                    try:
                        outputs.append(self.build_tower(inputs[i]))
                    except ValueError:
                        # Variables already exist; reuse them and rebuild the tower
                        tf.get_variable_scope().reuse_variables()
                        outputs.append(self.build_tower(inputs[i]))
                    loss = tf.reduce_mean(tf.square(outputs[-1] - labels[i]))
                    if config.is_training:
                        tower_grads.append(optimizer.compute_gradients(loss))
                    self.loss += loss / n_gpus
                    tf.get_variable_scope().reuse_variables()
        self.output = tf.concat(outputs, axis=0)
        if config.is_training:
            # Average gradients across towers and apply them once, after all towers are built
            self.train_step = optimizer.apply_gradients(
                ops.average_gradients(tower_grads))
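A note on the helper: every example on this page delegates gradient averaging to an average_gradients function that is never shown. A minimal sketch, assuming the conventional implementation from TensorFlow's CIFAR-10 multi-GPU tutorial (the real ops.average_gradients may differ):

import tensorflow as tf

def average_gradients(tower_grads):
    """Average (gradient, variable) pairs elementwise across towers.

    tower_grads holds one list of (grad, var) tuples per tower, all in
    the same variable order, as returned by optimizer.compute_gradients.
    """
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # grad_and_vars is ((grad_gpu0, v), (grad_gpu1, v), ...) for one variable
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars]
        grad = tf.reduce_mean(tf.concat(grads, axis=0), axis=0)
        # Variables are shared across towers, so the first tower's handle suffices
        average_grads.append((grad, grad_and_vars[0][1]))
    return average_grads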
Example #2
    def _prepare_training(self):
        """Prepare training.

        Build TensorFlow's graph. To support multi-GPU training, the
        mini-batch is divided across devices. Training can also resume:
        if a checkpoint file exists in FLAGS.gan_dir/log, it is loaded
        and training restarts from it.
        """
        assert FLAGS.batch_size >= FLAGS.style_ids_n, 'batch_size must be greater than or equal to style_ids_n'
        self.gpu_n = len(FLAGS.gpu_ids.split(','))
        self.embedding_chars = set_chars_type(FLAGS.chars_type)
        assert self.embedding_chars, 'embedding_chars is empty'
        self.char_embedding_n = len(self.embedding_chars)
        self.z_size = FLAGS.style_z_size + self.char_embedding_n

        with tf.device('/cpu:0'):
            # Initialize style embeddings from a uniform distribution
            style_embedding_np = np.random.uniform(
                -1, 1,
                (FLAGS.style_ids_n, FLAGS.style_z_size)).astype(np.float32)
            with tf.variable_scope('embeddings'):
                self.style_embedding = tf.Variable(style_embedding_np,
                                                   name='style_embedding')

            self.style_ids = tf.placeholder(tf.int32, (FLAGS.batch_size, ),
                                            name='style_ids')
            self.char_ids = tf.placeholder(tf.int32, (FLAGS.batch_size, ),
                                           name='char_ids')
            self.is_train = tf.placeholder(tf.bool, name='is_train')
            self.real_imgs = tf.placeholder(tf.float32,
                                            (FLAGS.batch_size, FLAGS.img_width,
                                             FLAGS.img_height, FLAGS.img_dim),
                                            name='real_imgs')
            self.labels = tf.placeholder(
                tf.float32, (FLAGS.batch_size, self.char_embedding_n),
                name='labels')

            d_opt = tf.train.AdamOptimizer(learning_rate=0.0001,
                                           beta1=0.,
                                           beta2=0.9)
            g_opt = tf.train.AdamOptimizer(learning_rate=0.0001,
                                           beta1=0.,
                                           beta2=0.9)

        # Initialize per-GPU lists
        fake_imgs = [0] * self.gpu_n
        d_loss = [0] * self.gpu_n
        g_loss = [0] * self.gpu_n

        d_grads = [0] * self.gpu_n
        g_grads = [0] * self.gpu_n

        divided_batch_size = FLAGS.batch_size // self.gpu_n
        is_not_first = False

        # Build graph
        for i in range(self.gpu_n):
            batch_start = i * divided_batch_size
            batch_end = (i + 1) * divided_batch_size
            with tf.device('/gpu:{}'.format(i)):
                if FLAGS.arch == 'DCGAN':
                    generator = GeneratorDCGAN(img_size=(FLAGS.img_width,
                                                         FLAGS.img_height),
                                               img_dim=FLAGS.img_dim,
                                               z_size=self.z_size,
                                               layer_n=4,
                                               k_size=3,
                                               smallest_hidden_unit_n=64,
                                               is_bn=False)
                    discriminator = DiscriminatorDCGAN(
                        img_size=(FLAGS.img_width, FLAGS.img_height),
                        img_dim=FLAGS.img_dim,
                        layer_n=4,
                        k_size=3,
                        smallest_hidden_unit_n=64,
                        is_bn=False)
                elif FLAGS.arch == 'ResNet':
                    generator = GeneratorResNet(k_size=3, smallest_unit_n=64)
                    discriminator = DiscriminatorResNet(k_size=3,
                                                        smallest_unit_n=64)

                # If the sum of style_ids is negative, sample style z from a
                # uniform distribution instead of the embedding table
                style_z = tf.cond(
                    tf.less(tf.reduce_sum(self.style_ids[batch_start:batch_end]), 0),
                    lambda: tf.random_uniform((divided_batch_size, FLAGS.style_z_size), -1, 1),
                    lambda: tf.nn.embedding_lookup(self.style_embedding,
                                                   self.style_ids[batch_start:batch_end]))
                char_z = tf.one_hot(self.char_ids[batch_start:batch_end],
                                    self.char_embedding_n)
                z = tf.concat([style_z, char_z], axis=1)

                # Generate fake images
                fake_imgs[i] = generator(z,
                                         is_reuse=is_not_first,
                                         is_train=self.is_train)

                # Calculate loss
                d_real = discriminator(self.real_imgs[batch_start:batch_end],
                                       is_reuse=is_not_first,
                                       is_train=self.is_train)
                d_fake = discriminator(fake_imgs[i],
                                       is_reuse=True,
                                       is_train=self.is_train)
                d_loss[i] = -(tf.reduce_mean(d_real) - tf.reduce_mean(d_fake))
                g_loss[i] = -tf.reduce_mean(d_fake)

                # Calculate gradient Penalty
                epsilon = tf.random_uniform((divided_batch_size, 1, 1, 1),
                                            minval=0.,
                                            maxval=1.)
                interp = self.real_imgs[batch_start:batch_end] + epsilon * (
                    fake_imgs[i] - self.real_imgs[batch_start:batch_end])
                d_interp = discriminator(interp,
                                         is_reuse=True,
                                         is_train=self.is_train)
                grads = tf.gradients(d_interp, [interp])[0]
                # Per-sample gradient norm over all non-batch axes (standard WGAN-GP)
                slopes = tf.sqrt(tf.reduce_sum(tf.square(grads), axis=[1, 2, 3]))
                grad_penalty = tf.reduce_mean((slopes - 1.)**2)
                d_loss[i] += 10 * grad_penalty

                # Get trainable variables
                d_vars = [
                    var for var in tf.trainable_variables()
                    if 'discriminator' in var.name
                ]
                g_vars = [
                    var for var in tf.trainable_variables()
                    if 'generator' in var.name
                ]

                d_grads[i] = d_opt.compute_gradients(d_loss[i],
                                                     var_list=d_vars)
                g_grads[i] = g_opt.compute_gradients(g_loss[i],
                                                     var_list=g_vars)

            is_not_first = True

        with tf.device('/cpu:0'):
            self.fake_imgs = tf.concat(fake_imgs, axis=0)
            avg_d_grads = average_gradients(d_grads)
            avg_g_grads = average_gradients(g_grads)
            self.d_train = d_opt.apply_gradients(avg_d_grads)
            self.g_train = g_opt.apply_gradients(avg_g_grads)

        # Calculate summary for tensorboard
        tf.summary.scalar('d_loss', -(sum(d_loss) / len(d_loss)))
        tf.summary.scalar('g_loss', -(sum(g_loss) / len(g_loss)))
        self.summary = tf.summary.merge_all()

        # Setup session
        sess_config = tf.ConfigProto(gpu_options=tf.GPUOptions(
            visible_device_list=FLAGS.gpu_ids))
        self.sess = tf.Session(config=sess_config)
        self.saver = tf.train.Saver(max_to_keep=5)

        # If checkpoint is found, restart training
        checkpoint = tf.train.get_checkpoint_state(self.dst_log)
        if checkpoint:
            saver_resume = tf.train.Saver()
            saver_resume.restore(self.sess, checkpoint.model_checkpoint_path)
            self.epoch_start = int(
                checkpoint.model_checkpoint_path.split('-')[-1])
            print('restore ckpt')
        else:
            self.sess.run(tf.global_variables_initializer())
            self.epoch_start = 0

        # Setup writer for tensorboard
        self.writer = tf.summary.FileWriter(self.dst_log)
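For orientation, a hedged usage sketch of one training iteration against the graph built above; the trainer handle, the batch arrays, and the n_critic schedule are assumptions, not part of the original source (WGAN-GP training typically runs several discriminator updates per generator update):

def train_iteration(trainer, real_imgs, style_ids, char_ids, n_critic=5):
    # Feed the placeholders defined in _prepare_training (names assumed)
    feed = {trainer.real_imgs: real_imgs,
            trainer.style_ids: style_ids,
            trainer.char_ids: char_ids,
            trainer.is_train: True}
    for _ in range(n_critic):  # several critic updates per generator update (assumed)
        trainer.sess.run(trainer.d_train, feed_dict=feed)
    trainer.sess.run(trainer.g_train, feed_dict=feed)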
Example #3
    def model(self, inp_x, inp_y, time_x, time_y):
        # want to generate [inp_x, time_y] -> inp_y
        G_opt_grads = []
        D_Y_opt_grads = []
        F_opt_grads = []
        D_X_opt_grads = []

        fake_x_list = tf.split(self.fake_x, len(self.use_gpu))
        fake_y_list = tf.split(self.fake_y, len(self.use_gpu))
        inp_x_list = tf.split(inp_x, len(self.use_gpu))
        inp_y_list = tf.split(inp_y, len(self.use_gpu))
        time_x_list = tf.split(time_x, len(self.use_gpu))
        time_y_list = tf.split(time_y, len(self.use_gpu))

        tot_G_loss = 0.
        tot_F_loss = 0.
        tot_D_Y_loss = 0.
        tot_D_X_loss = 0.

        fake_xs = []
        fake_ys = []

        with tf.variable_scope(tf.get_variable_scope()):
            for i, gpu_id in enumerate(self.use_gpu):
                print('Initializing graph on gpu %i' % gpu_id)
                with tf.device('/gpu:%d' % gpu_id):
                    # Pooled fakes already have the date appended
                    pooled_fake_x = fake_x_list[i]
                    pooled_fake_y = fake_y_list[i]
                    inp_x = inp_x_list[i]
                    inp_y = inp_y_list[i]
                    time_x = time_x_list[i]
                    time_y = time_y_list[i]

                    inp_shapes = tf.shape(inp_x)

                    tiled_time_x = tf.expand_dims(
                        tf.expand_dims(time_x, axis=1), axis=1) * tf.ones(
                            (inp_shapes[0], inp_shapes[1], inp_shapes[2], 25))
                    tiled_time_y = tf.expand_dims(
                        tf.expand_dims(time_y, axis=1), axis=1) * tf.ones(
                            (inp_shapes[0], inp_shapes[1], inp_shapes[2], 25))
                    # Real Examples
                    inp_x_time_x = tf.concat([inp_x, tiled_time_x], axis=-1)
                    inp_y_time_y = tf.concat([inp_y, tiled_time_y], axis=-1)

                    # X -> Y
                    fake_y = self.G(inp_x, tiled_time_y)
                    fake_y_time_y = tf.concat([fake_y, tiled_time_y], axis=-1)
                    fake_ys.append(fake_y_time_y)

                    G_gan_loss = self.generator_loss(self.D_Y,
                                                     fake_y_time_y,
                                                     use_lsgan=self.use_lsgan)
                    D_Y_loss = self.discriminator_loss(
                        self.D_Y,
                        inp_y_time_y,
                        pooled_fake_y,
                        use_lsgan=self.use_lsgan)

                    # Y -> X
                    fake_x = self.F(inp_y, tiled_time_x)
                    fake_x_time_x = tf.concat([fake_x, tiled_time_x], axis=-1)
                    fake_xs.append(fake_x_time_x)

                    F_gan_loss = self.generator_loss(self.D_X,
                                                     fake_x_time_x,
                                                     use_lsgan=self.use_lsgan)
                    D_X_loss = self.discriminator_loss(
                        self.D_X,
                        inp_x_time_x,
                        pooled_fake_x,
                        use_lsgan=self.use_lsgan)

                    cycle_loss = self.cycle_consistency_loss(
                        self.G, self.F, inp_x, inp_y, fake_x, fake_y,
                        tiled_time_x, tiled_time_y)

                    F_loss = F_gan_loss + cycle_loss
                    G_loss = G_gan_loss + cycle_loss

                    tot_F_loss += F_loss
                    tot_D_X_loss += D_X_loss
                    tot_G_loss += G_loss
                    tot_D_Y_loss += D_Y_loss

                    tf.get_variable_scope().reuse_variables()

                    G_opt_grads.append(
                        self.opt.compute_gradients(G_loss,
                                                   var_list=self.G.variables))
                    F_opt_grads.append(
                        self.opt.compute_gradients(F_loss,
                                                   var_list=self.F.variables))
                    D_Y_opt_grads.append(
                        self.opt.compute_gradients(
                            D_Y_loss, var_list=self.D_Y.variables))
                    D_X_opt_grads.append(
                        self.opt.compute_gradients(
                            D_X_loss, var_list=self.D_X.variables))
        G_grads = ops.average_gradients(G_opt_grads)
        F_grads = ops.average_gradients(F_opt_grads)
        D_Y_grads = ops.average_gradients(D_Y_opt_grads)
        D_X_grads = ops.average_gradients(D_X_opt_grads)

        G_ts = self.opt.apply_gradients(G_grads, global_step=self.global_step)
        F_ts = self.opt.apply_gradients(F_grads, global_step=self.global_step)
        D_Y_ts = self.opt.apply_gradients(D_Y_grads,
                                          global_step=self.global_step)
        D_X_ts = self.opt.apply_gradients(D_X_grads,
                                          global_step=self.global_step)

        with tf.control_dependencies([G_ts, D_Y_ts, F_ts, D_X_ts]):
            ts = tf.no_op(name='optimizers')

        with tf.control_dependencies([G_ts, F_ts]):
            gen_ts = tf.no_op(name='gen_optimizers')

        fake_x = tf.concat(fake_xs, axis=0)
        fake_y = tf.concat(fake_ys, axis=0)

        num_gpu = float(max(1, len(self.use_gpu)))

        return (tot_G_loss / num_gpu, tot_D_Y_loss / num_gpu,
                tot_F_loss / num_gpu, tot_D_X_loss / num_gpu, fake_y, fake_x,
                ts, gen_ts)
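The ts and gen_ts ops above bundle several apply_gradients calls into a single fetchable op via tf.control_dependencies plus tf.no_op. A stripped-down, runnable sketch of the same idiom (tf.group would work equally well):

import tensorflow as tf

a = tf.Variable(0, name='a')
b = tf.Variable(0, name='b')
step_a = tf.assign_add(a, 1)
step_b = tf.assign_add(b, 1)

# Fetching joint_step forces both assign ops to run first
with tf.control_dependencies([step_a, step_b]):
    joint_step = tf.no_op(name='joint_step')

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(joint_step)
    print(sess.run([a, b]))  # [1, 1]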
Example #4
    def build(self):
        """ Builds a multi-tower model
        """
        with tf.device('/cpu:0'):
            assert self.batch_size % self.num_gpus == 0, (
                'Batch size must be divisible by number of GPUs')

            with tf.name_scope('Input_splits'):
                tower_inputs = [[] for i in range(self.num_gpus)]
                for inp in self.Inputs:
                    # inp.name[:-2] strips the ':0' tensor suffix for the split op name
                    splits = tf.split(inp, self.num_gpus, name=inp.name[:-2])
                    for i, s in enumerate(splits):
                        tower_inputs[i].append(s)

            tower_outputs = []
            tower_losses = []
            tower_grads = []
            with tf.variable_scope(tf.get_variable_scope()):
                for i in range(self.num_gpus):
                    with tf.device('/gpu:%d' % i):
                        with tf.name_scope('%s_%d' % ('tower', i)) as scope:
                            # Calculate the loss for one tower of the model. This function
                            # constructs the entire model but shares the variables across
                            # all towers.
                            outputs, losses, grads = self._build_train_tower(
                                tower_inputs[i],
                                int(self.batch_size / self.num_gpus),
                                reuse=i > 0 or self.model_built)

                            # Reuse variables for the next tower.
                            tf.get_variable_scope().reuse_variables()

                            # Save summaries from the first tower only
                            if i == 0:
                                summaries = tf.get_collection(
                                    tf.GraphKeys.SUMMARIES, scope)

                            tower_outputs.append(outputs)
                            tower_losses.append(losses)
                            tower_grads.append(grads)

            with tf.name_scope('Concat_outputs'):
                outputs = [[] for _ in tower_outputs[0]]
                for t_outputs in tower_outputs:
                    for i, output in enumerate(t_outputs):
                        outputs[i].append(output)
                self.outputs = []
                for outs in outputs:
                    self.outputs.append(tf.concat(outs, 0))

            with tf.name_scope('Concat_losses'):
                losses = [[] for _ in range(len(tower_losses[0]))]
                for t_losses in tower_losses:
                    for i, loss in enumerate(t_losses):
                        losses[i].append(loss)

            with tf.name_scope('Average_grads'):
                var_grads = [[] for _ in range(len(tower_grads[0]))]
                for t_grads in tower_grads:
                    for i, grad in enumerate(t_grads):
                        var_grads[i].append(grad)
                avg_grads = []
                for v_grads in var_grads:
                    avg_grads.append(ops.average_gradients(v_grads))

            if self.grad_summ:
                # Add histograms for gradients.
                with tf.name_scope('Grad_summary'):
                    grads_summ = []
                    for var_grads in avg_grads:
                        for grad, var in var_grads:
                            if grad is not None:
                                grads_summ.append(
                                    tf.summary.histogram(
                                        self._remove_tower_name_prefix(var) +
                                        '/Grads', grad))
                    summaries.append(tf.summary.merge(grads_summ))

            if self.var_summ:
                # Add histograms for trainable variables.
                t_vars = tf.trainable_variables()
                with tf.name_scope('Var_summary'):
                    vars_summ = []
                    for var in t_vars:
                        vars_summ.append(
                            tf.summary.histogram(
                                self._remove_tower_name_prefix(var), var))
                    summaries.append(tf.summary.merge(vars_summ))

            summaries += self.additional_summaries()

            self._tower_outputs(self.outputs)
            self._build_train_ops(losses, avg_grads)
            self.summary_op = tf.summary.merge(summaries, name='summary_op')
            self.saver = tf.train.Saver()
            self.model_built = True
            utils.count_params()
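The Concat_outputs, Concat_losses, and Average_grads blocks above all perform the same list transposition: per-tower lists of items become per-item lists across towers. A compact equivalent for the output case, shown only as a hedged illustration (the function name is hypothetical):

import tensorflow as tf

def transpose_and_concat(tower_outputs):
    # tower_outputs[t][k] is the k-th output of tower t; zip(*...) regroups
    # so each inner tuple holds one output position across all towers
    return [tf.concat(list(outs), axis=0) for outs in zip(*tower_outputs)]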
Example #5
    def model(self, preemptive_reuse=False):
        """
        Initialize the model to train.
        Supports multiple GPUs.
        """
        with tf.variable_scope(tf.get_variable_scope()):
            # split data into n equal batches and distribute them onto multiple GPUs
            im_a_list = tf.split(self.im_a, len(self.use_gpu))
            im_b_list = tf.split(self.im_b, len(self.use_gpu))
            labels_list = tf.split(self.label, len(self.use_gpu))
            # initialize the optimizer
            self._opt = tf.train.AdamOptimizer(
                learning_rate=self.learning_rate)

            # Used to average
            all_grads = []
            all_out = []
            all_loss = []
            batchnorm_updates = []
            for i, gpu_id in enumerate(self.use_gpu):
                print('Initializing graph on gpu/cpu %i' % gpu_id)
                with tf.device('/gpu:%d' % gpu_id):
                    with tf.name_scope('tower_%d' % gpu_id):
                        if preemptive_reuse:
                            tf.get_variable_scope().reuse_variables()

                        im_a, im_b = im_a_list[i], im_b_list[i]
                        labels = labels_list[i]
                        with tf.name_scope('extract_feature_a') as scope:
                            im_a_feat = self.extract_features_resnet50(
                                im_a, scope_name='feature_cnn')
                            self.im_a_feat = im_a_feat
                            # Retain the batch-norm update ops from one branch of the
                            # first tower only: the variables (mean, variance, offset,
                            # scale) are shared across branches and towers
                            if i == 0:
                                batchnorm_updates = tf.get_collection(
                                    tf.GraphKeys.UPDATE_OPS, scope=scope)

                        with tf.name_scope('extract_feature_b'):
                            im_b_feat = self.extract_features_resnet50(
                                im_b, scope_name='feature_cnn', reuse=True)
                            self.im_b_feat = im_b_feat

                        with tf.name_scope('predict_same'):
                            feat_ab = tf.concat(
                                [self.im_a_feat, self.im_b_feat], axis=-1)
                            out_ab = self.predict(feat_ab, name='predict')
                            all_out.append(out_ab)

                        with tf.name_scope('classification_loss'):
                            loss = tf.nn.sigmoid_cross_entropy_with_logits(
                                labels=labels, logits=out_ab)
                            all_loss.append(loss)

                    # Once this is called, every variable in the scope is reused;
                    # passing reuse=False afterwards has no effect
                    tf.get_variable_scope().reuse_variables()
                    grad = self._opt.compute_gradients(
                        loss, var_list=self.get_variables())
                    all_grads.append(grad)  # list of lists of (gradient, variable) tuples

        # Average the gradient and apply
        avg_grads = ops.average_gradients(all_grads)
        self.all_loss = all_loss
        self.avg_grads = avg_grads
        self.loss = tf.reduce_mean(all_loss)

        # Trains all variables for now
        apply_grad_op = self._opt.apply_gradients(self.avg_grads,
                                                  global_step=self.global_iter)
        if len(batchnorm_updates) != 0:
            batchnorm_updates_op = tf.group(*batchnorm_updates)
            self.opt = tf.group(apply_grad_op, batchnorm_updates_op)
        else:
            self.opt = apply_grad_op
        # For logging results
        self.logits = tf.concat(all_out, axis=0)
        self.pred = tf.nn.sigmoid(self.logits)
        self.cls = tf.round(self.pred)
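A condensed sketch of the batch-norm handling above: UPDATE_OPS are collected from a single tower (the moving-average variables are shared, so one copy suffices) and grouped with the gradient application into one training op. The function and argument names here are illustrative, not from the source:

import tensorflow as tf

def make_train_op(opt, avg_grads, first_tower_scope, global_step=None):
    # Moving mean/variance updates live in the UPDATE_OPS collection
    bn_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope=first_tower_scope)
    apply_op = opt.apply_gradients(avg_grads, global_step=global_step)
    return tf.group(apply_op, *bn_updates) if bn_updates else apply_op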
Example #6
    def _prepare_training(self):
        """Prepare training.

        Build TensorFlow's graph. To support multi-GPU training, the
        mini-batch is divided across devices. Training can also resume:
        if a checkpoint file exists in FLAGS.classifier_dir/log, it is
        loaded and training restarts from it.
        """
        self.gpu_n = len(FLAGS.gpu_ids.split(','))
        with tf.device('/cpu:0'):
            self.imgs = tf.placeholder(tf.float32, (FLAGS.batch_size, FLAGS.img_width, FLAGS.img_height, FLAGS.img_dim), name='imgs')
            self.labels = tf.placeholder(tf.float32, (FLAGS.batch_size, 26), name='labels')
            self.is_train = tf.placeholder(tf.bool, name='is_train')
            c_opt = tf.train.AdamOptimizer(learning_rate=0.001)

        c_loss = [0] * self.gpu_n
        c_acc = [0] * self.gpu_n
        c_grads = [0] * self.gpu_n
        label_corrects = [0] * self.gpu_n
        label_ns = [0] * self.gpu_n
        is_not_first = False

        for i in range(self.gpu_n):
            with tf.device('/gpu:{}'.format(i)):
                classifier = Classifier(img_size=(FLAGS.img_width, FLAGS.img_height),
                                        img_dim=FLAGS.img_dim,
                                        k_size=3,
                                        class_n=26,
                                        smallest_unit_n=64)
                classified = classifier(self.imgs, is_train=self.is_train, is_reuse=is_not_first)
                c_loss[i] = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=self.labels, logits=classified))
                label_indices = tf.argmax(self.labels, 1)
                correct_pred = tf.equal(tf.argmax(classified, 1), label_indices)
                c_acc[i] = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

                if FLAGS.labelacc:
                    # Per-class correct counts and totals via one-hot sums
                    masked_label_indices = tf.boolean_mask(label_indices, correct_pred)
                    label_corrects[i] = tf.reduce_sum(tf.one_hot(masked_label_indices, 26), axis=0)
                    label_ns[i] = tf.reduce_sum(tf.one_hot(label_indices, 26), axis=0)

                c_vars = [var for var in tf.trainable_variables() if 'classifier' in var.name]
                c_grads[i] = c_opt.compute_gradients(c_loss[i], var_list=c_vars)
            is_not_first = True

        with tf.device('/cpu:0'):
            self.c_loss = sum(c_loss) / len(c_loss)
            self.c_acc = sum(c_acc) / len(c_acc)
            if FLAGS.labelacc:
                self.c_acc_by_labels = sum(label_corrects) / sum(label_ns)
            avg_c_grads = average_gradients(c_grads)
            self.c_train = c_opt.apply_gradients(avg_c_grads)

        # Soft placement and placement logging are enabled only when the
        # per-label accuracy ops are built (FLAGS.labelacc)
        sess_config = tf.ConfigProto(
            gpu_options=tf.GPUOptions(visible_device_list=FLAGS.gpu_ids),
            allow_soft_placement=FLAGS.labelacc,
            log_device_placement=FLAGS.labelacc
        )
        self.sess = tf.Session(config=sess_config)
        self.saver = tf.train.Saver()
        checkpoint = tf.train.get_checkpoint_state(self.dst_log)
        if checkpoint:
            saver_resume = tf.train.Saver()
            saver_resume.restore(self.sess, checkpoint.model_checkpoint_path)
            self.epoch_start = int(checkpoint.model_checkpoint_path.split('-')[-1])
            print('restore ckpt')
        else:
            self.sess.run(tf.global_variables_initializer())
            self.epoch_start = 0
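The FLAGS.labelacc branch above computes per-class accuracy with a one-hot counting trick. A standalone sketch of that computation (the function name is hypothetical):

import tensorflow as tf

def per_class_accuracy(onehot_labels, logits, class_n=26):
    label_indices = tf.argmax(onehot_labels, 1)
    correct = tf.equal(tf.argmax(logits, 1), label_indices)
    # One-hot sums give per-class counts of correct predictions and occurrences
    correct_per_class = tf.reduce_sum(
        tf.one_hot(tf.boolean_mask(label_indices, correct), class_n), axis=0)
    total_per_class = tf.reduce_sum(tf.one_hot(label_indices, class_n), axis=0)
    return correct_per_class / total_per_class  # NaN for classes absent from the batch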