def _create_image_encoder(preprocess_fn, factory_fn, image_shape, batch_size=32,
                          session=None, checkpoint_path=None,
                          loss_mode="cosine"):
    image_var = tf.placeholder(tf.uint8, (None, ) + image_shape)

    preprocessed_image_var = tf.map_fn(
        lambda x: preprocess_fn(x, is_training=False),
        tf.cast(image_var, tf.float32))

    l2_normalize = loss_mode == "cosine"
    feature_var, _ = factory_fn(
        preprocessed_image_var, l2_normalize=l2_normalize, reuse=None)
    feature_dim = feature_var.get_shape().as_list()[-1]

    if session is None:
        session = tf.Session()
    if checkpoint_path is not None:
        slim.get_or_create_global_step()
        init_assign_op, init_feed_dict = slim.assign_from_checkpoint(
            checkpoint_path, slim.get_variables_to_restore())
        session.run(init_assign_op, feed_dict=init_feed_dict)

    def encoder(data_x):
        out = np.zeros((len(data_x), feature_dim), np.float32)
        _run_in_batches(
            lambda x: session.run(feature_var, feed_dict=x),
            {image_var: data_x}, out, batch_size)
        return out

    return encoder
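
The encoder above delegates batching to a _run_in_batches helper that is not shown on this page. A minimal sketch of such a helper, with the signature assumed from the call site (illustrative, not the canonical implementation):

def _run_in_batches(f, data_dict, out, batch_size):
    # Fill the preallocated `out` array by running `f` over contiguous slices.
    data_len = len(out)
    num_batches = int(data_len / batch_size)
    s, e = 0, 0
    for i in range(num_batches):
        s, e = i * batch_size, (i + 1) * batch_size
        batch_data_dict = {k: v[s:e] for k, v in data_dict.items()}
        out[s:e] = f(batch_data_dict)
    if e < len(out):
        # Run the leftover items that do not fill a complete batch.
        batch_data_dict = {k: v[e:] for k, v in data_dict.items()}
        out[e:] = f(batch_data_dict)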
Example #2
 def load_ckpt(self, sess, ckpt='ckpts/vgg_16.ckpt'):
     variables = slim.get_variables(scope='vgg_16',
                                    suffix="weights") + slim.get_variables(
                                        scope='vgg_16', suffix="biases")
     init_assign_op, init_feed_dict = slim.assign_from_checkpoint(
         ckpt, variables)
     sess.run(init_assign_op, init_feed_dict)
Example #3
def load_ckpt_path(sess, model_path, variables_to_restore=None):
    if variables_to_restore is None:
        variables_to_restore = slim.get_variables_to_restore()
    restore_op, restore_fd = slim.assign_from_checkpoint(
        model_path, variables_to_restore)
    sess.run(restore_op, feed_dict=restore_fd)
    print(f'{model_path} loaded')
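
Examples #1 through #3 all follow the same two-step pattern: slim.assign_from_checkpoint builds an assign op plus a feed dict at graph-construction time, and a single session.run applies it. A self-contained sketch of that pattern (the layer, scope, and checkpoint path are illustrative assumptions):

import tensorflow as tf
import tensorflow.contrib.slim as slim

# Build a graph whose variable names match those stored in the checkpoint.
images = tf.placeholder(tf.float32, (None, 224, 224, 3))
net = slim.conv2d(images, 64, [3, 3], scope='conv1')

# Step 1: construct the restore op and its feed dict.
restore_op, restore_fd = slim.assign_from_checkpoint(
    '/tmp/model.ckpt', slim.get_variables_to_restore())

# Step 2: apply it once inside a session.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(restore_op, feed_dict=restore_fd)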
Example #4
def _create_image_encoder(preprocess_fn,
                          factory_fn,
                          image_shape,
                          batch_size=32,
                          session=None,
                          checkpoint_path=None,
                          loss_mode="cosine"):
    image_var = tf.placeholder(tf.uint8, (None, ) + image_shape)

    preprocessed_image_var = tf.map_fn(
        lambda x: preprocess_fn(x, is_training=False),
        tf.cast(image_var, tf.float32))

    l2_normalize = loss_mode == "cosine"
    feature_var, _ = factory_fn(preprocessed_image_var,
                                l2_normalize=l2_normalize,
                                reuse=None)
    feature_dim = feature_var.get_shape().as_list()[-1]

    if session is None:
        session = tf.Session()
    if checkpoint_path is not None:
        # slim.get_or_create_global_step() is deprecated; use the tf.train equivalent.
        tf.train.get_or_create_global_step()
        init_assign_op, init_feed_dict = slim.assign_from_checkpoint(
            checkpoint_path, slim.get_variables_to_restore())
        session.run(init_assign_op, feed_dict=init_feed_dict)

    def encoder(data_x):
        out = np.zeros((len(data_x), feature_dim), np.float32)
        _run_in_batches(lambda x: session.run(feature_var, feed_dict=x),
                        {image_var: data_x}, out, batch_size)
        return out

    return encoder
Example #5
 def assign_from_checkpoint(variables, checkpoint):
     logging.info('Request to restore {} weights from {}'.format(
         len(variables), checkpoint))
     if not variables:
         logging.error('Can\'t find any variables to restore.')
         sys.exit(1)
     assign_op, feed_dict = slim.assign_from_checkpoint(checkpoint, variables)
     all_assign_ops.append(assign_op)
     all_feed_dict.update(feed_dict)
Example #6
 def assign_from_checkpoint(variables, checkpoint):
    logging.info('Request to restore %d weights from %s',
                 len(variables), checkpoint)
   if not variables:
     logging.error('Can\'t find any variables to restore.')
     sys.exit(1)
   assign_op, feed_dict = slim.assign_from_checkpoint(checkpoint, variables)
   all_assign_ops.append(assign_op)
   all_feed_dict.update(feed_dict)
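
Examples #5 and #6 do not run the assignment themselves; they append to module-level accumulators. A hedged sketch of how those accumulators are plausibly defined and consumed (only the two names come from the snippets; the driver function is an assumption):

all_assign_ops = []  # one assign op group per checkpoint
all_feed_dict = {}   # merged placeholder -> value mapping for those ops

def run_all_assigns(sess):
    # Apply every collected restore in a single session call.
    sess.run(all_assign_ops, feed_dict=all_feed_dict)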
Example #7
def load_ckpt(sess, model_dir, variables_to_restore=None):
    ckpt = tf.train.get_checkpoint_state(model_dir)
    model_path = ckpt.model_checkpoint_path
    if variables_to_restore is None:
        variables_to_restore = slim.get_variables_to_restore()
    restore_op, restore_fd = slim.assign_from_checkpoint(
        model_path, variables_to_restore)
    sess.run(restore_op, feed_dict=restore_fd)
    print(f'{model_path} loaded')
Example #8
    def restore_discriminator(self, params):
        d_vars = [var.name for var in self.dsc_vars]
        variables = slim.get_variables_to_restore(include=d_vars)
        print("variables_dsc: ", variables)

        path = params.encoder_checkpoint_name if not self.isIdeRun else "../checkpoints/exp70/checkpoint/DCGAN.model-50"
        print('restoring discriminator from [%s]...' % path)
        init_restore_op, init_feed_dict = slim.assign_from_checkpoint(model_path=path, var_list=variables)
        self.sess.run(init_restore_op, feed_dict=init_feed_dict)
        print('discriminator restored.')
Example #9
    def restore_encoder(self, params):
        enc_vars = [var.name for var in self.gen_vars if 'g_1' in var.name]
        variables = slim.get_variables_to_restore(include=enc_vars)
        # print("variables1: ", variables)

        path = params.encoder_checkpoint_name if not self.isIdeRun else "../checkpoints/exp70/checkpoint/DCGAN.model-50"
        print('restoring encoder from [%s]...' % path)
        init_restore_op, init_feed_dict = slim.assign_from_checkpoint(model_path=path, var_list=variables)
        self.sess.run(init_restore_op, feed_dict=init_feed_dict)
        print('encoder restored.')
Example #10
    def restore_alexnet(self, chkp_name):
        al_var_names = [var.name for var in self.an_vars]
        variables = slim.get_variables_to_restore(include=al_var_names)
        # print("variables1: ", variables)

        path = chkp_name if not self.isIdeRun else "../checkpoints/exp70/checkpoint/DCGAN.model-50"
        print('restoring alexnet from [%s]...' % path)
        init_restore_op, init_feed_dict = slim.assign_from_checkpoint(model_path=path, var_list=variables)
        self.sess.run(init_restore_op, feed_dict=init_feed_dict)
        print('alexnet restored.')
Example #11
def get_init_fn():
    checkpoint_exclude_scopes=["InceptionV1/Logits", "InceptionV1/AuxLogits", "InceptionV2"]
    
    exclusions = [scope.strip() for scope in checkpoint_exclude_scopes]
    print(exclusions)
    variables_to_restore = []
    for var in slim.get_model_variables():
        excluded = False
        for exclusion in exclusions:
            if var.op.name.startswith(exclusion):
                excluded = True
                break
        if not excluded:
            variables_to_restore.append(var)

    checkpoint_exclude_scopes=["InceptionV2/Logits", "InceptionV2/AuxLogits", "InceptionV1"]
    
    exclusions = [scope.strip() for scope in checkpoint_exclude_scopes]
    print(exclusions)
    variables_to_restore_2 = []
    for var in slim.get_model_variables():
        excluded = False
        for exclusion in exclusions:
            if var.op.name.startswith(exclusion):
                excluded = True
                break
        if not excluded:
            variables_to_restore_2.append(var)


    report_init_assign_op, report_init_feed_dict = slim.assign_from_checkpoint(
                os.path.join(report_inception_network_dir, 'inception_v1.ckpt'), variables_to_restore_2, ignore_missing_vars=True)

    satelite_init_assign_op, satelite_init_feed_dict = slim.assign_from_checkpoint(
                os.path.join(satelite_inception_network_dir, 'inception_v1.ckpt'), variables_to_restore, ignore_missing_vars=True)

    def init_fn(sess):
        sess.run(report_init_assign_op, report_init_feed_dict)
        sess.run(satelite_init_assign_op, satelite_init_feed_dict)

    return init_fn
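
The two filtering loops in Example #11 differ only in their exclusion lists and can be factored into one helper; a sketch under the same slim import assumption:

def variables_excluding(exclusions):
    # Keep model variables whose op name starts with none of the excluded scopes.
    return [var for var in slim.get_model_variables()
            if not any(var.op.name.startswith(ex) for ex in exclusions)]

variables_to_restore = variables_excluding(
    ["InceptionV1/Logits", "InceptionV1/AuxLogits", "InceptionV2"])
variables_to_restore_2 = variables_excluding(
    ["InceptionV2/Logits", "InceptionV2/AuxLogits", "InceptionV1"])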
Example #12
def main(_):
    tf.reset_default_graph()

    env = environment.get_game_environment(FLAGS.maps,
                                           multiproc=FLAGS.multiproc,
                                           random_goal=FLAGS.random_goal,
                                           random_spawn=FLAGS.random_spawn,
                                           apple_prob=FLAGS.apple_prob)
    exp = expert.Expert()
    net = CMAP(num_iterations=FLAGS.vin_iterations,
               estimate_scale=FLAGS.estimate_scale,
               unified_fuser=FLAGS.unified_fuser,
               unified_vin=FLAGS.unified_vin,
               biased_fuser=FLAGS.biased_fuser,
               biased_vin=FLAGS.biased_vin,
               regularization=FLAGS.reg)

    estimate_images = [estimate[0, -1, :, :, 0] for estimate in net.intermediate_tensors['estimate_map_list']]
    goal_images = [goal[0, -1, :, :, 0] for goal in net.intermediate_tensors['goal_map_list']]
    reward_images = [reward[0, -1, :, :, 0] for reward in net.intermediate_tensors['reward_map_list']]
    value_images = [value[0, -1, :, :, 0] for value in net.intermediate_tensors['value_map_list']]
    action_images = [action[0, -1, :, :, 0] for action in net.intermediate_tensors['action_map_list']]

    step_history = tf.placeholder(tf.string, name='step_history')
    step_history_op = tf.summary.text('game/step_history', step_history, collections=['game'])

    global_step = slim.get_or_create_global_step()
    update_global_step_op = tf.assign_add(global_step, 1)

    init_op = tf.variables_initializer([global_step])
    load_op, load_feed_dict = slim.assign_from_checkpoint(FLAGS.modeldir,
                                                          slim.get_variables_to_restore(exclude=[global_step.name]))

    init_op = tf.group(init_op, load_op)

    slim.learning.train(train_op=tf.no_op('train'),
                        logdir=FLAGS.logdir,
                        init_op=init_op,
                        init_feed_dict=load_feed_dict,
                        global_step=global_step,
                        train_step_fn=DAGGER_train_step,
                        train_step_kwargs=dict(env=env, exp=exp, net=net,
                                               update_global_step_op=update_global_step_op,
                                               step_history=step_history,
                                               step_history_op=step_history_op,
                                               estimate_maps=estimate_images,
                                               goal_maps=goal_images,
                                               reward_maps=reward_images,
                                               value_maps=value_images,
                                               action_maps=action_images),
                        number_of_steps=FLAGS.num_games,
                        save_interval_secs=300 if not FLAGS.debug else 60,
                        save_summaries_secs=300 if not FLAGS.debug else 60)
Example #13
 def initialize(self, checkpoint_path=None):
     """Overwrite default to make lazy"""
     init_op = tf.variables_initializer(self.get_init_variables())
     self.session.run(init_op)
     if checkpoint_path is not None:
         self.set_global_step(
             RestoreTFModelHook.parse_global_step(checkpoint_path))
         init_assign_op, init_feed_dict = slim.assign_from_checkpoint(
             checkpoint_path,
             self.get_restore_variables(),
             ignore_missing_vars=True)
         self.session.run(init_assign_op, feed_dict=init_feed_dict)
         self.logger.info("Lazily restored from {}".format(checkpoint_path))
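
Example #13 calls RestoreTFModelHook.parse_global_step, which is defined elsewhere. A plausible sketch, assuming the common 'model.ckpt-<step>' naming convention (an assumption, not the hook's verified source):

import os

def parse_global_step(checkpoint_path):
    # Assumed convention: the step is the integer suffix after the last '-',
    # e.g. '/logs/train/model.ckpt-100000' -> 100000.
    return int(os.path.basename(checkpoint_path).split('-')[-1])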
Example #14
 def load(self, sess):
     model_saver = self.get_saver()
     ckpt = tf.train.latest_checkpoint(str(self.exp_dir),
                                       latest_filename='%s_ckpt' %
                                       self.scope)
     if ckpt is None:
         print('[ %s ] No ckpt found...' % self.scope)
         return
     print('Loading %s' % str(ckpt))
     init_op, init_feed = slim.assign_from_checkpoint(
         model_path=ckpt, var_list=self.vars(), ignore_missing_vars=True)
     sess.run(init_op, init_feed)
     # model_saver.restore(sess, ckpt)
     return
Example #15
        def init_points(sess):

            if Flags.net == 'D':
                restore = slim.get_model_variables('train/densenet161')

                restore = {change_checkpoint_name(var): var for var in restore}

                init_points_dense_op, init_points_dense_feed_dict = slim.assign_from_checkpoint(
                    os.path.join(Flags.checkpoint_dir, 'tf-densenet161.ckpt'),
                    restore)

                sess.run(init_points_dense_op, init_points_dense_feed_dict)
            else:
                restore = slim.get_model_variables('train/vgg_16')

                restore = {
                    change_checkpoint_name3(var): var
                    for var in restore
                }

                init_points_vgg_op, init_points_vgg_feed_dict = slim.assign_from_checkpoint(
                    os.path.join(Flags.checkpoint_dir, 'vgg_16.ckpt'), restore)

                sess.run(init_points_vgg_op, init_points_vgg_feed_dict)
Example #16
    def testTrainWithInitFromCheckpoint(self):
        logdir1 = os.path.join(self.get_temp_dir(), 'tmp_logs1/')
        logdir2 = os.path.join(self.get_temp_dir(), 'tmp_logs2/')
        if tf.gfile.Exists(logdir1):  # For running on jenkins.
            tf.gfile.DeleteRecursively(logdir1)
        if tf.gfile.Exists(logdir2):  # For running on jenkins.
            tf.gfile.DeleteRecursively(logdir2)

        # First, train the model one step (make sure the error is high).
        g = tf.Graph()
        with g.as_default():
            tf.set_random_seed(0)
            train_op = self.create_train_op()
            loss = slim.learning.train(train_op, logdir1, number_of_steps=1)
            self.assertGreater(loss, .5)

        # Next, train the model to convergence.
        g = tf.Graph()
        with g.as_default():
            tf.set_random_seed(1)
            train_op = self.create_train_op()
            loss = slim.learning.train(train_op, logdir1, number_of_steps=300)
            self.assertLess(loss, .02)

        # Finally, advance the model a single step and validate that the loss is
        # still low.
        g = tf.Graph()
        with g.as_default():
            tf.set_random_seed(2)
            train_op = self.create_train_op()

            model_variables = tf.all_variables()
            model_path = os.path.join(logdir1, 'model.ckpt-300')

            init_op = tf.initialize_all_variables()
            op, init_feed_dict = slim.assign_from_checkpoint(
                model_path, model_variables)

            def InitAssignFn(sess):
                sess.run(op, init_feed_dict)

            loss = slim.learning.train(train_op,
                                       logdir2,
                                       number_of_steps=1,
                                       init_op=init_op,
                                       init_fn=InitAssignFn)

            self.assertLess(loss, .02)
Example #17
    def make_init_fn(self, chpt_path):
        # Handle model initialization from prior checkpoint
        if chpt_path is None:
            return None

        var2restore = slim.get_variables_to_restore(exclude=self.exclude_scopes)
        print('Variables to restore: {}'.format([v.op.name for v in var2restore]))
        var2restore = remove_missing(var2restore, chpt_path)
        init_assign_op, init_feed_dict = slim.assign_from_checkpoint(chpt_path, var2restore)
        sys.stdout.flush()

        # Create an initial assignment function.
        def init_fn(sess):
            print('Restoring from: {}'.format(chpt_path))
            sess.run(init_assign_op, init_feed_dict)

        return init_fn
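
Example #17 relies on a remove_missing helper that is not shown. A minimal sketch of what it plausibly does with the TF1 checkpoint reader (name and signature taken from the call site; the body is an assumption):

def remove_missing(var_list, checkpoint_path):
    # Drop variables with no entry in the checkpoint, so that
    # slim.assign_from_checkpoint does not fail on a partial match.
    reader = tf.train.NewCheckpointReader(checkpoint_path)
    ckpt_shapes = reader.get_variable_to_shape_map()
    return [v for v in var_list if v.op.name in ckpt_shapes]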
Example #18
def main(unused_argv):
  if not FLAGS.input_file_pattern:
    raise ValueError("--input_file_pattern is required.")
  if not FLAGS.train_dir:
    raise ValueError("--train_dir is required.")

  with open(FLAGS.model_config) as json_config_file:
    model_config = json.load(json_config_file)

  model_config = configuration.model_config(model_config, mode="train")
  tf.logging.info("Building training graph.")
  g = tf.Graph()
  with g.as_default():
    model = s2v_model.s2v(model_config, mode="train")
    model.build()

    optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)

    train_tensor = tf.contrib.slim.learning.create_train_op(
        total_loss=model.total_loss,
        optimizer=optimizer,
        clip_gradient_norm=FLAGS.clip_gradient_norm)

    saver = tf.train.Saver(max_to_keep=FLAGS.max_ckpts)

    variables_to_restore = slim.get_model_variables()
    checkpoint_path = tf.train.latest_checkpoint(model_config.checkpoint_path)
    init_assign_op, init_feed_dict = slim.assign_from_checkpoint(
        checkpoint_path, variables_to_restore)

    def InitAssignFn(sess):
      sess.run(init_assign_op, init_feed_dict)

  nsteps = int(FLAGS.nepochs * (FLAGS.num_train_inst / FLAGS.batch_size))
  slim.learning.train(
      train_op=train_tensor,
      logdir=FLAGS.train_dir,
      graph=g,
      number_of_steps=nsteps,
      save_summaries_secs=FLAGS.save_summaries_secs,
      saver=saver,
      save_interval_secs=FLAGS.save_model_secs,
      init_fn=InitAssignFn
  )
Example #19
def _create_encoder(preprocess_fn,
                    network_factory,
                    image_shape,
                    batch_size=32,
                    session=None,
                    checkpoint_path=None,
                    read_from_file=False):
    if read_from_file:
        num_channels = image_shape[-1] if len(image_shape) == 3 else 1
        input_var = tf.placeholder(tf.string, (None, ))
        image_var = tf.map_fn(lambda x: tf.image.decode_jpeg(
            tf.read_file(x), channels=num_channels),
                              input_var,
                              back_prop=False,
                              dtype=tf.uint8)
        image_var = tf.image.resize_images(image_var, image_shape[:2])
    else:
        input_var = tf.placeholder(tf.uint8, (None, ) + image_shape)
        image_var = input_var

    preprocessed_image_var = tf.map_fn(
        lambda x: preprocess_fn(x, is_training=False),
        image_var,
        back_prop=False,
        dtype=tf.float32)

    feature_var, _ = network_factory(preprocessed_image_var)
    feature_dim = feature_var.get_shape().as_list()[-1]

    if session is None:
        session = tf.Session()
    if checkpoint_path is not None:
        tf.train.get_or_create_global_step()
        init_assign_op, init_feed_dict = slim.assign_from_checkpoint(
            checkpoint_path, slim.get_model_variables())
        session.run(init_assign_op, feed_dict=init_feed_dict)

    def encoder(data_x):
        out = np.zeros((len(data_x), feature_dim), np.float32)
        queued_trainer.run_in_batches(
            lambda x: session.run(feature_var, feed_dict=x),
            {input_var: data_x}, out, batch_size)
        return out

    return encoder
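
The read_from_file branch in Example #19 lets the same encoder consume either raw uint8 arrays or JPEG file paths. A hypothetical call for the file-path case (shapes, paths, and the two factory arguments are illustrative):

encoder = _create_encoder(preprocess_fn, network_factory, (128, 64, 3),
                          checkpoint_path='model.ckpt-100000',
                          read_from_file=True)
features = encoder(np.asarray(['patch_000001.jpg', 'patch_000002.jpg']))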
Example #20
    def testTrainWithInitFromCheckpoint(self):
        logdir1 = os.path.join(self.get_temp_dir(), "tmp_logs1/")
        logdir2 = os.path.join(self.get_temp_dir(), "tmp_logs2/")
        if tf.gfile.Exists(logdir1):  # For running on jenkins.
            tf.gfile.DeleteRecursively(logdir1)
        if tf.gfile.Exists(logdir2):  # For running on jenkins.
            tf.gfile.DeleteRecursively(logdir2)

        # First, train the model one step (make sure the error is high).
        g = tf.Graph()
        with g.as_default():
            tf.set_random_seed(0)
            train_op = self.create_train_op()
            loss = slim.learning.train(train_op, logdir1, number_of_steps=1)
            self.assertGreater(loss, 0.5)

        # Next, train the model to convergence.
        g = tf.Graph()
        with g.as_default():
            tf.set_random_seed(1)
            train_op = self.create_train_op()
            loss = slim.learning.train(train_op, logdir1, number_of_steps=300)
            self.assertLess(loss, 0.02)

        # Finally, advance the model a single step and validate that the loss is
        # still low.
        g = tf.Graph()
        with g.as_default():
            tf.set_random_seed(2)
            train_op = self.create_train_op()

            model_variables = tf.all_variables()
            model_path = os.path.join(logdir1, "model.ckpt-300")

            init_op = tf.initialize_all_variables()
            op, init_feed_dict = slim.assign_from_checkpoint(model_path, model_variables)

            def InitAssignFn(sess):
                sess.run(op, init_feed_dict)

            loss = slim.learning.train(train_op, logdir2, number_of_steps=1, init_op=init_op, init_fn=InitAssignFn)

            self.assertLess(loss, 0.02)
Example #21
def val(config):

    val_dataset = Dataset(os.path.join(config['input']['path']))

    with tf.Graph().as_default():

        with tf.name_scope('val') as scope:
            val_loss, val_accuracy, val_summary = build_val_graph(config, val_dataset)

        exclude = cnn_architectures.model_weight_excludes(config['model']['architecture'])
        variables_to_restore = slim.get_variables_to_restore(exclude=exclude)
        init_assign_op, init_feed_dict = slim.assign_from_checkpoint(config['model']['checkpoint'],
                                                                     variables_to_restore)

        # initialize
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            sess.run(init_assign_op, init_feed_dict)

            # Start the queue runners.
            coord = tf.train.Coordinator()
            tf.train.start_queue_runners(sess=sess, coord=coord)
            print('graph built')

            com_acc = 0.0
            com_loss = 0.0
            count = 0
            for x in range(val_dataset.num_images() // config['parameters']['batch_size'] + 1):
                acc_v, loss_v = sess.run([val_accuracy, val_loss])
                com_acc += acc_v
                com_loss += loss_v
                count += 1
            print('validation loss: {} validation_accuracy: {}'.format(com_loss / count, com_acc / count))

            logging.info('accuracy = {}, loss = {}'.format(com_acc / count, com_loss / count))
Example #22
    def load_model(self, dirname):

        self.init()

        # Try to load the model from the given directory
        latest_checkpoint = tf.train.latest_checkpoint(dirname)

        # If no model available, append current model's scoped name
        if latest_checkpoint is None:
            dirname = os.path.join(dirname, self.scoped_name)
            latest_checkpoint = tf.train.latest_checkpoint(dirname)

        if latest_checkpoint is None:
            raise RuntimeError(
                'Model checkpoint not found at {}'.format(dirname))

        with self.graph.as_default():
            # Use the slim package to load the checkpoint - this gives a chance to ignore missing variables
            init_assign_op, init_feed_dict = slim.assign_from_checkpoint(
                latest_checkpoint, self.parameters, ignore_missing_vars=True)
            self.sess.run(init_assign_op, feed_dict=init_feed_dict)

        self.is_initialized = True
        self.reset_performance_stats()
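
Note the tradeoff in Example #22: ignore_missing_vars=True silently skips variables absent from the checkpoint, whereas a plain Saver restore is strict. The strict equivalent, for comparison (a sketch reusing the names from the snippet):

with self.graph.as_default():
    saver = tf.train.Saver(var_list=self.parameters)
    # Raises an error if any requested variable is missing from the checkpoint.
    saver.restore(self.sess, latest_checkpoint)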
Example #23
def train(args):
    model = AppearanceNetwork(args)

    save_directory = './save/'
    log_file_path = './training.log'
    log_file = open(log_file_path, 'w')

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Graph().as_default():
        global_step = tf.Variable(0, name='global_step', trainable=False)

        image_patches_placeholder = tf.placeholder(
            tf.float32, shape=[args.batch_size, 7, 128, 64, 3])

        labels_placeholder = tf.placeholder(tf.float32,
                                            shape=[args.batch_size])

        lr = tf.Variable(args.base_learning_rate,
                         trainable=False,
                         name="learning_rate")

        features, logits = model.inference(image_patches_placeholder)

        loss = model.cross_entropy_loss(logits, labels_placeholder)

        train_op = build_graph(args, global_step, lr, loss)

        sess = tf.Session()

        saver = tf.train.Saver(max_to_keep=100)

        ckpt = tf.train.get_checkpoint_state('./save')
        if ckpt is None:
            init = tf.global_variables_initializer()
            sess.run(init)
            if args.pretrained_ckpt_path is not None:
                # slim.get_or_create_global_step()
                init_assign_op, init_feed_dict = slim.assign_from_checkpoint(
                    args.pretrained_ckpt_path,
                    slim.get_variables_to_restore(exclude=[
                        "lstm", "fc_layer", "loss", "learning_rate", "softmax",
                        "global_step"
                    ]))
                sess.run(init_assign_op, feed_dict=init_feed_dict)
        else:
            print('Loading Model from ' + ckpt.model_checkpoint_path)
            saver.restore(sess, ckpt.model_checkpoint_path)

        best_epoch = -1
        best_loss_epoch = 0.0
        for curr_epoch in range(args.num_epoches):
            training_loss_epoch = 0.0
            valid_loss_epoch = 0.0

            ############################################# Training process ######################################
            print('Training epoch ' + str(curr_epoch + 1) + '........................')
            training_data_loader = DataLoader(is_valid=False)

            if curr_epoch % 10 == 0:
                sess.run(
                    tf.assign(
                        lr,
                        args.base_learning_rate *
                        (args.decay_rate**curr_epoch / 10)))

            training_data_loader.shuffle()
            training_data_loader.reset_pointer()

            for step in range(training_data_loader.num_batches):
                start_time = time.time()

                image_patches, labels = training_data_loader.next_batch()

                _, loss_batch = sess.run(
                    [train_op, loss],
                    feed_dict={
                        image_patches_placeholder: image_patches,
                        labels_placeholder: labels
                    })

                end_time = time.time()
                training_loss_epoch += loss_batch
                print(
                    "Training {}/{} (epoch {}), train_loss = {:.8f}, time/batch = {:.3f}"
                    .format(step + 1, training_data_loader.num_batches,
                            curr_epoch + 1, loss_batch, end_time - start_time))

            print('Epoch ' + str(curr_epoch + 1) + ' training is done! Saving model...')
            checkpoint_path = os.path.join(save_directory, 'model.ckpt')
            saver.save(sess, checkpoint_path, global_step=global_step)

            ############################################# Validating process ######################################
            print('Validating epoch ' + str(curr_epoch + 1) + '...........................')
            valid_data_loader = DataLoader(is_valid=True)

            valid_data_loader.shuffle()
            valid_data_loader.reset_pointer()
            for step in range(valid_data_loader.num_batches):
                start_time = time.time()

                image_patches, labels = valid_data_loader.next_batch()

                loss_batch = sess.run(loss,
                                      feed_dict={
                                          image_patches_placeholder:
                                          image_patches,
                                          labels_placeholder: labels
                                      })

                end_time = time.time()
                valid_loss_epoch += loss_batch
                print(
                    "Validating {}/{} (epoch {}), valid_loss = {:.8f}, time/batch = {:.3f}"
                    .format(step + 1, valid_data_loader.num_batches,
                            curr_epoch + 1, loss_batch, end_time - start_time))

            # Update best valid epoch
            if best_epoch == -1 or best_loss_epoch > valid_loss_epoch:
                best_epoch = curr_epoch + 1
                best_loss_epoch = valid_loss_epoch

            log_file.write('epoch ' + str(curr_epoch + 1) + '\n')
            log_file.write(
                str(curr_epoch + 1) + ',' + str(training_loss_epoch) + '\n')
            log_file.write(
                str(curr_epoch + 1) + ',' + str(valid_loss_epoch) + '\n')
            log_file.write(str(best_epoch) + ',' + str(best_loss_epoch) + '\n')

        log_file.close()
Example #24
def main():
    model = config.get('config', 'model')
    logdir = utils.get_logdir(config)
    if args.delete:
        tf.logging.warn('delete logging directory: ' + logdir)
        shutil.rmtree(logdir, ignore_errors=True)
    cachedir = utils.get_cachedir(config)
    with open(os.path.join(cachedir, 'names'), 'r') as f:
        names = [line.strip() for line in f]
    width = config.getint(model, 'width')
    height = config.getint(model, 'height')
    cell_width, cell_height = utils.calc_cell_width_height(config, width, height)
    tf.logging.warn('(width, height)=(%d, %d), (cell_width, cell_height)=(%d, %d)' % (width, height, cell_width, cell_height))
    yolo = importlib.import_module('model.' + model)
    paths = [os.path.join(cachedir, profile + '.tfrecord') for profile in args.profile]
    num_examples = sum(sum(1 for _ in tf.python_io.tf_record_iterator(path)) for path in paths)
    tf.logging.warn('num_examples=%d' % num_examples)
    with tf.name_scope('batch'):
        image_rgb, labels = utils.data.load_image_labels(paths, len(names), width, height, cell_width, cell_height, config)
        with tf.name_scope('per_image_standardization'):
            image_std = tf.image.per_image_standardization(image_rgb)
        batch = tf.train.shuffle_batch((image_std,) + labels, batch_size=args.batch_size,
            capacity=config.getint('queue', 'capacity'), min_after_dequeue=config.getint('queue', 'min_after_dequeue'),
            num_threads=multiprocessing.cpu_count()
        )
    global_step = tf.contrib.framework.get_or_create_global_step()
    builder = yolo.Builder(args, config)
    builder(batch[0], training=True)
    with tf.name_scope('total_loss') as name:
        builder.create_objectives(batch[1:])
        total_loss = tf.losses.get_total_loss(name=name)
    variables_to_restore = slim.get_variables_to_restore(exclude=args.exclude)
    with tf.name_scope('optimizer'):
        try:
            decay_steps = config.getint('exponential_decay', 'decay_steps')
            decay_rate = config.getfloat('exponential_decay', 'decay_rate')
            staircase = config.getboolean('exponential_decay', 'staircase')
            learning_rate = tf.train.exponential_decay(args.learning_rate, global_step, decay_steps, decay_rate, staircase=staircase)
            tf.logging.warn('using a learning rate start from %f with exponential decay (decay_steps=%d, decay_rate=%f, staircase=%d)' % (args.learning_rate, decay_steps, decay_rate, staircase))
        except (configparser.NoSectionError, configparser.NoOptionError):
            learning_rate = args.learning_rate
            tf.logging.warn('using a stationary learning rate %f' % args.learning_rate)
        optimizer = get_optimizer(config, args.optimizer)(learning_rate)
        tf.logging.warn('optimizer=' + args.optimizer)
        train_op = slim.learning.create_train_op(total_loss, optimizer, global_step,
            clip_gradient_norm=args.gradient_clip, summarize_gradients=config.getboolean('summary', 'gradients'),
        )
    if args.transfer:
        path = os.path.expanduser(os.path.expandvars(args.transfer))
        tf.logging.warn('transferring from ' + path)
        init_assign_op, init_feed_dict = slim.assign_from_checkpoint(path, variables_to_restore)
        def init_fn(sess):
            sess.run(init_assign_op, init_feed_dict)
            tf.logging.warn('transferring from global_step=%d, learning_rate=%f' % sess.run((global_step, learning_rate)))
    else:
        init_fn = lambda sess: tf.logging.warn('global_step=%d, learning_rate=%f' % sess.run((global_step, learning_rate)))
    summary(config)
    tf.logging.warn('tensorboard --logdir ' + logdir)
    slim.learning.train(train_op, logdir, master=args.master, is_chief=(args.task == 0),
        global_step=global_step, number_of_steps=args.steps, init_fn=init_fn,
        summary_writer=tf.summary.FileWriter(os.path.join(logdir, args.logname)),
        save_summaries_secs=args.summary_secs, save_interval_secs=args.save_secs
    )
Example #25
def train():

    seed = 8964
    tf.set_random_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    pp = pprint.PrettyPrinter()
    pp.pprint(flags.FLAGS.__flags)

    if not os.path.exists(opt.checkpoint_dir):
        os.makedirs(opt.checkpoint_dir)

    with tf.Graph().as_default():
        # Data Loader
        loader = DataLoader(opt)
        tgt_image, src_image_stack, intrinsics = loader.load_train_batch()
        #print(tgt_image.shape, src_image_stack.shape)

        # Build Model
        model = GeoNetModel(opt, tgt_image, src_image_stack, intrinsics)
        loss = model.total_loss
        #mask = {}
        #mask["mask"] = model.tgt_image_tile_pyramid[0][0:,:,3:4]
        #mask["target"] = model.tgt_image
        # Train Op
        summary_mask = tf.summary.image('v1_mask',
                                        model.dispnet_inputs_mask[:1, :, :, :],
                                        1)
        #summary_mask_swell = tf.summary.image('v1_mask_swell', model.dispnet_inputs_mask_swell[:1,:,:,:], 1)
        summary_fwd_tgt_ignore = tf.summary.image(
            'v1_fwd_tgt_ignore', model.fwd_tgt_ignore[0][:1, :, :, :], 1)
        summary_bwd_src_ignore = tf.summary.image(
            'v1_bwd_src_ignore', model.bwd_src_ignore[0][:1, :, :, :], 1)
        summary_fwd_tgt_ignore_full = tf.summary.image(
            'v1_fwd_tgt_ignore_full',
            model.fwd_tgt_ignore_full[0][:1, :, :, :], 1)
        summary_bwd_src_ignore_full = tf.summary.image(
            'v1_bwd_src_ignore_full',
            model.bwd_src_ignore_full[0][:1, :, :, :], 1)

        summary_img = tf.summary.image('v1_warp_img',
                                       model.tgt_image[:1, :, :, :3], 3)
        summary_depth = tf.summary.image('v1_depth', model.pred_depth[0][:1],
                                         1)
        summary_rigid_fwd = tf.summary.image(
            'v1_rigid_fwd',
            tf.concat([
                model.fwd_rigid_flow_origin_pyramid[0][4:5],
                tf.zeros((1, 128, 416, 1))
            ], 3), 3)
        summary_rigid_bwd = tf.summary.image(
            'v1_rigid_bwd',
            tf.concat([
                model.bwd_rigid_flow_origin_pyramid[0][4:5],
                tf.zeros((1, 128, 416, 1))
            ], 3), 3)
        summary_flow_fwd = tf.summary.image(
            'v1_rflow_fwd',
            tf.concat([
                model.fwd_rigid_flow_pyramid[0][4:5],
                tf.zeros((1, 128, 416, 1))
            ], 3), 3)
        summary_flow_bwd = tf.summary.image(
            'v1_rflow_bwd',
            tf.concat([
                model.bwd_rigid_flow_pyramid[0][4:5],
                tf.zeros((1, 128, 416, 1))
            ], 3), 3)
        summary_res_fwd = tf.summary.image(
            'v1_res_fwd',
            tf.concat([
                model.fwd_res_flow_pyramid[0][4:5],
                tf.zeros((1, 128, 416, 1))
            ], 3), 3)
        summary_res_bwd = tf.summary.image(
            'v1_res_bwd',
            tf.concat([
                model.bwd_res_flow_pyramid[0][4:5],
                tf.zeros((1, 128, 416, 1))
            ], 3), 3)
        summary_warp_fwd = tf.summary.image(
            'v1_warp_fwd', model.fwd_rigid_warp_pyramid[0][4:5, :, :, :3], 3)
        summary_warp_bwd = tf.summary.image(
            'v1_warp_bwd', model.fwd_rigid_warp_pyramid[0][:1, :, :, :3], 3)

        summary_rigid_warp_loss = tf.summary.scalar('v1_rigid_warp_loss',
                                                    model.rigid_warp_loss)
        summary_disp_smooth_loss = tf.summary.scalar('v1_disp_smooth_loss',
                                                     model.disp_smooth_loss)
        summary_flow_warp_loss = tf.summary.scalar('v1_flow_warp_loss',
                                                   model.flow_warp_loss)
        summary_flow_smooth_loss = tf.summary.scalar('v1_flow_smooth_loss',
                                                     model.flow_smooth_loss)
        summary_rigid_smooth_loss = tf.summary.scalar('v1_rigid_smooth_loss',
                                                      model.rigid_smooth_loss)
        #summary_depth_constraint_loss = tf.summary.scalar('v1_depth_constraint_loss', model.depth_constraint_loss)
        summary_flow_consistency_loss = tf.summary.scalar(
            'v1_flow_consistency_loss', model.flow_consistency_loss)
        summary_rigid_consistency_loss = tf.summary.scalar(
            'v1_rigid_consistency_loss', model.rigid_consistency_loss)

        merged_summary = tf.summary.merge_all()

        if opt.mode == 'train_flow' and opt.flownet_type == "residual":
            # we pretrain DepthNet & PoseNet, then finetune ResFlowNetS
            train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                           "flow_net")
            vars_to_restore = slim.get_variables_to_restore(
                include=["depth_net", "pose_net"])
        else:
            train_vars = [var for var in tf.trainable_variables()]
            vars_to_restore = slim.get_model_variables()

        if opt.init_ckpt_file is not None:
            init_assign_op, init_feed_dict = slim.assign_from_checkpoint(
                opt.init_ckpt_file, vars_to_restore)

        optim = tf.train.AdamOptimizer(opt.learning_rate, 0.9)
        train_op = slim.learning.create_train_op(loss,
                                                 optim,
                                                 variables_to_train=train_vars)

        # Global Step
        global_step = tf.Variable(0, name='global_step', trainable=False)
        incr_global_step = tf.assign(global_step, global_step + 1)

        # Parameter Count
        parameter_count = tf.reduce_sum([tf.reduce_prod(tf.shape(v)) \
                                        for v in train_vars])

        # Saver
        saver = tf.train.Saver([var for var in tf.model_variables()] + \
                                [global_step],
                                max_to_keep=opt.max_to_keep)

        # Session
        sv = tf.train.Supervisor(logdir=opt.checkpoint_dir,
                                 save_summaries_secs=0,
                                 saver=None)
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True

        with sv.managed_session(config=config) as sess:
            writer = tf.summary.FileWriter('../logs_v1', sess.graph)
            print('Trainable variables: ')
            for var in train_vars:
                print(var.name)
                print(var.shape)
            print("parameter_count =", sess.run(parameter_count))

            if opt.init_ckpt_file is not None:
                sess.run(init_assign_op, init_feed_dict)
            start_time = time.time()

            for step in range(1, opt.max_steps):
                fetches = {
                    "train": train_op,
                    "global_step": global_step,
                    "incr_global_step": incr_global_step,
                    #"input": model.tgt_image
                }
                if step % 10 == 0:
                    mysum = sess.run(merged_summary)
                    writer.add_summary(mysum, step)
                    writer.flush()
                if step % 100 == 0:
                    fetches["loss"] = loss
                results = sess.run(fetches)
                #aaaa =  np.array(sess.run(mask)["target"])
                #print(results["input"][0,:,:,3:4])
                #print aaaa[0,100:,:50,3]
                #print aaaa[0,100:,:50,3].max()
                #print aaaa[0,100:,:50,3].min()
                #print aaaa[0,100:,:50,3].shape
                if step % 100 == 0:
                    time_per_iter = (time.time() - start_time) / 100
                    start_time = time.time()
                    print('Iteration: [%7d] | Time: %4.4fs/iter | Loss: %.3f' \
                          % (step, time_per_iter, results["loss"]))
                if step % opt.save_ckpt_freq == 0:
                    saver.save(sess,
                               os.path.join(opt.checkpoint_dir, 'model'),
                               global_step=step)
Example #26
def train():

    seed = 8964
    tf.set_random_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    if not os.path.exists(opt.checkpoint_dir):
        os.makedirs(opt.checkpoint_dir)

    with tf.Graph().as_default():
        global_step = tf.Variable(0, name='global_step', trainable=False)
        incr_global_step = tf.assign(global_step, global_step + 1)
        optim = tf.train.AdamOptimizer(opt.learning_rate, 0.9)

        loader = DataLoader(opt)

        losses = []

        img_losses = []
        rigid_warp_losses = []
        disp_smooth_losses = []

        sem_losses = []
        sem_warp_losses = []
        sem_mask_losses = []
        sem_edge_losses = []

        sem_seg_losses = []
        ins0_seg_losses = []
        ins1_edge_seg_losses = []

        ins_losses = []

        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(opt.num_gpus):
                with tf.device('/gpu:{:d}'.format(i)):
                    with tf.name_scope('gpu{:d}'.format(i)):
                        # Get images batch from data loader
                        tgt_image, src_image_stack, intrinsics, tgt_sem_tuple, src_sem_stack_tuple, tgt_ins_tuple, src_ins_stack_tuple = loader.load_train_batch(
                        )

                        # Build Model
                        model = SIGNetModel(opt, tgt_image, src_image_stack,
                                            intrinsics, tgt_sem_tuple,
                                            src_sem_stack_tuple, tgt_ins_tuple,
                                            src_ins_stack_tuple)

                        # Handle losses
                        losses.append(model.total_loss)
                        tf.get_variable_scope().reuse_variables()

                        img_losses.append(model.img_loss)
                        rigid_warp_losses.append(model.rigid_warp_loss)
                        disp_smooth_losses.append(model.disp_smooth_loss)
                        if opt.sem_as_loss:
                            sem_losses.append(model.sem_loss)
                            if opt.sem_warp_explore:
                                sem_warp_losses.append(model.sem_warp_loss)
                            if opt.sem_mask_explore:
                                sem_mask_losses.append(model.sem_mask_loss)
                            if opt.sem_edge_explore:
                                sem_edge_losses.append(model.sem_edge_loss)
                        if opt.ins_as_loss:
                            ins_losses.append(model.ins_loss)

                        if opt.sem_assist and opt.add_segnet:
                            sem_seg_losses.append(model.sem_seg_loss)
                            ins0_seg_losses.append(model.ins0_seg_loss)
                            ins1_edge_seg_losses.append(
                                model.ins1_edge_seg_loss)

                        #TODO tensorboard
                        tf.summary.image('tgt_image_g%02d' % (i),
                                         tgt_image,
                                         max_outputs=opt.max_outputs)
                        tf.summary.image('src_image_prev_g%02d' % (i),
                                         src_image_stack[:, :, :, :3],
                                         max_outputs=opt.max_outputs)
                        tf.summary.image('src_image_next_g%02d' % (i),
                                         src_image_stack[:, :, :, 3:],
                                         max_outputs=opt.max_outputs)
                        tf.summary.scalar('loss_g%02d' % (i), model.total_loss)
                        tf.summary.scalar('img_loss_g%02d' % (i),
                                          model.img_loss)
                        tf.summary.scalar('rigid_warp_loss_g%02d' % (i),
                                          model.rigid_warp_loss)
                        tf.summary.scalar('disp_smooth_loss_g%02d' % (i),
                                          model.disp_smooth_loss)

                        if opt.sem_as_loss:
                            tf.summary.scalar('sem_loss_g%02d' % (i),
                                              model.sem_loss)
                            if opt.sem_warp_explore:
                                tf.summary.scalar('sem_warp_loss_g%02d' % (i),
                                                  model.sem_warp_loss)
                        if opt.ins_as_loss:
                            tf.summary.scalar('ins_loss_g%02d' % (i),
                                              model.ins_loss)

                        if opt.sem_assist and opt.add_segnet:
                            tf.summary.scalar('sem_seg_loss_g%02d' % (i),
                                              model.sem_seg_loss)
                            tf.summary.scalar('ins0_seg_loss_g%02d' % (i),
                                              model.ins0_seg_loss)
                            tf.summary.scalar('ins1_edge_seg_loss_g%02d' % (i),
                                              model.ins1_edge_seg_loss)

                        #TODO Add bookkeeping ops
                        if i == 0:
                            # Train Op
                            if opt.mode == 'train_flow' and opt.flownet_type == "residual":
                                train_vars = tf.get_collection(
                                    tf.GraphKeys.TRAINABLE_VARIABLES,
                                    "flow_net")
                            else:
                                #TODO try to enable a solution to fix posenet weight in first stage
                                if opt.mode == 'train_rigid' and opt.fixed_posenet:
                                    if opt.new_sem_dispnet:
                                        train_vars = tf.get_collection(
                                            tf.GraphKeys.TRAINABLE_VARIABLES,
                                            "depth_sem_net")
                                    else:
                                        train_vars = tf.get_collection(
                                            tf.GraphKeys.TRAINABLE_VARIABLES,
                                            "depth_net")
                                else:
                                    train_vars = [
                                        var
                                        for var in tf.trainable_variables()
                                    ]

                            loading_net = ["depth_net", "pose_net"]

                            if opt.new_sem_dispnet:
                                loading_net.append("depth_sem_net")
                            if opt.new_sem_posenet:
                                loading_net.append("pose_sem_net")

                            vars_to_restore = slim.get_variables_to_restore(
                                include=loading_net)

                            if opt.init_ckpt_file is not None:
                                init_assign_op, init_feed_dict = slim.assign_from_checkpoint(
                                    opt.init_ckpt_file, vars_to_restore)

        #TODO Cal mean losses among gpus, and track the loss in TF Summary.
        loss = tf.stack(axis=0, values=losses)
        loss = tf.reduce_mean(loss, 0)
        tf.summary.scalar('loss', loss)

        rigid_warp_loss = tf.stack(axis=0, values=rigid_warp_losses)
        rigid_warp_loss = tf.reduce_mean(rigid_warp_loss, 0)
        tf.summary.scalar('rigid_warp_loss', rigid_warp_loss)
        tf.summary.scalar(
            'unit_rigid_warp_loss',
            rigid_warp_loss / (opt.rigid_warp_weight +
                               tf.convert_to_tensor(1e-8, dtype=tf.float32)))

        disp_smooth_loss = tf.stack(axis=0, values=disp_smooth_losses)
        disp_smooth_loss = tf.reduce_mean(disp_smooth_loss, 0)
        tf.summary.scalar('disp_smooth_loss', disp_smooth_loss)
        tf.summary.scalar(
            'unit_disp_smooth_loss',
            disp_smooth_loss / (opt.disp_smooth_weight +
                                tf.convert_to_tensor(1e-8, dtype=tf.float32)))

        img_loss = tf.stack(axis=0, values=img_losses)
        img_loss = tf.reduce_mean(img_loss, 0)
        tf.summary.scalar('img_loss', img_loss)

        if opt.sem_as_loss:
            sem_loss = tf.stack(axis=0, values=sem_losses)
            sem_loss = tf.reduce_mean(sem_loss, 0)
            tf.summary.scalar('sem_loss', sem_loss)

            if opt.sem_warp_explore:
                sem_warp_loss = tf.stack(axis=0, values=sem_warp_losses)
                sem_warp_loss = tf.reduce_mean(sem_warp_loss, 0)
                tf.summary.scalar('sem_warp_loss', sem_warp_loss)
                tf.summary.scalar(
                    'unit_sem_warp_loss', sem_warp_loss /
                    (opt.sem_warp_weight +
                     tf.convert_to_tensor(1e-8, dtype=tf.float32)))
            if opt.sem_mask_explore:
                sem_mask_loss = tf.stack(axis=0, values=sem_mask_losses)
                sem_mask_loss = tf.reduce_mean(sem_mask_loss, 0)
                tf.summary.scalar('sem_mask_loss', sem_mask_loss)
                tf.summary.scalar(
                    'unit_sem_mask_loss', sem_mask_loss /
                    (opt.sem_mask_weight +
                     tf.convert_to_tensor(1e-8, dtype=tf.float32)))
            if opt.sem_edge_explore:
                sem_edge_loss = tf.stack(axis=0, values=sem_edge_losses)
                sem_edge_loss = tf.reduce_mean(sem_edge_loss, 0)
                tf.summary.scalar('sem_edge_loss', sem_edge_loss)
                tf.summary.scalar(
                    'unit_sem_edge_loss', sem_edge_loss /
                    (opt.sem_edge_weight +
                     tf.convert_to_tensor(1e-8, dtype=tf.float32)))

        if opt.sem_assist and opt.add_segnet:
            sem_seg_loss = tf.stack(axis=0, values=sem_seg_losses)
            sem_seg_loss = tf.reduce_mean(sem_seg_loss, 0)
            tf.summary.scalar('sem_seg_loss', sem_seg_loss)
            tf.summary.scalar(
                'unit_sem_seg_loss', sem_seg_loss /
                (opt.sem_seg_weight +
                 tf.convert_to_tensor(1e-8, dtype=tf.float32)))

            ins0_seg_loss = tf.stack(axis=0, values=ins0_seg_losses)
            ins0_seg_loss = tf.reduce_mean(ins0_seg_loss, 0)
            tf.summary.scalar('ins0_seg_loss', ins0_seg_loss)
            tf.summary.scalar(
                'unit_ins0_seg_loss', ins0_seg_loss /
                (opt.ins0_seg_weight +
                 tf.convert_to_tensor(1e-8, dtype=tf.float32)))

            ins1_edge_seg_loss = tf.stack(axis=0, values=ins1_edge_seg_losses)
            ins1_edge_seg_loss = tf.reduce_mean(ins1_edge_seg_loss, 0)
            tf.summary.scalar('ins1_edge_seg_loss', ins1_edge_seg_loss)
            tf.summary.scalar(
                'unit_ins1_edge_seg_loss', ins1_edge_seg_loss /
                (opt.ins1_edge_seg_weight +
                 tf.convert_to_tensor(1e-8, dtype=tf.float32)))

        if opt.ins_as_loss:
            ins_loss = tf.stack(axis=0, values=ins_losses)
            ins_loss = tf.reduce_mean(ins_loss, 0)
            tf.summary.scalar('ins_loss', ins_loss)

        train_op = slim.learning.create_train_op(
            loss,
            optim,
            variables_to_train=train_vars,
            colocate_gradients_with_ops=True)

        # Saver
        saver = tf.train.Saver([var for var in tf.model_variables()] + \
                                [global_step],
                                max_to_keep=opt.max_to_keep)

        merged_summary = tf.summary.merge_all()

        # Session
        sv = tf.train.Supervisor(logdir=opt.checkpoint_dir,
                                 save_summaries_secs=0,
                                 saver=None)

        config = tf.ConfigProto(allow_soft_placement=True)
        config.gpu_options.allow_growth = True

        with sv.managed_session(config=config) as sess:
            train_writer = tf.summary.FileWriter(opt.summary_dir, sess.graph)

            if opt.init_ckpt_file is not None:
                sess.run(init_assign_op, init_feed_dict)
            start_time = time.time()

            for step in range(1, opt.max_steps):
                fetches = {
                    "train": train_op,
                    "global_step": global_step,
                    "incr_global_step": incr_global_step
                }

                if step % opt.print_interval == 0:
                    fetches["loss"] = loss
                    fetches["img_loss"] = img_loss

                    if opt.sem_as_loss:
                        fetches["sem_loss"] = sem_loss
                    if opt.ins_as_loss:
                        fetches["ins_loss"] = ins_loss
                    if opt.add_segnet:
                        fetches["sem_seg_loss"] = sem_seg_loss
                        fetches["ins0_seg_loss"] = ins0_seg_loss
                        fetches["ins1_edge_seg_loss"] = ins1_edge_seg_loss

                results = sess.run(fetches)

                #TODO Write TF Summary to file.
                if step % opt.save_summ_freq == 0:
                    step_summary = sess.run(merged_summary)
                    train_writer.add_summary(step_summary, step)

                if step % opt.print_interval == 0:

                    time_per_iter = (time.time() -
                                     start_time) / opt.print_interval
                    start_time = time.time()

                    if opt.sem_as_loss:
                        print('Iteration: [%7d] | Time: %4.4fs/iter | Loss: %.3f ImgLoss: %.3f SemLoss: %.3f' \
                        % (step, time_per_iter, results["loss"], results["img_loss"], results["sem_loss"]))
                    elif opt.ins_as_loss:
                        print('Iteration: [%7d] | Time: %4.4fs/iter | Loss: %.3f ImgLoss: %.3f InsLoss: %.3f' \
                        % (step, time_per_iter, results["loss"], results["img_loss"], results["ins_loss"]))
                    else:
                        print('Iteration: [%7d] | Time: %4.4fs/iter | Loss: %.3f ImgLoss: %.3f' \
                        % (step, time_per_iter, results["loss"], results["img_loss"]))

                if step % opt.save_ckpt_freq == 0:
                    saver.save(sess,
                               os.path.join(opt.checkpoint_dir, 'model'),
                               global_step=step)
Example #27
def train():

    seed = 8964
    tf.set_random_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    pp = pprint.PrettyPrinter()
    pp.pprint(flags.FLAGS.__flags)

    if not os.path.exists(opt.checkpoint_dir):
        os.makedirs(opt.checkpoint_dir)

    with tf.Graph().as_default():
        # Data Loader
        loader = DataLoader(opt)
        tgt_image, src_image_stack, intrinsics = loader.load_train_batch()

        # Build Model
        model = GeoNetModel(opt, tgt_image, src_image_stack, intrinsics)
        loss = model.total_loss

        # Train Op
        if opt.mode == 'train_flow' and opt.flownet_type == "residual":
            # we pretrain DepthNet & PoseNet, then finetune ResFlowNetS
            train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "flow_net")
            vars_to_restore = slim.get_variables_to_restore(include=["depth_net", "pose_net"])
        else:
            train_vars = [var for var in tf.trainable_variables()]
            vars_to_restore = slim.get_model_variables()

        if opt.init_ckpt_file is not None:
            init_assign_op, init_feed_dict = slim.assign_from_checkpoint(
                                            opt.init_ckpt_file, vars_to_restore)

        optim = tf.train.AdamOptimizer(opt.learning_rate, 0.9)
        train_op = slim.learning.create_train_op(loss, optim,
                                                 variables_to_train=train_vars)

        # Global Step
        global_step = tf.Variable(0,
                                name='global_step',
                                trainable=False)
        incr_global_step = tf.assign(global_step,
                                     global_step+1)

        # Parameter Count
        parameter_count = tf.reduce_sum([tf.reduce_prod(tf.shape(v)) \
                                        for v in train_vars])

        # Saver
        saver = tf.train.Saver(tf.model_variables() + [global_step],
                               max_to_keep=opt.max_to_keep)

        # Session
        sv = tf.train.Supervisor(logdir=opt.checkpoint_dir,
                                 save_summaries_secs=0,
                                 saver=None)
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True

        with sv.managed_session(config=config) as sess:
            print('Trainable variables: ')
            for var in train_vars:
                print(var.name)
            print("parameter_count =", sess.run(parameter_count))

            if opt.init_ckpt_file is not None:
                sess.run(init_assign_op, init_feed_dict)
            start_time = time.time()

            for step in range(1, opt.max_steps):
                fetches = {
                    "train": train_op,
                    "global_step": global_step,
                    "incr_global_step": incr_global_step
                }
                if step % 100 == 0:
                    fetches["loss"] = loss
                results = sess.run(fetches)
                if step % 100 == 0:
                    time_per_iter = (time.time() - start_time) / 100
                    start_time = time.time()
                    print('Iteration: [%7d] | Time: %4.4fs/iter | Loss: %.3f' \
                          % (step, time_per_iter, results["loss"]))
                if step % opt.save_ckpt_freq == 0:
                    saver.save(sess, os.path.join(opt.checkpoint_dir, 'model'), global_step=step)
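
The function above warm-starts pretrained sub-networks while training a new
one. A minimal sketch of that selective-restore pattern in isolation (the scope
names come from the snippet; `ckpt_path` is a hypothetical checkpoint file):

# Restore only the pretrained scopes; train only the new scope.
vars_to_restore = slim.get_variables_to_restore(include=["depth_net", "pose_net"])
train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "flow_net")
init_assign_op, init_feed_dict = slim.assign_from_checkpoint(ckpt_path, vars_to_restore)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())  # initialize everything first
    sess.run(init_assign_op, init_feed_dict)     # then overwrite the restored scopes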
Example #28
    def run(self,
            feed_generator,
            train_op,
            log_dir="/tmp/slim_trainer/",
            restore_path=None,
            variables_to_restore=None,
            run_id=None,
            max_checkpoints_to_keep=0,
            **kwargs):
        """ Run training.

        Parameters
        ----------
        feed_generator : Iterator[ndarray, ...]
            An iterator or generator that yields batches of training data. Each
            batch must be in one-to-one correspondence with the `enqueue_vars`
            passed to the constructor of this class.
        train_op : tf.Tensor
            The training operation created with `slim.learning.create_train_op`.
        log_dir : Optional[str]
            Path to TensorFlow log directory. This value is used in conjunction
            with `run_id` to generate the checkpoint and summary directory;
            defaults to '/tmp/slim_trainer'.
        restore_path : Optional[str]
            An optional checkpoint path. If not None, resumes training from the
            given checkpoint.
        variables_to_restore : Optional[List[tf.Variable]]
            An optional list of variables to restore from the checkpoint. If
            None, all variables returned by `slim.get_variables_to_restore()`
            are restored. This value is ignored if `restore_path` is None.
        run_id : Optional[str]
            A string that identifies this training run. The checkpoints and
            TensorFlow summaries are stored in `log_dir/run_id`. If None, a
            random ID will be generated. Point tensorboard to this directory to
            monitor training progress.
        max_checkpoints_to_keep : int
            Keep only the `max_checkpoints_to_keep` newest checkpoints. If 0,
            keep all checkpoints.
        kwargs:
            Additional named arguments passed on to `slim.learning.train`,
            e.g., `number_of_steps=100` to run 100 iterations of training.

        """
        if restore_path is not None:
            if variables_to_restore is None:
                variables_to_restore = slim.get_variables_to_restore()
            init_assign_op, init_feed_dict = slim.assign_from_checkpoint(
                restore_path, variables_to_restore)
            self._init_fns.append(
                lambda sess: sess.run(init_assign_op, init_feed_dict))
        self._feed_generator = ThreadSafeIterator(feed_generator)
        self._coordinator = tf.train.Coordinator()

        if run_id is None:
            run_id = _generate_run_id(6)
        log_dir = os.path.join(log_dir, run_id)
        print("---------------------------------------")
        print("Run ID: ", run_id)
        print("Log directory: ", log_dir)
        print("---------------------------------------")

        saver = tf.compat.v1.train.Saver(max_to_keep=max_checkpoints_to_keep)
        try:
            slim.learning.train(train_op,
                                log_dir,
                                self._train_step_fn,
                                saver=saver,
                                **kwargs)
        except UnboundLocalError:
            # NOTE(nwojke): Due to a bug in slim, a local variable 'total_loss'
            # is referenced when an exception is raised during training. We
            # catch the exception here because it occurs whenever we close the
            # queue with self._stop_all_threads().
            pass
        self._wait_for_threads()
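
A hedged usage sketch for `run` (the enclosing trainer object and the names
`trainer`, `batch_generator`, `total_loss`, and `optimizer` are assumptions,
not part of this snippet):

train_op = slim.learning.create_train_op(total_loss, optimizer)
trainer.run(
    feed_generator=batch_generator,         # yields batches matching enqueue_vars
    train_op=train_op,
    log_dir="/tmp/slim_trainer/",
    restore_path="pretrained/model.ckpt",   # hypothetical checkpoint to resume from
    max_checkpoints_to_keep=5,
    number_of_steps=100000)                 # forwarded to slim.learning.train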
Example #29
    def __init__(self, FLAGS, session, prefix='model'):
        '''Initialize the model.'''
        self.sess = session
        self.action_dim = FLAGS.action_dim
        # self.output_size = FLAGS.output_size
        self.prefix = prefix
        self.device = FLAGS.device
        self.FLAGS = FLAGS

        self.lr = self.FLAGS.learning_rate
        self.global_step = tf.Variable(0, name='global_step', trainable=False)

        #define the input size of the network input
        if self.FLAGS.network == 'mobile':
            self.input_size = [
                mobile_net.default_image_size[FLAGS.depth_multiplier],
                mobile_net.default_image_size[FLAGS.depth_multiplier], 3
            ]
        elif self.FLAGS.network == 'mobile_nfc':
            self.input_size = [
                mobile_nfc_net.default_image_size[FLAGS.depth_multiplier],
                mobile_nfc_net.default_image_size[FLAGS.depth_multiplier],
                3 * self.FLAGS.n_frames
            ]
        elif sum([
                self.FLAGS.network.startswith(name)
                for name in ['alex', 'squeeze', 'tiny']
        ]):
            versions = {
                'alex': alex_net,
                'alex_v1': alex_net_v1,
                'alex_v2': alex_net_v2,
                'alex_v3': alex_net_v3,
                'alex_v4': alex_net_v4,
                'squeeze': squeeze_net,
                'squeeze_v1': squeeze_net_v1,
                'squeeze_v2': squeeze_net_v2,
                'squeeze_v3': squeeze_net_v3,
                'tiny': tiny_net,
                'tiny_v1': tiny_net_v1,
                'tiny_v2': tiny_net_v2,
                'tiny_v2r': tiny_net_v2_r,
                'tiny_v3': tiny_net_v3,
                'tiny_v4': tiny_net_v4,
                'tiny_CAM': tiny_CAM_net
            }
            self.input_size = versions[self.FLAGS.network].default_image_size
        else:
            raise NotImplementedError(
                'Network is unknown: {}'.format(self.FLAGS.network))

        self.input_size = [None] + self.input_size
        self.output_size = int(
            self.action_dim if not self.FLAGS.discrete else self.action_dim *
            self.FLAGS.action_quantity)

        # define a network for training and for evaluation
        self.inputs = tf.placeholder(tf.float32,
                                     shape=self.input_size,
                                     name='Inputs')
        self.endpoints = {}
        for mode in ['train', 'eval']:
            self.define_network(mode)
            params = sum([
                reduce(lambda x, y: x * y,
                       v.get_shape().as_list())
                for v in tf.trainable_variables()
            ])
            print("total number of parameters: {0}".format(params))

        if self.FLAGS.discrete:
            self.define_discrete_bins(FLAGS.action_bound,
                                      FLAGS.action_quantity)
            self.add_discrete_control_layers(self.endpoints['train'])
            self.add_discrete_control_layers(self.endpoints['eval'])

        # Only feature extracting part is initialized from pretrained model
        if not self.FLAGS.continue_training:
            # make sure you exclude the prediction layers of the model
            list_to_exclude = ["global_step"]
            list_to_exclude.append("MobilenetV1/control")
            list_to_exclude.append("MobilenetV1/aux_depth")
            list_to_exclude.append("H_fc_control")
            list_to_exclude.append("outputs")
            list_to_exclude.append("MobilenetV1/q_depth")
            list_to_exclude.append("Omega")
            print("[model.py]: only load feature extracting part in network.")
        else:  #If continue training
            print("[model.py]: continue training of total network.")
            # list_to_exclude = ["Omega"]
            list_to_exclude = []
            # In case of lifelong learning and continued training: add variables
            # for the importance weights of the previous domain and keep that
            # domain's optimal variables.
        if self.FLAGS.lifelonglearning or self.FLAGS.update_importance_weights:
            self.define_importance_weights(self.endpoints['train'])

        variables_to_restore = slim.get_variables_to_restore(
            exclude=list_to_exclude)

        # get latest folder out of training directory if there is no checkpoint file
        if self.FLAGS.checkpoint_path[0] != '/':
            self.FLAGS.checkpoint_path = self.FLAGS.summary_dir + self.FLAGS.checkpoint_path
        if not os.path.isfile(self.FLAGS.checkpoint_path + '/checkpoint'):
            try:
                self.FLAGS.checkpoint_path = self.FLAGS.checkpoint_path + '/' + [
                    mpath
                    for mpath in sorted(os.listdir(self.FLAGS.checkpoint_path))
                    if os.path.isdir(self.FLAGS.checkpoint_path + '/' +
                                     mpath) and not mpath[-3:] == 'val'
                    and os.path.isfile(self.FLAGS.checkpoint_path + '/' +
                                       mpath + '/checkpoint')
                ][-1]
            except Exception:
                # Fall back to the checkpoint path as given.
                pass

        if not self.FLAGS.scratch:
            print('checkpoint: {}'.format(self.FLAGS.checkpoint_path))
            try:
                init_assign_op, init_feed_dict = slim.assign_from_checkpoint(
                    tf.train.latest_checkpoint(self.FLAGS.checkpoint_path),
                    variables_to_restore)
            except Exception as e:
                print(
                    "Failed to initialize network {0} with checkpoint {1} so training from scratch: {2}"
                    .format(FLAGS.network, FLAGS.checkpoint_path, e))
                FLAGS.scratch = True

        # create saver for checkpoints
        self.saver = tf.train.Saver(max_to_keep=10,
                                    keep_checkpoint_every_n_hours=1)

        # Add the loss and metric functions to the graph for both endpoints of train and eval.
        self.targets = tf.placeholder(
            tf.int32,
            [None, self.action_dim]) if FLAGS.discrete else tf.placeholder(
                tf.float32, [None, self.action_dim])
        self.depth_targets = tf.placeholder(tf.float32, [None, 55, 74])

        self.define_metrics(self.endpoints['eval'])

        if self.FLAGS.continue_training and self.FLAGS.lifelonglearning:
            self.define_star_variables(self.endpoints['train'])

        # Define the training op based on the total loss
        self.define_loss(self.endpoints['train'])
        self.define_train()

        # Define summaries
        self.build_summaries()

        init_all = tf_variables.global_variables_initializer()
        self.sess.run([init_all])
        if not self.FLAGS.scratch:
            self.sess.run([init_assign_op], init_feed_dict)
            print('Successfully loaded model from:{}'.format(
                self.FLAGS.checkpoint_path))
        else:
            print('Training model from scratch so no initialization.')

        # FOR DEBUGGING:
        # for v in tf.trainable_variables():
        # #   # assign importance weights 1 to everyting ==> keep everything as close as possible
        # #   # self.sess.run([tf.assign(self.importance_weights[v.name], np.ones((v.get_shape().as_list()))) for i,v in enumerate(tf.trainable_variables())])

        #   # assign importance weights 1 to everything > 10**-5, else 0 ==> binair importance over the weights ==> freeze some part and leave other
        #   old_weight = self.sess.run(self.importance_weights[v.name])
        #   new_weight = old_weight > 10**-2
        #   new_weight = new_weight.astype(np.float32)
        #   self.sess.run(tf.assign(self.importance_weights[v.name], new_weight))

        if self.FLAGS.continue_training and self.FLAGS.lifelonglearning:
            # print info on loaded importance weights
            for v in tf.trainable_variables():
                weights = self.sess.run(self.importance_weights[v.name])
                weights = weights.flatten()
                # print("{0}: {1} ({2}) min: {3} max: {4}".format(v.name, np.mean(weights), np.var(weights), np.amin(weights), np.amax(weights)))
                print("| {0} | {1} | {2} | {3} | ".format(
                    v.name, np.percentile(weights, 1),
                    np.percentile(weights, 50), np.percentile(weights, 100)))

            # assign star_variables after initialization
            self.sess.run([
                tf.assign(self.star_variables[v.name], v)
                for v in tf.trainable_variables()
            ])
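
The constructor above restores a pretrained feature extractor while skipping
task-specific heads. A minimal sketch of that exclude-based restore on its own
(`checkpoint_dir` and `sess` are assumptions; the scope names are taken from
the snippet):

list_to_exclude = ["global_step", "MobilenetV1/control"]  # heads absent from the checkpoint
variables_to_restore = slim.get_variables_to_restore(exclude=list_to_exclude)
init_assign_op, init_feed_dict = slim.assign_from_checkpoint(
    tf.train.latest_checkpoint(checkpoint_dir), variables_to_restore)
sess.run(tf.global_variables_initializer())  # initialize all variables first
sess.run(init_assign_op, init_feed_dict)     # then load the pretrained subset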
Example #30
def unpickle(path):
    # Load one CIFAR-10 batch file and return labels and images.
    with open(path, 'rb') as fo:
        batch = cPickle.load(fo)
    data = batch['data']
    # Reorder the flat pixel data to (batch, x, y, color).
    imgs = np.transpose(np.reshape(data, (-1, 32, 32, 3), order='F'),
                        axes=(0, 2, 1, 3))
    y = np.asarray(batch['labels'], dtype='uint8')
    return y, imgs
    
y, imgs = unpickle('/Users/oli/Dropbox/data/CIFAR-10/cifar-10-batches-py/test_batch')
y.shape, imgs.shape

tf.reset_default_graph()
images = tf.placeholder(tf.float32, [None, None, None, 3])
imgs_scaled = tf.image.resize_images(images, (224,224))
slim.nets.vgg.vgg_16(imgs_scaled, is_training=False)
variables_to_restore = slim.get_variables_to_restore()
print('Number of variables to restore {}'.format(len(variables_to_restore)))
init_assign_op, init_feed_dict = slim.assign_from_checkpoint('/Users/oli/Dropbox/server_sync/tf_slim_models/vgg_16.ckpt', variables_to_restore)
sess = tf.Session()
sess.run(init_assign_op, init_feed_dict)

g = tf.get_default_graph()
feed = g.get_tensor_by_name('Placeholder:0')
fetch = g.get_tensor_by_name('vgg_16/fc6/BiasAdd:0')

# Feeding 3 images through the net just for testing
feed_vals = imgs[0:3]
res = sess.run(fetch, feed_dict={feed:feed_vals})
np.shape(feed_vals), res.shape
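
Feeding every image at once does not scale; a small batching sketch for larger
sets (reusing `sess`, `feed`, and `fetch` from above; the batch size is an
arbitrary choice):

batch_size = 32
features = []
for s in range(0, len(imgs), batch_size):
    features.append(sess.run(fetch, feed_dict={feed: imgs[s:s + batch_size]}))
features = np.concatenate(features, axis=0)  # fc6 activations for every image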
Example #31
def create_image_trainer(image_shape,
                         num_classes,
                         epochs,
                         batch_size=32,
                         learning_rate_base=0.01,
                         learning_rate_decay_interval=7500,
                         learning_rate_decay=0.99,
                         checkpoint_path=None,
                         model_save_path='model',
                         max_to_keep=100,
                         log_file_path='log/train.log'):
    image_var = tf.placeholder(tf.uint8, (None, ) + image_shape)
    labels_ = tf.placeholder(tf.float32, (None, num_classes))

    preprocessed_image_var = tf.map_fn(
        lambda x: _preprocess(x, is_training=True),
        tf.cast(image_var, tf.float32))

    l2_normalize = True
    factory_fn = _network_factory(num_classes=num_classes,
                                  is_training=True,
                                  weight_decay=1e-8)
    feature_var, logits_var = factory_fn(preprocessed_image_var,
                                         l2_normalize=l2_normalize,
                                         reuse=None)
    feature_dim = feature_var.get_shape().as_list()[-1]
    classification_loss = slim.losses.softmax_cross_entropy(
        logits_var, labels_)
    total_loss = slim.losses.get_total_loss(add_regularization_losses=True)

    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(learning_rate_base, global_step,
                                               learning_rate_decay_interval,
                                               learning_rate_decay)
    optimizer = tf.train.AdamOptimizer(learning_rate)

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_step = optimizer.minimize(total_loss, global_step=global_step)

    session = tf.Session()
    #from tensorflow.python import debug as tf_debug
    #session = tf_debug.LocalCLIDebugWrapperSession(session)

    if checkpoint_path is not None:
        saver = tf.train.Saver(slim.get_variables_to_restore())
        init_assign_op, init_feed_dict = slim.assign_from_checkpoint(
            checkpoint_path, slim.get_variables_to_restore())
        session.run(init_assign_op, feed_dict=init_feed_dict)
    else:
        session.run(tf.global_variables_initializer())

    def trainer(data_x_paths, data_y):
        data_y = np.asarray(data_y)
        saver = tf.train.Saver(max_to_keep=max_to_keep)

        logging.basicConfig(filename=log_file_path,
                            filemode="w",
                            level=logging.DEBUG)
        data_len = len(data_x_paths)
        num_batches = int(data_len / batch_size)

        for epoch in range(epochs):
            indexs = np.arange(data_len)
            np.random.shuffle(indexs)
            s, e = 0, 0
            for i in range(num_batches):
                s, e = i * batch_size, (i + 1) * batch_size
                data_x_batch = np.asarray([
                    cv2.resize(
                        cv2.imread(data_x_paths[index], cv2.IMREAD_COLOR),
                        (image_shape[1], image_shape[0]))
                    for index in indexs[s:e]
                ])
                data_y_batch = data_y[indexs[s:e]]
                batch_data_dict = {
                    image_var: data_x_batch,
                    labels_: data_y_batch
                }
                _, loss_value, global_step_value = session.run(
                    [train_step, total_loss, global_step],
                    feed_dict=batch_data_dict)
                print(global_step_value, loss_value)
                logging.info("%d: %g" % (global_step_value, loss_value))
                if (i + 1) % 1000 == 0:
                    print("save model checkpoint for %d..." %
                          global_step_value)
                    saver.save(session,
                               os.path.join(model_save_path, 'model.ckpt'),
                               global_step=global_step)

            if e < data_len:
                data_x_batch = np.asarray([
                    cv2.resize(
                        cv2.imread(data_x_paths[index], cv2.IMREAD_COLOR),
                        (image_shape[1], image_shape[0]))
                    for index in indexs[e:]
                ])
                data_y_batch = data_y[indexs[e:]]  # match the leftover image batch
                batch_data_dict = {
                    image_var: data_x_batch,
                    labels_: data_y_batch
                }
                _, loss_value, global_step_value = session.run(
                    [train_step, total_loss, global_step],
                    feed_dict=batch_data_dict)
                print(global_step_value, loss_value)
                logging.info("%d: %g" % (global_step_value, loss_value))

            print("save model checkpoint for %d..." % global_step_value)
            saver.save(session,
                       os.path.join(model_save_path, 'model.ckpt'),
                       global_step=global_step)

    return trainer
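
A hedged usage sketch for the returned trainer (`image_paths` and `labels` are
hypothetical lists of image file paths and one-hot label vectors; the image
shape is an arbitrary (height, width, channels) choice):

trainer = create_image_trainer(image_shape=(128, 64, 3), num_classes=10,
                               epochs=20, batch_size=32)
trainer(image_paths, labels)  # labels must have shape (N, num_classes)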
Example #32
    anchor_inputs = tf.placeholder(tf.float32, [None, 224, 224, 3],
                                   name='Anchor_Placeholder')
    positive_inputs = tf.placeholder(tf.float32, [None, 224, 224, 3],
                                     name='Positive_Placeholder')
    with slim.arg_scope(vgg_arg_scope()):
        positive, net1 = vgg_16(positive_inputs)
    with slim.arg_scope(vgg_arg_scope(reuse=True)):
        anchor, net2 = vgg_16(anchor_inputs)

    #print net2
    #for key,value in net2.items():
    #   print '{}\t : {}'.format(key,value.shape)

    variables_to_restore = slim.get_variables_to_restore(
        exclude=['vgg_16/fc8'])
    #variables_to_restore = slim.get_variables_to_restore()
    init_assign_op, init_feed_dict = slim.assign_from_checkpoint(
        './vgg_16.ckpt', variables_to_restore)

    print('LOSS.........................................................')
    with tf.name_scope('contrastive-loss'):
        loss = tf.contrib.losses.metric_learning.contrastive_loss(
            labels=label,
            embeddings_anchor=anchor,
            embeddings_positive=positive,
            margin=margin)
        print('contrastive-loss::', loss.op.name)

    with tf.name_scope('Prediction'):
        pred = tf.norm(positive - anchor, ord='euclidean')
        #pred = tf.sqrt(tf.reduce_mean(tf.square(positive-anchor),1))

    with tf.Session(config=tf.ConfigProto(
Example #33
def _create_image_encoder(preprocess_fn,
                          factory_fn,
                          image_shape,
                          batch_size=32,
                          session=None,
                          checkpoint_path=None,
                          loss_mode="cosine"):
    image_var = tf.placeholder(tf.uint8, (None, ) + image_shape)
    image_var = tf.Print(image_var, [image_var], message="placeholder")

    preprocessed_image_var = tf.map_fn(
        lambda x: preprocess_fn(x, is_training=False),
        tf.cast(image_var, tf.float32))

    l2_normalize = loss_mode == "cosine"
    feature_var, _ = factory_fn(preprocessed_image_var,
                                l2_normalize=l2_normalize,
                                reuse=None)
    feature_dim = feature_var.get_shape().as_list()[-1]

    if session is None:
        session = tf.Session()
    if checkpoint_path is not None:
        slim.get_or_create_global_step()
        init_assign_op, init_feed_dict = slim.assign_from_checkpoint(
            checkpoint_path, slim.get_variables_to_restore())
        session.run(init_assign_op, feed_dict=init_feed_dict)

    def get_print_val(sess, name, feed_dict=None, doprint=True):
        if feed_dict:
            output = sess.run(name, feed_dict=feed_dict)
            if doprint:
                print(name, ' transposed ', output.transpose(0, 3, 1, 2).shape)
                print(output.transpose(0, 3, 1, 2)[0, 0, 0, 0:20])
                print(output.transpose(0, 3, 1, 2)[0, 0, 1, 0:20])
        else:
            output = sess.run(name)
            if doprint:
                print(name, output.shape)
                print(output)
        return output

    def encoder(data_x):
        out = np.zeros((len(data_x), feature_dim), np.float32)
        print('datax:', data_x.shape)
        print(data_x[0, 0, 0:100, 0])
        print(data_x[0, 0, 0:100, 1])
        print(data_x[0, 0, 0:100, 2])
        #print(data_x[1,0,0:100,0])
        #print(data_x[1,0,0:100,1])
        #print(data_x[1,0,0:100,2])

        #_run_in_batches(
        #    lambda x: session.run(feature_var, feed_dict=x),
        #    #lambda x: session.run('conv1_1/Elu:0', feed_dict=x),
        #    {image_var: data_x}, out, batch_size)
        if 1:
            elu_1_data = get_print_val(session,
                                       'Elu_1:0',
                                       feed_dict={image_var: data_x})
            get_print_val(session,
                          'conv3_1/1/Elu:0',
                          feed_dict={image_var: data_x})

            #else:
            conv3_1_weights = get_print_val(session,
                                            'conv3_1/1/weights:0',
                                            doprint=False)
            mean = get_print_val(session,
                                 'conv3_1/1/conv3_1/1/bn/moving_mean:0')
            var = get_print_val(session,
                                'conv3_1/1/conv3_1/1/bn/moving_variance:0')
            offset = get_print_val(session, 'conv3_1/1/conv3_1/1/bn/beta:0')
            print('offset ', offset.shape, type(offset[0]))
            session.close()

            scale = np.ones(offset.shape, dtype=np.float32)
            print('scale ', scale.shape, type(scale[0]))

            ####### new graph
            elu_1 = tf.placeholder(tf.float32, (None, ) + (64, 32, 32))
            print('elu_1 shape', elu_1.get_shape())
            dd = tf.nn.conv2d(elu_1,
                              conv3_1_weights, [1, 2, 2, 1],
                              'SAME',
                              name='conv3_1')
            dd = tf.nn.batch_normalization(dd, mean, var, offset, scale, 1e-3)
            dd = tf.nn.elu(dd, name='conv3_1/elu')

            #cn = elu_1.get_shape().as_list()[-1]*2
            #scope = 'conv3_1'
            #conv_weight_init = tf.truncated_normal_initializer(stddev=1e-3)
            #conv_bias_init = tf.zeros_initializer()
            #conv_regularizer = slim.l2_regularizer(1e-8)
            #elu_1_conv = slim.conv2d(
            #    elu_1, cn, [3, 3], stride=2, activation_fn=tf.nn.elu, padding="SAME",
            #    normalizer_fn=_batch_norm_fn, weights_initializer=conv_weight_init,
            #    biases_initializer=conv_bias_init, weights_regularizer=conv_regularizer,
            #    scope=scope + "/1")
            sess = tf.Session()
            get_print_val(sess, 'conv3_1:0', feed_dict={elu_1: elu_1_data})

            ############

        return out

    return encoder
Example #34
def train(finetune):
    is_training = True

    # data pipeline
    imgs, true_boxes = gen_data_batch(cfg.data_path, cfg.batch_size*cfg.train.num_gpus)
    imgs_split = tf.split(imgs, cfg.train.num_gpus)
    true_boxes_split = tf.split(true_boxes, cfg.train.num_gpus)

    global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0.), trainable=False)
    #lr = tf.train.piecewise_constant(global_step, cfg.train.lr_steps, cfg.train.learning_rate)
    #optimizer = tf.train.AdamOptimizer(learning_rate=lr)
    learn_rate_decay_step = int(cfg.train.num_samples / cfg.batch_size / cfg.train.num_gpus * cfg.train.learn_rate_decay_epoch)
    learning_rate = tf.train.exponential_decay(cfg.train.learn_rate, global_step, learn_rate_decay_step, cfg.train.learn_rate_decay, staircase=True)
    #optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)

    # Calculate the gradients for each model tower.
    tower_grads = []
    with tf.variable_scope(tf.get_variable_scope()):
        for i in range(cfg.train.num_gpus):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('%s_%d' % (cfg.train.tower, i)) as scope:
                    model = SenseClsNet(imgs_split[i], true_boxes_split[i], is_training)
                    loss = model.compute_loss()
                    tf.get_variable_scope().reuse_variables()
                    grads = optimizer.compute_gradients(loss)
                    tower_grads.append(grads)
                    if i == 0:
                        current_loss = loss
                        update_op = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                        # print(tf.GraphKeys.UPDATE_OPS)
                        # print(update_op)
                        # print(grads)
                        # vars_det = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="BioRecNet")
    grads = average_gradients(tower_grads)
    with tf.control_dependencies(update_op):
        apply_gradient_op = optimizer.apply_gradients(grads, global_step=global_step)
        train_op = tf.group(apply_gradient_op, *update_op)

    # GPU config
    config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    # Create a saver
    saver = tf.train.Saver(max_to_keep=1000)
    ckpt_dir = cfg.ckpt_path
    if not os.path.exists(ckpt_dir):
        os.makedirs(ckpt_dir)

    # init
    sess.run(tf.global_variables_initializer())
    if finetune:
        checkpoint = './pre_train.ckpt'

        # variables_to_restore = slim.get_variables_to_restore()
        # init_assign_op, init_feed_dict = slim.assign_from_checkpoint(checkpoint, variables_to_restore, ignore_missing_vars=True)
        # sess.run(init_assign_op, init_feed_dict)

        variables_to_restore = get_variables_to_restore(exclude_global_pool=True)
        init_assign_op, init_feed_dict = slim.assign_from_checkpoint(checkpoint, variables_to_restore, ignore_missing_vars=True)
        sess.run(init_assign_op, init_feed_dict)

    # running
    cnt_epoch = 0

    for i in range(1, cfg.train.max_batches):
        _, loss_, lr_ = sess.run([train_op, current_loss, learning_rate])
        if i % 5 == 0:
            print(i, ': ', loss_, '          lr: ', lr_)
        if int(i) % int(cfg.train.num_samples / cfg.train.num_gpus / cfg.batch_size) == 0:
            cnt_epoch += 1
            saver.save(sess, os.path.join(ckpt_dir, 'senceCls'),
                       global_step=cnt_epoch, write_meta_graph=True)
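
`average_gradients` is called above but not defined in the snippet. A sketch of
the usual tower-gradient averaging helper from the standard TF1 multi-GPU
recipe (an assumption about the intended implementation, not taken from this
code):

def average_gradients(tower_grads):
    """Average a list of per-tower (gradient, variable) lists."""
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # grad_and_vars collects one variable's gradient from every tower.
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars]
        grad = tf.reduce_mean(tf.concat(grads, axis=0), axis=0)
        # Variables are shared across towers, so reuse the first tower's handle.
        average_grads.append((grad, grad_and_vars[0][1]))
    return average_grads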