Example #1
def _run_inference(output_dir=None,
                   file_extension='png',
                   depth=True,
                   egomotion=False,
                   model_ckpt=None,
                   input_dir=None,
                   input_list_file=None,
                   batch_size=1,
                   img_height=128,
                   img_width=416,
                   seq_length=3,
                   architecture=nets.RESNET,
                   imagenet_norm=True,
                   use_skip=True,
                   joint_encoder=True,
                   shuffle=False,
                   flip_for_depth=False,
                   inference_mode=INFERENCE_MODE_SINGLE,
                   inference_crop=INFERENCE_CROP_NONE,
                   use_masks=False):
    """Runs inference. Refer to flags in inference.py for details."""
    inference_model = model.Model(is_training=False,
                                  batch_size=batch_size,
                                  img_height=img_height,
                                  img_width=img_width,
                                  seq_length=seq_length,
                                  architecture=architecture,
                                  imagenet_norm=imagenet_norm,
                                  use_skip=use_skip,
                                  joint_encoder=joint_encoder)
    vars_to_restore = util.get_vars_to_save_and_restore(model_ckpt)
    saver = tf.train.Saver(vars_to_restore)
    sv = tf.train.Supervisor(logdir='/tmp/', saver=None)
    with sv.managed_session() as sess:
        saver.restore(sess, model_ckpt)
        if not gfile.Exists(output_dir):
            gfile.MakeDirs(output_dir)
        LOGGING.info('Predictions will be saved in %s.', output_dir)

        # Collect all images to run inference on.
        im_files, basepath_in = collect_input_images(input_dir,
                                                     input_list_file,
                                                     file_extension)
        if shuffle:
            LOGGING.info('Shuffling data...')
            np.random.shuffle(im_files)
        LOGGING.info('Running inference on %d files.', len(im_files))

        # Create missing output folders and pre-compute target directories.
        output_dirs = create_output_dirs(im_files, basepath_in, output_dir)

        # Run depth prediction network.
        if depth:
            im_batch = []
            for i in range(len(im_files)):
                if i % 100 == 0:
                    LOGGING.info('%s of %s files processed.', i, len(im_files))

                # Read image and run inference.
                if inference_mode == INFERENCE_MODE_SINGLE:
                    if inference_crop == INFERENCE_CROP_NONE:
                        im = util.load_image(im_files[i],
                                             resize=(img_width, img_height))
                    elif inference_crop == INFERENCE_CROP_CITYSCAPES:
                        im = util.crop_cityscapes(util.load_image(im_files[i]),
                                                  resize=(img_width,
                                                          img_height))
                elif inference_mode == INFERENCE_MODE_TRIPLETS:
                    im = util.load_image(im_files[i],
                                         resize=(img_width * 3, img_height))
                    im = im[:, img_width:img_width * 2]
                if flip_for_depth:
                    im = np.flip(im, axis=1)
                im_batch.append(im)

                if len(im_batch) == batch_size or i == len(im_files) - 1:
                    # Call inference on batch.
                    for _ in range(batch_size -
                                   len(im_batch)):  # Fill up batch.
                        im_batch.append(
                            np.zeros(shape=(img_height, img_width, 3),
                                     dtype=np.float32))
                    im_batch = np.stack(im_batch, axis=0)
                    est_depth = inference_model.inference_depth(im_batch, sess)
                    if flip_for_depth:
                        est_depth = np.flip(est_depth, axis=2)
                        im_batch = np.flip(im_batch, axis=2)

                    for j in range(len(im_batch)):
                        color_map = util.normalize_depth_for_display(
                            np.squeeze(est_depth[j]))
                        visualization = np.concatenate(
                            (im_batch[j], color_map), axis=0)
                        # Save raw prediction and color visualization. Extract filename
                        # without extension from full path: e.g. path/to/input_dir/folder1/
                        # file1.png -> file1
                        k = i - len(im_batch) + 1 + j
                        filename_root = os.path.splitext(
                            os.path.basename(im_files[k]))[0]
                        pref = '_flip' if flip_for_depth else ''
                        output_raw = os.path.join(
                            output_dirs[k], filename_root + pref + '.npy')
                        output_vis = os.path.join(
                            output_dirs[k], filename_root + pref + '.png')
                        with gfile.Open(output_raw, 'wb') as f:
                            np.save(f, est_depth[j])
                        util.save_image(output_vis, visualization,
                                        file_extension)
                    im_batch = []

        # Run egomotion network.
        if egomotion:
            if inference_mode == INFERENCE_MODE_SINGLE:
                # Run regular egomotion inference loop.
                input_image_seq = []
                input_seg_seq = []
                current_sequence_dir = None
                current_output_handle = None
                for i in range(len(im_files)):
                    sequence_dir = os.path.dirname(im_files[i])
                    if sequence_dir != current_sequence_dir:
                        # Assume start of a new sequence, since this image lies in a
                        # different directory than the previous ones.
                        # Clear egomotion input buffer.
                        output_filepath = os.path.join(output_dirs[i],
                                                       'egomotion.txt')
                        if current_output_handle is not None:
                            current_output_handle.close()
                        current_sequence_dir = sequence_dir
                        LOGGING.info('Writing egomotion sequence to %s.',
                                     output_filepath)
                        current_output_handle = gfile.Open(
                            output_filepath, 'w')
                        input_image_seq = []
                    im = util.load_image(im_files[i],
                                         resize=(img_width, img_height))
                    input_image_seq.append(im)
                    if use_masks:
                        im_seg_path = im_files[i].replace(
                            '.%s' % file_extension, '-seg.%s' % file_extension)
                        if not gfile.Exists(im_seg_path):
                            raise ValueError(
                                'No segmentation mask %s has been found for '
                                'image %s. If none are available, disable '
                                'use_masks.' % (im_seg_path, im_files[i]))
                        input_seg_seq.append(
                            util.load_image(im_seg_path,
                                            resize=(img_width, img_height),
                                            interpolation='nn'))

                    if len(input_image_seq
                           ) < seq_length:  # Buffer not filled yet.
                        continue
                    if len(input_image_seq
                           ) > seq_length:  # Remove oldest entry.
                        del input_image_seq[0]
                        if use_masks:
                            del input_seg_seq[0]

                    input_image_stack = np.concatenate(input_image_seq, axis=2)
                    input_image_stack = np.expand_dims(input_image_stack,
                                                       axis=0)
                    if use_masks:
                        input_image_stack = mask_image_stack(
                            input_image_stack, input_seg_seq)
                    est_egomotion = np.squeeze(
                        inference_model.inference_egomotion(
                            input_image_stack, sess))
                    egomotion_str = []
                    for j in range(seq_length - 1):
                        egomotion_str.append(','.join(
                            [str(d) for d in est_egomotion[j]]))
                    current_output_handle.write(
                        str(i) + ' ' + ' '.join(egomotion_str) + '\n')
                if current_output_handle is not None:
                    current_output_handle.close()
            elif inference_mode == INFERENCE_MODE_TRIPLETS:
                written_before = []
                for i in range(len(im_files)):
                    im = util.load_image(im_files[i],
                                         resize=(img_width * 3, img_height))
                    input_image_stack = np.concatenate([
                        im[:, :img_width], im[:, img_width:img_width * 2],
                        im[:, img_width * 2:]
                    ],
                                                       axis=2)
                    input_image_stack = np.expand_dims(input_image_stack,
                                                       axis=0)
                    if use_masks:
                        im_seg_path = im_files[i].replace(
                            '.%s' % file_extension, '-seg.%s' % file_extension)
                        if not gfile.Exists(im_seg_path):
                            raise ValueError(
                                'No segmentation mask %s has been found for '
                                'image %s. If none are available, disable '
                                'use_masks.' % (im_seg_path, im_files[i]))
                        seg = util.load_image(im_seg_path,
                                              resize=(img_width * 3,
                                                      img_height),
                                              interpolation='nn')
                        input_seg_seq = [
                            seg[:, :img_width], seg[:,
                                                    img_width:img_width * 2],
                            seg[:, img_width * 2:]
                        ]
                        input_image_stack = mask_image_stack(
                            input_image_stack, input_seg_seq)
                    est_egomotion = inference_model.inference_egomotion(
                        input_image_stack, sess)
                    est_egomotion = np.squeeze(est_egomotion)
                    egomotion_1_2 = ','.join(
                        [str(d) for d in est_egomotion[0]])
                    egomotion_2_3 = ','.join(
                        [str(d) for d in est_egomotion[1]])

                    output_filepath = os.path.join(output_dirs[i],
                                                   'egomotion.txt')
                    file_mode = 'w' if output_filepath not in written_before else 'a'
                    with gfile.Open(output_filepath,
                                    file_mode) as current_output_handle:
                        current_output_handle.write(
                            str(i) + ' ' + egomotion_1_2 + ' ' +
                            egomotion_2_3 + '\n')
                    written_before.append(output_filepath)
            LOGGING.info('Done.')
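For orientation, a depth-only call to _run_inference might look like the
following sketch. The paths are placeholders, and nets, INFERENCE_MODE_SINGLE
and INFERENCE_CROP_NONE come from the surrounding struct2depth code; this
illustrates the flag wiring rather than quoting the project itself.

# Hypothetical invocation; all paths are placeholders.
_run_inference(output_dir='/tmp/struct2depth_out',
               model_ckpt='/tmp/checkpoints/model-199160',
               input_dir='/tmp/kitti_frames',
               depth=True,            # run the depth network
               egomotion=False,       # skip the egomotion network
               batch_size=4,          # images per forward pass
               inference_mode=INFERENCE_MODE_SINGLE,
               inference_crop=INFERENCE_CROP_NONE)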
Example #2
def finetune_inference(train_model, model_ckpt, output_dir):
    """Train model."""
    vars_to_restore = None
    if model_ckpt is not None:
        vars_to_restore = util.get_vars_to_save_and_restore(model_ckpt)
        ckpt_path = model_ckpt
    pretrain_restorer = tf.train.Saver(vars_to_restore)
    sv = tf.train.Supervisor(logdir=None,
                             save_summaries_secs=0,
                             saver=None,
                             summary_op=None)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    img_nr = 0
    failed_heuristic = []
    with sv.managed_session(config=config) as sess:
        # TODO(casser): Caching the weights would be better to avoid I/O bottleneck.
        while True:  # Loop terminates when all examples have been processed.
            if model_ckpt is not None:
                logging.info('Restored weights from %s', ckpt_path)
                pretrain_restorer.restore(sess, ckpt_path)
            logging.info('Running fine-tuning, image %s...', img_nr)
            img_pred_folder = os.path.join(output_dir,
                                           FLAGS.ft_name + 'id_' + str(img_nr))
            if not gfile.Exists(img_pred_folder):
                gfile.MakeDirs(img_pred_folder)
            step = 1

            # Run fine-tuning.
            while step <= FLAGS.num_steps:
                logging.info('Running step %s of %s.', step, FLAGS.num_steps)
                fetches = {
                    'train': train_model.train_op,
                    'global_step': train_model.global_step,
                    'incr_global_step': train_model.incr_global_step
                }
                _ = sess.run(fetches)
                if step % SAVE_EVERY == 0:
                    # Get latest prediction for middle frame, highest scale.
                    pred = train_model.depth[1][0].eval(session=sess)
                    if FLAGS.flip:
                        pred = np.flip(pred, axis=2)
                    input_img = train_model.image_stack.eval(session=sess)
                    input_img_prev = input_img[0, :, :, 0:3]
                    input_img_center = input_img[0, :, :, 3:6]
                    input_img_next = input_img[0, :, :, 6:]
                    img_pred_file = os.path.join(
                        img_pred_folder,
                        str(step).zfill(10) + ('_flip' if FLAGS.flip else '') +
                        '.npy')
                    motion = np.squeeze(
                        train_model.egomotion.eval(session=sess))
                    # motion of shape (seq_length - 1, 6).
                    motion = np.mean(
                        motion, axis=0)  # Average egomotion across frames.

                    if SAVE_PREVIEWS or step == FLAGS.num_steps:
                        # Also save preview of depth map.
                        color_map = util.normalize_depth_for_display(
                            np.squeeze(pred[0, :, :]))
                        visualization = np.concatenate(
                            (input_img_prev, input_img_center, input_img_next,
                             color_map))
                        motion_s = [str(m) for m in motion]
                        s_rep = ','.join(motion_s)
                        with gfile.Open(img_pred_file.replace('.npy', '.txt'),
                                        'w') as f:
                            f.write(s_rep)
                        util.save_image(
                            img_pred_file.replace('.npy', '.%s' %
                                                  FLAGS.file_extension),
                            visualization, FLAGS.file_extension)

                    with gfile.Open(img_pred_file, 'wb') as f:
                        np.save(f, pred)

                # Apply heuristic to not finetune if egomotion magnitude is too low.
                ego_magnitude = np.linalg.norm(motion[:3], ord=2)
                heuristic = ego_magnitude >= FLAGS.egomotion_threshold
                if not heuristic and step == FLAGS.num_steps:
                    failed_heuristic.append(img_nr)

                step += 1
            img_nr += 1
    return failed_heuristic
Example #3
def _run_inference(output_dir=None,
                   file_extension='png',
                   depth=True,
                   egomotion=False,
                   model_ckpt=None,
                   input_dir=None,
                   input_list_file=None,
                   batch_size=1,
                   img_height=128,
                   img_width=416,
                   seq_length=3,
                   architecture=nets.RESNET,
                   imagenet_norm=True,
                   use_skip=True,
                   joint_encoder=True,
                   shuffle=False,
                   flip_for_depth=False,
                   inference_mode=INFERENCE_MODE_SINGLE,
                   inference_crop=INFERENCE_CROP_NONE,
                   use_masks=False):
    """Runs inference. Refer to flags in inference.py for details."""
    inference_model = model.Model(is_training=False,
                                  batch_size=1,
                                  img_height=128,
                                  img_width=416)

    vars_to_restore = util.get_vars_to_save_and_restore(model_ckpt)
    saver = tf.train.Saver(vars_to_restore)
    sv = tf.train.Supervisor(logdir='/tmp/', saver=None)
    with sv.managed_session() as sess:
        saver.restore(sess, model_ckpt)
        if not gfile.Exists(output_dir):
            gfile.MakeDirs(output_dir)
        logging.info('Predictions will be saved in %s.', output_dir)

        # Collect all images to run inference on.
        im_files, basepath_in = collect_input_images(input_dir,
                                                     input_list_file,
                                                     file_extension)
        if shuffle:
            logging.info('Shuffling data...')
            np.random.shuffle(im_files)
        logging.info('Running inference on %d files.', len(im_files))

        # Create missing output folders and pre-compute target directories.
        output_dirs = create_output_dirs(im_files, basepath_in, output_dir)

        # Run depth prediction network.
        if depth:
            im_batch = []
            npys = []
            for i in range(len(im_files)):
                if i % 100 == 0:
                    logging.info('%s of %s files processed.', i, len(im_files))

                # Read image and run inference.
                if inference_mode == INFERENCE_MODE_SINGLE:
                    if inference_crop == INFERENCE_CROP_NONE:
                        im = util.load_image(im_files[i],
                                             resize=(img_width, img_height))
                    elif inference_crop == INFERENCE_CROP_CITYSCAPES:
                        im = util.crop_cityscapes(util.load_image(im_files[i]),
                                                  resize=(img_width,
                                                          img_height))
                elif inference_mode == INFERENCE_MODE_TRIPLETS:
                    im = util.load_image(im_files[i],
                                         resize=(img_width * 3, img_height))
                    im = im[:, img_width:img_width * 2]
                if flip_for_depth:
                    im = np.flip(im, axis=1)
                im_batch.append(im)

                if len(im_batch) == batch_size or i == len(im_files) - 1:
                    # Call inference on batch.
                    for _ in range(batch_size -
                                   len(im_batch)):  # Fill up batch.
                        im_batch.append(
                            np.zeros(shape=(img_height, img_width, 3),
                                     dtype=np.float32))
                    im_batch = np.stack(im_batch, axis=0)
                    est_depth = inference_model.inference_depth(im_batch, sess)
                    if flip_for_depth:
                        est_depth = np.flip(est_depth, axis=2)
                        im_batch = np.flip(im_batch, axis=2)

                    for j in range(len(im_batch)):
                        color_map = util.normalize_depth_for_display(
                            np.squeeze(est_depth[j]))
                        visualization = np.concatenate(
                            (im_batch[j], color_map), axis=0)
                        # Save raw prediction and color visualization. Extract filename
                        # without extension from full path: e.g. path/to/input_dir/folder1/
                        # file1.png -> file1
                        k = i - len(im_batch) + 1 + j
                        filename_root = os.path.splitext(
                            os.path.basename(im_files[k]))[0]
                        pref = '_flip' if flip_for_depth else ''
                        output_raw = os.path.join(
                            output_dirs[k], filename_root + pref + '.npy')
                        output_vis = os.path.join(
                            output_dirs[k], filename_root + pref + '.png')
                        with gfile.Open(output_raw, 'wb') as f:
                            np.save(f, est_depth[j])
                            npys.append(est_depth[j])
                        util.save_image(output_vis, visualization,
                                        file_extension)
                    im_batch = []
            with gfile.Open(os.path.join(output_dir, 'result.npy'), 'wb') as f:
                np.save(f, npys)
def run_inference(output_dir=output_dir,
                  file_extension='png',
                  depth=True,
                  egomotion=False,
                  model_ckpt=model_ckpt,
                  input_list_file=None,
                  batch_size=1,
                  img_height=256,
                  img_width=416,
                  seq_length=3,
                  architecture=nets.RESNET,
                  imagenet_norm=True,
                  use_skip=True,
                  joint_encoder=True,
                  shuffle=False,
                  flip_for_depth=False,
                  inference_mode=INFERENCE_MODE_SINGLE,
                  inference_crop=INFERENCE_CROP_NONE,
                  use_masks=False):
    inference_model = model.Model(is_training=False,
                                  batch_size=batch_size,
                                  img_height=img_height,
                                  img_width=img_width,
                                  seq_length=seq_length,
                                  architecture=architecture,
                                  imagenet_norm=imagenet_norm,
                                  use_skip=use_skip,
                                  joint_encoder=joint_encoder)
    vars_to_restore = util.get_vars_to_save_and_restore(model_ckpt)
    saver = tf.train.Saver(vars_to_restore)
    sv = tf.train.Supervisor(logdir='/tmp/', saver=None)
    with sv.managed_session() as sess:
        saver.restore(sess, model_ckpt)
        if not gfile.Exists(output_dir):
            gfile.MakeDirs(output_dir)
        logging.info('Predictions will be saved in %s.', output_dir)

        # Open the input video and set up the output writer.
        video_capture = cv2.VideoCapture(video_data)
        fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
        fps = int(video_capture.get(cv2.CAP_PROP_FPS))
        out = cv2.VideoWriter(output_dir + '/' + 'try_1229.mp4', fourcc, fps,
                              output_size)
        frame_count = (int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT)))

        while True:
            if depth:

                im_batch = []

                for i in range(frame_count):

                    if i % 100 == 0:
                        logging.info('%s of %s frames processed.', i,
                                     frame_count)

                    # struct2depth starts here ---------------------------
                    ret, im = video_capture.read()
                    if not ret:  # Guard against a short or corrupt stream.
                        break

                    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
                    im = cv2.resize(im, (img_width, img_height))
                    im = np.array(im, dtype=np.float32) / 255.0

                    im_batch.append(im)
                    for _ in range(batch_size -
                                   len(im_batch)):  # Fill up batch.
                        im_batch.append(
                            np.zeros(shape=(img_height, img_width, 3),
                                     dtype=np.float32))

                    im_batch = np.stack(im_batch, axis=0)
                    est_depth = inference_model.inference_depth(im_batch, sess)

                    color_map = util.normalize_depth_for_display(
                        np.squeeze(est_depth))
                    image_frame = np.concatenate((im_batch[0], color_map),
                                                 axis=0)
                    #logging.info(image_frame.shape)
                    image_frame = (image_frame * 255.0).astype(np.uint8)
                    image_frame = cv2.cvtColor(image_frame, cv2.COLOR_RGB2BGR)
                    # struct2depth ends here ---------------------------

                    detect(image_frame)  #yolo

                    out.write(image_frame)
                    logging.info('Frame written')
                    im_batch = []
            # All frames processed; leave the outer loop.
            break

        logging.info('Done.')
        video_capture.release()
        out.release()
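Note that run_inference above closes over module-level names (output_dir,
model_ckpt, video_data, output_size) defined elsewhere in the calling script.
A hypothetical sketch of those definitions, consistent with the
img_height=256, img_width=416 defaults used above (the written frame stacks
the RGB image on top of the depth color map, so it is 416 wide and 512 tall):

# Hypothetical module-level configuration assumed by run_inference above.
output_dir = '/tmp/struct2depth_video'        # placeholder output folder
model_ckpt = '/tmp/checkpoints/model-199160'  # placeholder checkpoint path
video_data = '/tmp/input_video.mp4'           # placeholder input video
output_size = (416, 512)  # cv2.VideoWriter expects (width, height)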
Example #5
def train(train_model, pretrained_ckpt, imagenet_ckpt, checkpoint_dir,
          train_steps, summary_freq):
  """Train model."""
  vars_to_restore = None
  if pretrained_ckpt is not None:
    vars_to_restore = util.get_vars_to_save_and_restore(pretrained_ckpt)
    ckpt_path = pretrained_ckpt
  elif imagenet_ckpt:
    vars_to_restore = util.get_imagenet_vars_to_restore(imagenet_ckpt)
    ckpt_path = imagenet_ckpt
  pretrain_restorer = tf.train.Saver(vars_to_restore)
  vars_to_save = util.get_vars_to_save_and_restore()
  vars_to_save[train_model.global_step.op.name] = train_model.global_step
  saver = tf.train.Saver(vars_to_save, max_to_keep=MAX_TO_KEEP)
  sv = tf.train.Supervisor(logdir=checkpoint_dir, save_summaries_secs=0,
                           saver=None)
  config = tf.ConfigProto()
  config.gpu_options.allow_growth = True
  with sv.managed_session(config=config) as sess:
    # sess.run(tf.local_variables_initializer())
    if pretrained_ckpt is not None or imagenet_ckpt:
      logging.info('Restoring pretrained weights from %s', ckpt_path)
      pretrain_restorer.restore(sess, ckpt_path)

    logging.info('Attempting to resume training from %s...', checkpoint_dir)
    checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
    logging.info('Last checkpoint found: %s', checkpoint)
    if checkpoint:
      saver.restore(sess, checkpoint)

    logging.info('Training...')
    start_time = time.time()
    last_summary_time = time.time()
    steps_per_epoch = train_model.reader.steps_per_epoch
    step = 1
    while step <= train_steps:
      fetches = {
          'train': train_model.train_op,
          'global_step': train_model.global_step,
          'incr_global_step': train_model.incr_global_step
      }
      if step % summary_freq == 0:
        fetches['loss'] = train_model.total_loss
        fetches['summary'] = sv.summary_op

      results = sess.run(fetches)
      global_step = results['global_step']

      if step % summary_freq == 0:
        sv.summary_writer.add_summary(results['summary'], global_step)
        train_epoch = math.ceil(global_step / steps_per_epoch)
        train_step = global_step - (train_epoch - 1) * steps_per_epoch
        this_cycle = time.time() - last_summary_time
        last_summary_time += this_cycle
        logging.info(
            'Epoch: [%2d] [%5d/%5d] time: %4.2fs (%ds total) loss: %.3f',
            train_epoch, train_step, steps_per_epoch, this_cycle,
            time.time() - start_time, results['loss'])

      if step % steps_per_epoch == 0:
        logging.info('[*] Saving checkpoint to %s...', checkpoint_dir)
        saver.save(sess, os.path.join(checkpoint_dir, 'model'),
                   global_step=global_step)

      # Setting step to global_step allows for training for a total of
      # train_steps even if the program is restarted during training.
      step = global_step + 1
Example #6
def _run_inference(output_dir=None,
                   file_extension='png',
                   depth=True,
                   egomotion=False,
                   model_ckpt=None,
                   input_dir=None,
                   input_list_file=None,
                   batch_size=1,
                   img_height=128,
                   img_width=416,
                   seq_length=3,
                   architecture=nets.RESNET,
                   imagenet_norm=True,
                   use_skip=True,
                   joint_encoder=True,
                   shuffle=False,
                   flip_for_depth=False,
                   inference_mode=INFERENCE_MODE_SINGLE,
                   inference_crop=INFERENCE_CROP_NONE,
                   use_masks=False):
    """Runs inference. Refer to flags in inference.py for details."""
    inference_model = model.Model(is_training=False,
                                  batch_size=batch_size,
                                  img_height=img_height,
                                  img_width=img_width,
                                  seq_length=seq_length,
                                  architecture=architecture,
                                  imagenet_norm=imagenet_norm,
                                  use_skip=use_skip,
                                  joint_encoder=joint_encoder)
    vars_to_restore = util.get_vars_to_save_and_restore(model_ckpt)
    saver = tf.train.Saver(vars_to_restore)
    sv = tf.train.Supervisor(logdir='/tmp/', saver=None)
    with sv.managed_session() as sess:
        saver.restore(sess, model_ckpt)
        if not gfile.Exists(output_dir):
            gfile.MakeDirs(output_dir)
        logging.info('Predictions will be saved in %s.', output_dir)

        basepath_in = os.getcwd()
        segmented_weights_dir = os.path.join(basepath_in,
                                             "segmentation/pretrained", "")
        print("Segmentation weights dir:", segmented_weights_dir)
        output_dir = os.path.join(basepath_in, output_dir, "")

        # Feeding images from webcam
        cap = cv2.VideoCapture(0)
        cv2.namedWindow('Recording', cv2.WINDOW_AUTOSIZE)
        i = 0

        while True:
            ret_val, frame = cap.read()
            if not ret_val:  # Camera read failed.
                break
            VisualizeResults.seg(frame)

            if depth:
                logging.info('%s processed.', i)
                # Resizing the image to (416*128)
                # final_image = cv2.resize(
                #     frame, (img_width, img_height), interpolation=cv2.INTER_AREA)
                # Getting the segmentation mask
                # segmentation_mask = VisualizeResults.main(
                #     frame, output_dir, segmented_weights_dir)
                # input_image_seq = []
                # input_seg_seq = []
                # input_image_seq.append(final_image)
                # input_seg_seq.append(cv2.resize(segmentation_mask,
                #                                 resize=(img_width, img_height),
                #                                 interpolation='nn'))
                # if use_masks:
                #   input_image_stack = mask_image_stack(input_image_stack,
                #                                         input_seg_seq)
                # est_egomotion = np.squeeze(inference_model.inference_egomotion(
                #     input_image_stack, sess))
                # input_image_stack = np.concatenate(input_image_seq, axis=2)
                # input_image_stack = np.expand_dims(input_image_stack, axis=0)
                # Resizing the image to (416*128)
                final_image = cv2.resize(frame, (img_width, img_height),
                                         interpolation=cv2.INTER_AREA)

                # Estimating depth
                est_depth = inference_model.inference_depth([final_image],
                                                            sess)
                # est_depth is the matrix of depths

                # Creating output
                color_map = util.normalize_depth_for_display(
                    np.squeeze(est_depth[0]))
                visualization = np.concatenate((final_image, color_map),
                                               axis=0)
                output_vis = os.path.join(output_dir,
                                          str(i) + "-depth" + '.png')
                # output_image = os.path.join(
                #     output_dir, str(i) + '.png')
                util.save_image(output_vis, visualization, file_extension)
                # util.save_image(output_vis, color_map, file_extension)
                # util.save_image(output_image, final_image, file_extension)
            i = i + 1
            # break
            if cv2.waitKey(1) == 27:
                break  # esc to quit
        cv2.destroyAllWindows()

        print("Done!")
Example #7
def _run_inference(output_dir=None,
                   file_extension='png',
                   depth=True,
                   egomotion=False,
                   model_ckpt=None,
                   input_dir=None,
                   input_list_file=None,
                   batch_size=1,
                   img_height=128,
                   img_width=416,
                   seq_length=3,
                   architecture=nets.RESNET,
                   imagenet_norm=True,
                   use_skip=True,
                   joint_encoder=True,
                   shuffle=False,
                   flip_for_depth=False,
                   inference_mode=INFERENCE_MODE_SINGLE,
                   inference_crop=INFERENCE_CROP_NONE,
                   use_masks=False):
  """Runs inference. Refer to flags in inference.py for details."""
  inference_model = model.Model(is_training=False,
                                batch_size=batch_size,
                                img_height=img_height,
                                img_width=img_width,
                                seq_length=seq_length,
                                architecture=architecture,
                                imagenet_norm=imagenet_norm,
                                use_skip=use_skip,
                                joint_encoder=joint_encoder)
  vars_to_restore = util.get_vars_to_save_and_restore(model_ckpt)
  saver = tf.train.Saver(vars_to_restore)
  sv = tf.train.Supervisor(logdir='/tmp/', saver=None)
  with sv.managed_session() as sess:
    saver.restore(sess, model_ckpt)
    if not gfile.Exists(output_dir):
      gfile.MakeDirs(output_dir)
    logging.info('Predictions will be saved in %s.', output_dir)

    # Collect all images to run inference on.
    im_files, basepath_in = collect_input_images(input_dir, input_list_file,
                                                 file_extension)
    if shuffle:
      logging.info('Shuffling data...')
      np.random.shuffle(im_files)
    logging.info('Running inference on %d files.', len(im_files))

    # Create missing output folders and pre-compute target directories.
    output_dirs = create_output_dirs(im_files, basepath_in, output_dir)

    # Run depth prediction network.
    if depth:
      im_batch = []
      for i in range(len(im_files)):
        if i % 100 == 0:
          logging.info('%s of %s files processed.', i, len(im_files))

        # Read image and run inference.
        if inference_mode == INFERENCE_MODE_SINGLE:
          if inference_crop == INFERENCE_CROP_NONE:
            im = util.load_image(im_files[i], resize=(img_width, img_height))
          elif inference_crop == INFERENCE_CROP_CITYSCAPES:
            im = util.crop_cityscapes(util.load_image(im_files[i]),
                                      resize=(img_width, img_height))
        elif inference_mode == INFERENCE_MODE_TRIPLETS:
          im = util.load_image(im_files[i], resize=(img_width * 3, img_height))
          im = im[:, img_width:img_width*2]
        if flip_for_depth:
          im = np.flip(im, axis=1)
        im_batch.append(im)

        if len(im_batch) == batch_size or i == len(im_files) - 1:
          # Call inference on batch.
          for _ in range(batch_size - len(im_batch)):  # Fill up batch.
            im_batch.append(np.zeros(shape=(img_height, img_width, 3),
                                     dtype=np.float32))
          im_batch = np.stack(im_batch, axis=0)
          est_depth = inference_model.inference_depth(im_batch, sess)
          if flip_for_depth:
            est_depth = np.flip(est_depth, axis=2)
            im_batch = np.flip(im_batch, axis=2)

          for j in range(len(im_batch)):
            color_map = util.normalize_depth_for_display(
                np.squeeze(est_depth[j]))
            visualization = np.concatenate((im_batch[j], color_map), axis=0)
            # Save raw prediction and color visualization. Extract filename
            # without extension from full path: e.g. path/to/input_dir/folder1/
            # file1.png -> file1
            k = i - len(im_batch) + 1 + j
            filename_root = os.path.splitext(os.path.basename(im_files[k]))[0]
            pref = '_flip' if flip_for_depth else ''
            output_raw = os.path.join(
                output_dirs[k], filename_root + pref + '.npy')
            output_vis = os.path.join(
                output_dirs[k], filename_root + pref + '.png')
            with gfile.Open(output_raw, 'wb') as f:
              np.save(f, est_depth[j])
            util.save_image(output_vis, visualization, file_extension)
          im_batch = []

    # Run egomotion network.
    "Wektor zawierajacy wyliczane przemieszczenia kamery"
    egomotion_tuple = []
    if egomotion:
      if inference_mode == INFERENCE_MODE_SINGLE:
        # Run regular egomotion inference loop.
        input_image_seq = []
        input_seg_seq = []
        current_sequence_dir = None
        current_output_handle = None
        "Wczytywanie odpowiedniej sekwencji zdjęć"
        for i in range(len(im_files)):
          sequence_dir = os.path.dirname(im_files[i])
          if sequence_dir != current_sequence_dir:
            # Assume start of a new sequence, since this image lies in a
            # different directory than the previous ones.
            # Clear egomotion input buffer.
            output_filepath = os.path.join(output_dirs[i], 'egomotion.txt')
            if current_output_handle is not None:
              current_output_handle.close()
            current_sequence_dir = sequence_dir
            logging.info('Writing egomotion sequence to %s.', output_filepath)
            current_output_handle = gfile.Open(output_filepath, 'w')
            input_image_seq = []
          im = util.load_image(im_files[i], resize=(img_width, img_height))
          input_image_seq.append(im)
          if use_masks:
            im_seg_path = im_files[i].replace('.%s' % file_extension,
                                              '-seg.%s' % file_extension)
            if not gfile.Exists(im_seg_path):
              raise ValueError('No segmentation mask %s has been found for '
                               'image %s. If none are available, disable '
                               'use_masks.' % (im_seg_path, im_files[i]))
            input_seg_seq.append(util.load_image(im_seg_path,
                                                 resize=(img_width, img_height),
                                                 interpolation='nn'))

          if len(input_image_seq) < seq_length:  # Buffer not filled yet.
            continue
          if len(input_image_seq) > seq_length:  # Remove oldest entry.
            del input_image_seq[0]
            if use_masks:
              del input_seg_seq[0]

          input_image_stack = np.concatenate(input_image_seq, axis=2)
          input_image_stack = np.expand_dims(input_image_stack, axis=0)
          if use_masks:
            input_image_stack = mask_image_stack(input_image_stack,
                                                 input_seg_seq)
          "Wyliczanie przekształceń pozycji kamery"
          est_egomotion_ns = inference_model.inference_egomotion(
                input_image_stack, sess)
          est_egomotion_s = np.squeeze(inference_model.inference_egomotion(
              input_image_stack, sess))
          "Zapisywanie przekształceń do listy przekształceń"
          if i == 2:
            egomotion_tuple = est_egomotion_ns[0, 0, :]
            egomotion_tuple = np.expand_dims(egomotion_tuple, axis=0)
          else:
            buff = est_egomotion_ns[0, 0, :]
            buff = np.expand_dims(buff, axis=0)
            egomotion_tuple = np.concatenate((egomotion_tuple,buff ), axis=0)
          egomotion_str = []
          "Zapis przkeształceń do pliku *txt"
          for j in range(seq_length - 1):
            egomotion_str.append(','.join([str(d) for d in est_egomotion_s[j]]))
          current_output_handle.write(
              str(i) + ' ' + ' '.join(egomotion_str) + '\n')
        if current_output_handle is not None:
          current_output_handle.close()

      elif inference_mode == INFERENCE_MODE_TRIPLETS:
        written_before = []
        for i in range(len(im_files)):
          im = util.load_image(im_files[i], resize=(img_width * 3, img_height))
          input_image_stack = np.concatenate(
              [im[:, :img_width], im[:, img_width:img_width*2],
               im[:, img_width*2:]], axis=2)
          input_image_stack = np.expand_dims(input_image_stack, axis=0)
          if use_masks:
            im_seg_path = im_files[i].replace('.%s' % file_extension,
                                              '-seg.%s' % file_extension)
            if not gfile.Exists(im_seg_path):
              raise ValueError('No segmentation mask %s has been found for '
                               'image %s. If none are available, disable '
                               'use_masks.' % (im_seg_path, im_files[i]))
            seg = util.load_image(im_seg_path,
                                  resize=(img_width * 3, img_height),
                                  interpolation='nn')
            input_seg_seq = [seg[:, :img_width], seg[:, img_width:img_width*2],
                             seg[:, img_width*2:]]
            input_image_stack = mask_image_stack(input_image_stack,
                                                 input_seg_seq)
          est_egomotion = inference_model.inference_egomotion(
              input_image_stack, sess)
          est_egomotion = np.squeeze(est_egomotion)
          egomotion_1_2 = ','.join([str(d) for d in est_egomotion[0]])
          egomotion_2_3 = ','.join([str(d) for d in est_egomotion[1]])

          output_filepath = os.path.join(output_dirs[i], 'egomotion.txt')
          file_mode = 'w' if output_filepath not in written_before else 'a'
          with gfile.Open(output_filepath, file_mode) as current_output_handle:
            current_output_handle.write(str(i) + ' ' + egomotion_1_2 + ' ' +
                                        egomotion_2_3 + '\n')
          written_before.append(output_filepath)
      "Konwertowanie listy przekształceń z numpy na tensor"
      egomotion_tuple = np.expand_dims(egomotion_tuple, axis=0)
      tf.reset_default_graph()
      egomotion_tensor = tf.convert_to_tensor(egomotion_tuple, dtype=tf.float32)
      "Inicjalizacja zmiennych"
      start_position = np.array([0, 0, 0, 1])#Pozycja początkowa
      actual_position = start_position #Aktulalna pozycja
      position_x_vec = []#Wektory pozycji (przmeiszczeń)
      position_y_vec = []
      position_z_vec = []
      position_x_vec = np.append(position_x_vec, actual_position[0])
      position_y_vec = np.append(position_y_vec, actual_position[1])
      position_z_vec = np.append(position_z_vec, actual_position[2])
      actual_transform_matrix = []#Aktualna macierz transformacji
      position_str =[] #Zawiera tekst służący do zapisu danych do pliku *txt
      matrix_transform_str = []
      ego_str = []
      for i in range(0, len(im_files)-2):
        transform_buff =[]#Zmienna bufurująca
        if i == 0: #Generowanie macierzy transformacji dla pierwszje iteracji
            actual_transform_matrix = _egomotion_vec2mat(egomotion_tensor[:, i, :], 1)
            ego_str.append(str(egomotion_tuple[0, i, 0]) + "," + str(egomotion_tuple[0, i, 1]) + "," + str(egomotion_tuple[0, i, 2])
                           + "," + str(egomotion_tuple[0, i, 3]) + "," + str(egomotion_tuple[0, i, 4]) + "," + str(egomotion_tuple[0, i, 5]) + "\n")
        else: #Generowanie macierzy transformacji dla sekwencji 1-2, 2-3 itd. Mnożenie jej przez transformacje zabuforowaną, wcześniejszą.
            transform_matrix = _egomotion_vec2mat(egomotion_tensor[:, i, :], 1)
            ego_str.append(str(egomotion_tuple[0, i, 0]) + "," + str(egomotion_tuple[0, i, 1]) + "," + str(
                egomotion_tuple[0, i, 2])
                           + "," + str(egomotion_tuple[0, i, 3]) + "," + str(egomotion_tuple[0, i, 4]) + "," + str(
                egomotion_tuple[0, i, 5]) + "\n")#Zapis do listy tekstowej wartości przekształceń
            actual_transform_matrix = tf.matmul(actual_transform_matrix, transform_matrix)#Aktualna macierz transformacji
            transform_buff = actual_transform_matrix#Konwetowanie macierzy na wersję numpy
            sess = tf.Session()
            numpy_matrix_transform = sess.run(transform_buff)
            numpy_matrix_transform = np.squeeze(numpy_matrix_transform, axis=0)
            actual_position = np.dot(numpy_matrix_transform, start_position)
            position_x_vec = np.append(position_x_vec, actual_position[0])#Zapis pozycji
            position_y_vec = np.append(position_y_vec, actual_position[1])
            position_z_vec = np.append(position_z_vec, actual_position[2])
            print("Rotation and translation matrix from picture " + str(0) + " to " + str(i))#Informacje konsolowe
            print(numpy_matrix_transform)
            print("Actual position of camera")
            print(actual_position)
            "Przygotowywanie wektora tekstu do zapisu (pozycji i wyiczonych macierzy transformacj)"
            position_str.append(str(actual_position[0]) + "," + str(actual_position[1]) + "," + str(actual_position[2]) + "\n")

            matrix_transform_str.append( str(numpy_matrix_transform[0,0]) + "," + str(numpy_matrix_transform[0,1]) + ","+str(numpy_matrix_transform[0,2]) +\
                           "," + str(numpy_matrix_transform[0, 3])+ ","+str(numpy_matrix_transform[1,0])+ ","+str(numpy_matrix_transform[1,1]) +\
                           "," + str(numpy_matrix_transform[1, 2])+ ","+str(numpy_matrix_transform[1,3])+ ","+str(numpy_matrix_transform[2,0]) +\
                           "," + str(numpy_matrix_transform[2, 1])+ ","+str(numpy_matrix_transform[2,2])+ ","+str(numpy_matrix_transform[2,3]) +\
                           "," + str(numpy_matrix_transform[3, 0])+ ","+str(numpy_matrix_transform[3,1])+ ","+str(numpy_matrix_transform[3,2]) +\
                           "," + str(numpy_matrix_transform[3, 3]) + "\n")
      # Generate the time vector.
      vectime = np.arange(len(im_files) - 2)

      "Zapis do pliku pozycji, macierzy transformacji, wartości przekształceń oraz zapis wykresów"
      with open(output_dir + "/Position.txt", "w") as f:
        f.writelines(position_str[:len(im_files) - 4])
      with open(output_dir + "/Transform_matrix.txt", "w") as f:
        f.writelines(matrix_transform_str[:len(im_files) - 4])
      with open(output_dir + "/Egomotion_value.txt", "w") as f:
        f.writelines(ego_str[:len(im_files) - 4])
      print("Plotting xt figure")
      fig_xyz = plt.figure()
      ax = fig_xyz.add_subplot(111)
      ax.plot(vectime,position_x_vec)
      fig_xyz.suptitle('Wykres pozycji X od czasu', fontsize=20)
      ax.set_xlabel('T')
      ax.set_ylabel('X')
      plt.savefig(output_dir + '/XT.png', dpi=200)
      plt.close()
      print("Plotting yt figure")
      fig_xyz = plt.figure()
      ax = fig_xyz.add_subplot(111)
      ax.plot(  vectime,position_y_vec)
      fig_xyz.suptitle('Wykres pozycji Y od czasu', fontsize=20)
      ax.set_xlabel('T')
      ax.set_ylabel('Y')
      plt.savefig(output_dir + '/YT.png', dpi=200)
      plt.close()
      print("Plotting zt figure")
      fig_xyz = plt.figure()
      ax = fig_xyz.add_subplot(111)
      ax.plot(vectime, position_z_vec)
      fig_xyz.suptitle('Wykres pozycji Z od czasu', fontsize=20)
      ax.set_xlabel('T')
      ax.set_ylabel('Z')
      plt.savefig(output_dir + '/ZT.png', dpi=200)
      plt.close()
      logging.info('Done.')
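The trajectory integration in Example #7 reduces to chaining homogeneous 4x4
transforms and pushing the start position through the product. A
self-contained numpy sketch of the same idea, using synthetic transforms in
place of the network's _egomotion_vec2mat output:

import numpy as np

def make_transform(rotation, translation):
    """Builds a 4x4 homogeneous transform from a 3x3 rotation and a 3-vector."""
    transform = np.eye(4)
    transform[:3, :3] = rotation
    transform[:3, 3] = translation
    return transform

# Two synthetic frame-to-frame motions: pure forward translations.
step_1 = make_transform(np.eye(3), [0.0, 0.0, 1.0])
step_2 = make_transform(np.eye(3), [0.0, 0.0, 1.5])

start_position = np.array([0.0, 0.0, 0.0, 1.0])  # Homogeneous start point.
cumulative = step_1 @ step_2                     # Chained transform, frame 0 -> 2.
actual_position = cumulative @ start_position
print(actual_position[:3])                       # -> [0. 0. 2.5]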
Example #8
def train(train_model, pretrained_ckpt, imagenet_ckpt, checkpoint_dir,
          train_steps, summary_freq):
  """Train model."""
  vars_to_restore = None
  if pretrained_ckpt is not None:
    vars_to_restore = util.get_vars_to_save_and_restore(pretrained_ckpt)
    ckpt_path = pretrained_ckpt
  elif imagenet_ckpt:
    vars_to_restore = util.get_imagenet_vars_to_restore(imagenet_ckpt)
    ckpt_path = imagenet_ckpt
  pretrain_restorer = tf.train.Saver(vars_to_restore)
  vars_to_save = util.get_vars_to_save_and_restore()
  vars_to_save[train_model.global_step.op.name] = train_model.global_step
  saver = tf.train.Saver(vars_to_save, max_to_keep=MAX_TO_KEEP)
  sv = tf.train.Supervisor(logdir=checkpoint_dir, save_summaries_secs=0,
                           saver=None)
  config = tf.ConfigProto()
  config.gpu_options.allow_growth = True
  with sv.managed_session(config=config) as sess:
    if pretrained_ckpt is not None or imagenet_ckpt:
      logging.info('Restoring pretrained weights from %s', ckpt_path)
      pretrain_restorer.restore(sess, ckpt_path)

    logging.info('Attempting to resume training from %s...', checkpoint_dir)
    checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
    logging.info('Last checkpoint found: %s', checkpoint)
    if checkpoint:
      saver.restore(sess, checkpoint)

    logging.info('Training...')
    start_time = time.time()
    last_summary_time = time.time()
    steps_per_epoch = train_model.reader.steps_per_epoch
    step = 1
    while step <= train_steps:
      fetches = {
          'train': train_model.train_op,
          'global_step': train_model.global_step,
          'incr_global_step': train_model.incr_global_step
      }
      if step % summary_freq == 0:
        fetches['loss'] = train_model.total_loss
        fetches['summary'] = sv.summary_op

      results = sess.run(fetches)
      global_step = results['global_step']

      if step % summary_freq == 0:
        sv.summary_writer.add_summary(results['summary'], global_step)
        train_epoch = math.ceil(global_step / steps_per_epoch)
        train_step = global_step - (train_epoch - 1) * steps_per_epoch
        this_cycle = time.time() - last_summary_time
        last_summary_time += this_cycle
        logging.info(
            'Epoch: [%2d] [%5d/%5d] time: %4.2fs (%ds total) loss: %.3f',
            train_epoch, train_step, steps_per_epoch, this_cycle,
            time.time() - start_time, results['loss'])

      if step % steps_per_epoch == 0:
        logging.info('[*] Saving checkpoint to %s...', checkpoint_dir)
        saver.save(sess, os.path.join(checkpoint_dir, 'model'),
                   global_step=global_step)

      # Setting step to global_step allows for training for a total of
      # train_steps even if the program is restarted during training.
      step = global_step + 1
Example #9
def finetune_inference(train_model, model_ckpt, output_dir):
  """Train model."""
  vars_to_restore = None
  if model_ckpt is not None:
    vars_to_restore = util.get_vars_to_save_and_restore(model_ckpt)
    ckpt_path = model_ckpt
  pretrain_restorer = tf.train.Saver(vars_to_restore)
  sv = tf.train.Supervisor(logdir=None, save_summaries_secs=0, saver=None,
                           summary_op=None)
  config = tf.ConfigProto()
  config.gpu_options.allow_growth = True
  img_nr = 0
  failed_heuristic = []
  with sv.managed_session(config=config) as sess:
    # TODO(casser): Caching the weights would be better to avoid I/O bottleneck.
    while True:  # Loop terminates when all examples have been processed.
      if model_ckpt is not None:
        logging.info('Restored weights from %s', ckpt_path)
        pretrain_restorer.restore(sess, ckpt_path)
      logging.info('Running fine-tuning, image %s...', img_nr)
      img_pred_folder = os.path.join(
          output_dir, FLAGS.ft_name + 'id_' + str(img_nr))
      if not gfile.Exists(img_pred_folder):
        gfile.MakeDirs(img_pred_folder)
      step = 1

      # Run fine-tuning.
      while step <= FLAGS.num_steps:
        logging.info('Running step %s of %s.', step, FLAGS.num_steps)
        fetches = {
            'train': train_model.train_op,
            'global_step': train_model.global_step,
            'incr_global_step': train_model.incr_global_step
        }
        _ = sess.run(fetches)
        if step % SAVE_EVERY == 0:
          # Get latest prediction for middle frame, highest scale.
          pred = train_model.depth[1][0].eval(session=sess)
          if FLAGS.flip:
            pred = np.flip(pred, axis=2)
          input_img = train_model.image_stack.eval(session=sess)
          input_img_prev = input_img[0, :, :, 0:3]
          input_img_center = input_img[0, :, :, 3:6]
          input_img_next = input_img[0, :, :, 6:]
          img_pred_file = os.path.join(
              img_pred_folder,
              str(step).zfill(10) + ('_flip' if FLAGS.flip else '') + '.npy')
          motion = np.squeeze(train_model.egomotion.eval(session=sess))
          # motion of shape (seq_length - 1, 6).
          motion = np.mean(motion, axis=0)  # Average egomotion across frames.

          if SAVE_PREVIEWS or step == FLAGS.num_steps:
            # Also save preview of depth map.
            color_map = util.normalize_depth_for_display(
                np.squeeze(pred[0, :, :]))
            visualization = np.concatenate(
                (input_img_prev, input_img_center, input_img_next, color_map))
            motion_s = [str(m) for m in motion]
            s_rep = ','.join(motion_s)
            with gfile.Open(img_pred_file.replace('.npy', '.txt'), 'w') as f:
              f.write(s_rep)
            util.save_image(
                img_pred_file.replace('.npy', '.%s' % FLAGS.file_extension),
                visualization, FLAGS.file_extension)

          with gfile.Open(img_pred_file, 'wb') as f:
            np.save(f, pred)

        # Apply heuristic to not finetune if egomotion magnitude is too low.
        ego_magnitude = np.linalg.norm(motion[:3], ord=2)
        heuristic = ego_magnitude >= FLAGS.egomotion_threshold
        if not heuristic and step == FLAGS.num_steps:
          failed_heuristic.append(img_nr)

        step += 1
      img_nr += 1
  return failed_heuristic
Example #10
output_node_names_depth = ["truediv"]
output_node_names_egomotion = ["egomotion_prediction/pose_exp_net/pose/concat"]

graph = tf.Graph()
with graph.as_default():
    inference_model = model.Model(is_training=False,
                                  batch_size=1,
                                  img_height=128,
                                  img_width=416,
                                  seq_length=3,
                                  architecture=nets.RESNET,
                                  imagenet_norm=True,
                                  use_skip=True,
                                  joint_encoder=False)
    vars_to_restore = util.get_vars_to_save_and_restore(model_ckpt_kitti)
    saver = tf.train.Saver(vars_to_restore)
    with tf.Session() as sess:
        saver.restore(sess, model_ckpt_kitti)
        converted_graph_def = tf.compat.v1.graph_util.convert_variables_to_constants(
            sess,
            input_graph_def=graph.as_graph_def(),
            output_node_names=output_node_names_depth)
        with tf.gfile.GFile(pb_file_kitti_depth, "wb") as f:
            f.write(converted_graph_def.SerializeToString())

        converted_graph_def = tf.compat.v1.graph_util.convert_variables_to_constants(
            sess,
            input_graph_def=graph.as_graph_def(),
            output_node_names=output_node_names_egomotion)
        with tf.gfile.GFile(pb_file_kitti_egomotion, "wb") as f:
            f.write(converted_graph_def.SerializeToString())
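Once frozen, the .pb graphs can be reloaded without the model-building code.
A sketch of loading the depth graph: the output node name "truediv" matches
the export above, but the input placeholder name below is an assumption and
should be checked against the actual graph (e.g. by printing the node names).

# Hypothetical loader for the frozen depth graph.
with tf.gfile.GFile(pb_file_kitti_depth, "rb") as f:
    frozen_graph_def = tf.GraphDef()
    frozen_graph_def.ParseFromString(f.read())

with tf.Graph().as_default() as depth_graph:
    tf.import_graph_def(frozen_graph_def, name="")
    # for node in frozen_graph_def.node:  # Uncomment to locate the real
    #     print(node.name)                # input placeholder name.

with tf.Session(graph=depth_graph) as depth_sess:
    depth_output = depth_graph.get_tensor_by_name("truediv:0")
    # input_tensor = depth_graph.get_tensor_by_name("raw_input:0")  # assumed name
    # depth = depth_sess.run(depth_output,
    #                        feed_dict={input_tensor: image_batch})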
Example #11
def _run_inference(output_dir=None,
                   file_extension='png',
                   depth=True,
                   egomotion=False,
                   objmotion=False,
                   model_ckpt=None,
                   input_dir=None,
                   input_list_file=None,
                   batch_size=1,
                   img_height=128,
                   img_width=416,
                   seq_length=3,
                   architecture=nets.RESNET,
                   imagenet_norm=True,
                   use_skip=True,
                   joint_encoder=True,
                   shuffle=False,
                   flip_for_depth=False,
                   inference_mode=INFERENCE_MODE_SINGLE,
                   inference_crop=INFERENCE_CROP_NONE,
                   use_masks=False):
  """Runs inference. Refer to flags in inference.py for details."""
  inference_model = model.Model(is_training=False,
                                batch_size=batch_size,
                                img_height=img_height,
                                img_width=img_width,
                                seq_length=seq_length,
                                architecture=architecture,
                                imagenet_norm=imagenet_norm,
                                use_skip=use_skip,
                                joint_encoder=joint_encoder)

  global ego_prev_rotate1, ego_prev_rotate2, ego_prev_rotate3, ego_baru_x, ego_baru_y, write_x_ego, write_y_ego, img_array1, img_array2, max_width, image_con, total_height, img_array3

  vars_to_restore = util.get_vars_to_save_and_restore(model_ckpt)
  saver = tf.train.Saver(vars_to_restore)
  sv = tf.train.Supervisor(logdir='/tmp/', saver=None)
  with sv.managed_session() as sess:
    saver.restore(sess, model_ckpt)
    if not gfile.Exists(output_dir):
      gfile.MakeDirs(output_dir)
    logging.info('Predictions will be saved in %s.', output_dir)

    # Collect all images to run inference on.
    im_files, basepath_in = collect_input_images(input_dir, input_list_file,
                                                 file_extension)
    if shuffle:
      logging.info('Shuffling data...')
      np.random.shuffle(im_files)
    logging.info('Running inference on %d files.', len(im_files))

    # Create missing output folders and pre-compute target directories.
    output_dirs = create_output_dirs(im_files, basepath_in, output_dir)

    # Run depth prediction network.
    if depth:
      im_batch = []
      for i in range(len(im_files)):
        if i % 100 == 0:
          logging.info('%s of %s files processed.', i, len(im_files))

        # Read image and run inference.
        if inference_mode == INFERENCE_MODE_SINGLE:
          if inference_crop == INFERENCE_CROP_NONE:
            im = util.load_image(im_files[i], resize=(img_width, img_height))
          elif inference_crop == INFERENCE_CROP_CITYSCAPES:
            im = util.crop_cityscapes(util.load_image(im_files[i]),
                                      resize=(img_width, img_height))
        elif inference_mode == INFERENCE_MODE_TRIPLETS:
          im = util.load_image(im_files[i], resize=(img_width * 3, img_height))
          im = im[:, img_width:img_width*2]
        if flip_for_depth:
          im = np.flip(im, axis=1)
        im_batch.append(im)

        if len(im_batch) == batch_size or i == len(im_files) - 1:
          # Call inference on batch.
          for _ in range(batch_size - len(im_batch)):  # Fill up batch.
            im_batch.append(np.zeros(shape=(img_height, img_width, 3),
                                     dtype=np.float32))
          im_batch = np.stack(im_batch, axis=0)
          est_depth = inference_model.inference_depth(im_batch, sess)
          if flip_for_depth:
            est_depth = np.flip(est_depth, axis=2)
            im_batch = np.flip(im_batch, axis=2)

          for j in range(len(im_batch)):
            color_map = util.normalize_depth_for_display(
                np.squeeze(est_depth[j]))
            visualization = np.concatenate((im_batch[j], color_map), axis=0)
            # Save raw prediction and color visualization. Extract filename
            # without extension from full path: e.g. path/to/input_dir/folder1/
            # file1.png -> file1
            k = i - len(im_batch) + 1 + j
            filename_root = os.path.splitext(os.path.basename(im_files[k]))[0]
            pref = '_flip' if flip_for_depth else ''
            output_raw = os.path.join(
                output_dirs[k], filename_root + pref + '.npy')
            output_vis = os.path.join(
                output_dirs[k], filename_root + pref + '.png')
            with gfile.Open(output_raw, 'wb') as f:
              np.save(f, est_depth[j])
            util.save_image(output_vis, visualization, file_extension)
          im_batch = []

    if objmotion:
      # Object motion is estimated together with egomotion in the loop below.
      logging.info('Object motion inference enabled.')

    # Run egomotion network.
    if egomotion:
      if inference_mode == INFERENCE_MODE_SINGLE:
        # Run regular egomotion inference loop.
        input_image_seq = []
        input_seg_seq = []
        current_sequence_dir = None
        current_output_handle = None
        current_output_handle2 = None
        for i in range(len(im_files)):
          sequence_dir = os.path.dirname(im_files[i])
          if sequence_dir != current_sequence_dir:
            # Assume start of a new sequence, since this image lies in a
            # different directory than the previous ones.
            # Clear egomotion input buffer.
            output_filepath = os.path.join(output_dirs[i], 'totalmotion.txt')
            output_filepath2 = os.path.join(output_dirs[i], 'objmotion.txt')
            if current_output_handle is not None:
              current_output_handle.close()

            if current_output_handle2 is not None:
              current_output_handle2.close()

            current_sequence_dir = sequence_dir
            logging.info('Writing egomotion sequence to %s.', output_filepath)
            logging.info('Writing objmotion sequence to %s.', output_filepath2)
            current_output_handle = gfile.Open(output_filepath, 'w')
            current_output_handle2 = gfile.Open(output_filepath2, 'w')
            input_image_seq = []
          im = util.load_image(im_files[i], resize=(img_width, img_height))
          input_image_seq.append(im)
          if use_masks:
            im_seg_path = im_files[i].replace('.%s' % file_extension,
                                              '-seg.%s' % file_extension)
            if not gfile.Exists(im_seg_path):
              raise ValueError('No segmentation mask %s has been found for '
                               'image %s. If none are available, disable '
                               'use_masks.' % (im_seg_path, im_files[i]))
            input_seg_seq.append(util.load_image(im_seg_path,
                                                 resize=(img_width, img_height),
                                                 interpolation='nn'))

          if len(input_image_seq) < seq_length:  # Buffer not filled yet.
            continue
          if len(input_image_seq) > seq_length:  # Remove oldest entry.
            del input_image_seq[0]
            if use_masks:
              del input_seg_seq[0]

          input_image_stack = np.concatenate(input_image_seq, axis=2)
          input_image_stack = np.expand_dims(input_image_stack, axis=0)
          if use_masks:
            input_image_stack = mask_image_stack(input_image_stack,
                                                 input_seg_seq)

          est_egomotion = np.squeeze(inference_model.inference_egomotion(
              input_image_stack, sess))

          # Inspect the raw egomotion prediction of shape (seq_length - 1, 6).
          logging.debug('est_egomotion = %s', est_egomotion)

          # Scale the translation component of the first frame-to-frame
          # transform (tx, ty, tz, rx, ry, rz) and accumulate the rotation.
          ego_x_prev = float(est_egomotion[0][0]) * scaling
          ego_y_prev = float(est_egomotion[0][1]) * scaling
          ego_z_prev = float(est_egomotion[0][2]) * scaling

          ego_prev_rotate1 += float(est_egomotion[0][3])
          ego_prev_rotate2 += float(est_egomotion[0][4])
          ego_prev_rotate3 += float(est_egomotion[0][5])

          # Rotate the translation into the world frame (Euler angles).
          c1, s1 = np.cos(ego_prev_rotate1), np.sin(ego_prev_rotate1)
          c2, s2 = np.cos(ego_prev_rotate2), np.sin(ego_prev_rotate2)
          c3, s3 = np.cos(ego_prev_rotate3), np.sin(ego_prev_rotate3)
          ego_coorY_prev = ((c2 * c3 - s1 * s2 * s3) * ego_x_prev
                            - c1 * s3 * ego_y_prev
                            + (s2 * c3 + s1 * c2 * s3) * ego_z_prev)
          ego_coorX_prev = (c1 * s2 * ego_x_prev + s1 * ego_y_prev
                            + c1 * c2 * ego_z_prev)

          ego_prev_x, ego_prev_y = ego_coorY_prev * scale, -ego_coorX_prev * scale

          # Integrate the position and map it to trajectory-image pixels.
          ego_baru_x += -ego_prev_x * 1.5
          ego_baru_y += -ego_prev_y * 1.5

          write_x_ego = 640 - ego_baru_x
          write_y_ego = 900 - ego_baru_y

          # Draw the current position (green dot) and side markers (white).
          cv2.circle(traj_new, (int(write_x_ego), int(write_y_ego)), 1, (0, 255, 0), 2)
          cv2.circle(traj_new, (int(write_x_ego - 14), int(write_y_ego + 3)), 1, (255, 255, 255), 3)
          cv2.circle(traj_new, (int(write_x_ego + 14), int(write_y_ego + 3)), 1, (255, 255, 255), 3)

          traj_viz = traj_new.copy()

          # Highlight the current pose with a rectangle on the display copy.
          cv2.rectangle(traj_viz, (int(write_x_ego) - 7, int(write_y_ego) - 14),
                        (int(write_x_ego) + 7, int(write_y_ego) + 14), (0, 255, 0), 2)

          # Read the input frame for the side-by-side visualization.
          imgg = cv2.imread(im_files[i])
          img_array1.append(imgg)
          height1, width1, layers1 = imgg.shape
          size1 = (width1, height1)

          img_array2.append(traj_viz)
          height2, width2, layers2 = traj_viz.shape
          size2 = (width2, height2)

          # Stack the trajectory view on top of the camera frame.
          image_con = []
          max_width = 0
          total_height = 0

          for img in (traj_viz, imgg):
            image_con.append(img)
            if image_con[-1].shape[1] > max_width:
              max_width = image_con[-1].shape[1]
            total_height += image_con[-1].shape[0]

          final_image = np.zeros((total_height, max_width, 3), dtype=np.uint8)

          current_y = 0  # Tracks where the last image was placed on the y axis.
          for image in image_con:
            # Paste each image into the canvas and advance the y offset.
            final_image[current_y:image.shape[0] + current_y, :image.shape[1], :] = image
            current_y += image.shape[0]

          img_array3.append(final_image)
          height3, width3, layers3 = final_image.shape
          size3 = (width3,height3)

          # Display the live trajectory visualization.
          cv2.imshow('trajectory', traj_viz)
          cv2.waitKey(1)


          # Object motion inference on the same input stack.

          est_objectmotion = np.squeeze(inference_model.inference_objectmotion(
              input_image_stack, sess))


          egomotion_str = []
          objectmotion_str = []

          for j in range(seq_length - 1):
            egomotion_str.append(','.join([str(d) for d in est_egomotion[j]]))
            objectmotion_str.append(','.join([str(d) for d in est_objectmotion[j]]))

          current_output_handle.write(
              str(i) + ' ' + ' '.join(egomotion_str) + ',' +
              ' '.join(objectmotion_str) + '\n')

          current_output_handle2.write(
              str(i) + ' ' + ' '.join(objectmotion_str) + '\n')

        if current_output_handle is not None:
          current_output_handle.close()

        if current_output_handle2 is not None:
          current_output_handle2.close()

        # Save the stacked visualization frames to video.
        #out1 = cv2.VideoWriter('scene1.avi', cv2.VideoWriter_fourcc(*'DIVX'), 15, size1)
        #out2 = cv2.VideoWriter('result1.avi', cv2.VideoWriter_fourcc(*'DIVX'), 15, size2)
        out3 = cv2.VideoWriter('total1.avi', cv2.VideoWriter_fourcc(*'DIVX'), 15, size3)

        # for i in range(len(img_array1)):
        #   out1.write(img_array1[i])
        # out1.release()

        # for j in range(len(img_array2)):
        #   out2.write(img_array2[j])
        # out2.release()

        for j in range(len(img_array3)):
          out3.write(img_array3[j])
        out3.release()

      elif inference_mode == INFERENCE_MODE_TRIPLETS:
        logging.info('Running egomotion inference in triplet mode.')
        written_before = []
        for i in range(len(im_files)):
          im = util.load_image(im_files[i], resize=(img_width * 3, img_height))
          input_image_stack = np.concatenate(
              [im[:, :img_width], im[:, img_width:img_width*2],
               im[:, img_width*2:]], axis=2)
          input_image_stack = np.expand_dims(input_image_stack, axis=0)
          if use_masks:
            im_seg_path = im_files[i].replace('.%s' % file_extension,
                                              '-seg.%s' % file_extension)
            if not gfile.Exists(im_seg_path):
              raise ValueError('No segmentation mask %s has been found for '
                               'image %s. If none are available, disable '
                               'use_masks.' % (im_seg_path, im_files[i]))
            seg = util.load_image(im_seg_path,
                                  resize=(img_width * 3, img_height),
                                  interpolation='nn')
            input_seg_seq = [seg[:, :img_width], seg[:, img_width:img_width*2],
                             seg[:, img_width*2:]]
            input_image_stack = mask_image_stack(input_image_stack,
                                                 input_seg_seq)
          est_egomotion = inference_model.inference_egomotion(
              input_image_stack, sess)
          est_egomotion = np.squeeze(est_egomotion)
          egomotion_1_2 = ','.join([str(d) for d in est_egomotion[0]])
          egomotion_2_3 = ','.join([str(d) for d in est_egomotion[1]])

          output_filepath = os.path.join(output_dirs[i], 'egomotion.txt')
          file_mode = 'w' if output_filepath not in written_before else 'a'
          with gfile.Open(output_filepath, file_mode) as current_output_handle:
            current_output_handle.write(str(i) + ' ' + egomotion_1_2 + ' ' +
                                        egomotion_2_3 + '\n')
          written_before.append(output_filepath)
      logging.info('Done.')

    elif objmotion:
      # Run object motion inference only.
      logging.info('Running object motion inference.')
      input_image_seq = []
      input_seg_seq = []
      current_sequence_dir = None
      current_output_handle = None
      for i in range(len(im_files)):
        sequence_dir = os.path.dirname(im_files[i])
        if sequence_dir != current_sequence_dir:
          # Start of a new sequence; reset the buffer and open a new file.
          output_filepath = os.path.join(output_dirs[i], 'objmotion.txt')
          if current_output_handle is not None:
            current_output_handle.close()
          current_sequence_dir = sequence_dir
          logging.info('Writing objmotion sequence to %s.', output_filepath)
          current_output_handle = gfile.Open(output_filepath, 'w')
          input_image_seq = []
        im = util.load_image(im_files[i], resize=(img_width, img_height))
        input_image_seq.append(im)
        if use_masks:
          im_seg_path = im_files[i].replace('.%s' % file_extension,
                                            '-seg.%s' % file_extension)
          if not gfile.Exists(im_seg_path):
            raise ValueError('No segmentation mask %s has been found for '
                             'image %s. If none are available, disable '
                             'use_masks.' % (im_seg_path, im_files[i]))
          input_seg_seq.append(util.load_image(im_seg_path,
                                               resize=(img_width, img_height),
                                               interpolation='nn'))

        if len(input_image_seq) < seq_length:  # Buffer not filled yet.
          continue
        if len(input_image_seq) > seq_length:  # Remove oldest entry.
          del input_image_seq[0]
          if use_masks:
            del input_seg_seq[0]

        input_image_stack = np.concatenate(input_image_seq, axis=2)
        input_image_stack = np.expand_dims(input_image_stack, axis=0)
        if use_masks:
          input_image_stack = mask_image_stack(input_image_stack,
                                               input_seg_seq)

        est_objectmotion = np.squeeze(inference_model.inference_objectmotion(
            input_image_stack, sess))
        # Write per-pair object motion, mirroring the egomotion output above.
        objectmotion_str = []
        for j in range(seq_length - 1):
          objectmotion_str.append(','.join([str(d) for d in est_objectmotion[j]]))
        current_output_handle.write(
            str(i) + ' ' + ' '.join(objectmotion_str) + '\n')
      if current_output_handle is not None:
        current_output_handle.close()
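
The motion files written by these loops share a simple line format: a frame index followed by space-separated groups of six comma-separated floats, one group per adjacent frame pair. A minimal sketch of a reader for egomotion.txt/objmotion.txt (totalmotion.txt joins its two blocks with a comma, so it would need extra splitting):

def parse_motion_line(line):
    # '<frame_index> <tx,ty,tz,rx,ry,rz> ...' -> (index, list of 6-vectors).
    parts = line.strip().split(' ')
    index = int(parts[0])
    transforms = [[float(v) for v in group.split(',')] for group in parts[1:]]
    return index, transforms

idx, transforms = parse_motion_line('7 0.01,0.0,0.8,0.001,0.02,0.0 0.02,0.0,0.7,0.0,0.01,0.0')
assert idx == 7 and len(transforms) == 2 and len(transforms[0]) == 6
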
Example #12
0
def _run_inference(output_dir=None,
                   file_extension='png',
                   depth=True,
                   egomotion=False,
                   model_ckpt=None,
                   input_dir=None,
                   input_list_file=None,
                   batch_size=1,
                   img_height=128,
                   img_width=416,
                   seq_length=3,
                   architecture=nets.RESNET,
                   imagenet_norm=True,
                   use_skip=True,
                   joint_encoder=True,
                   shuffle=False,
                   flip_for_depth=False,
                   inference_mode=INFERENCE_MODE_SINGLE,
                   inference_crop=INFERENCE_CROP_NONE,
                   use_masks=False):
  """Runs inference. Refer to flags in inference.py for details."""
  inference_model = model.Model(is_training=False,
                                batch_size=batch_size,
                                img_height=img_height,
                                img_width=img_width,
                                seq_length=seq_length,
                                architecture=architecture,
                                imagenet_norm=imagenet_norm,
                                use_skip=use_skip,
                                joint_encoder=joint_encoder)
  vars_to_restore = util.get_vars_to_save_and_restore(model_ckpt)
  saver = tf.train.Saver(vars_to_restore)
  sv = tf.train.Supervisor(logdir='/tmp/', saver=None)
  with sv.managed_session() as sess:
    saver.restore(sess, model_ckpt)
    if not gfile.Exists(output_dir):
      gfile.MakeDirs(output_dir)
    logging.info('Predictions will be saved in %s.', output_dir)

    # Collect all images to run inference on.
    im_files, basepath_in = collect_input_images(input_dir, input_list_file,
                                                 file_extension)
    if shuffle:
      logging.info('Shuffling data...')
      np.random.shuffle(im_files)
    logging.info('Running inference on %d files.', len(im_files))

    # Create missing output folders and pre-compute target directories.
    output_dirs = create_output_dirs(im_files, basepath_in, output_dir)

    # Run depth prediction network.
    if depth:
      im_batch = []
      for i in range(len(im_files)):
        if i % 100 == 0:
          logging.info('%s of %s files processed.', i, len(im_files))

        # Read image and run inference.
        if inference_mode == INFERENCE_MODE_SINGLE:
          if inference_crop == INFERENCE_CROP_NONE:
            im = util.load_image(im_files[i], resize=(img_width, img_height))
          elif inference_crop == INFERENCE_CROP_CITYSCAPES:
            im = util.crop_cityscapes(util.load_image(im_files[i]),
                                      resize=(img_width, img_height))
        elif inference_mode == INFERENCE_MODE_TRIPLETS:
          im = util.load_image(im_files[i], resize=(img_width * 3, img_height))
          im = im[:, img_width:img_width*2]
        if flip_for_depth:
          im = np.flip(im, axis=1)
        im_batch.append(im)

        if len(im_batch) == batch_size or i == len(im_files) - 1:
          # Call inference on batch.
          for _ in range(batch_size - len(im_batch)):  # Fill up batch.
            im_batch.append(np.zeros(shape=(img_height, img_width, 3),
                                     dtype=np.float32))
          im_batch = np.stack(im_batch, axis=0)
          est_depth = inference_model.inference_depth(im_batch, sess)
          if flip_for_depth:
            est_depth = np.flip(est_depth, axis=2)
            im_batch = np.flip(im_batch, axis=2)

          for j in range(len(im_batch)):
            color_map = util.normalize_depth_for_display(
                np.squeeze(est_depth[j]))
            visualization = np.concatenate((im_batch[j], color_map), axis=0)
            # Save raw prediction and color visualization. Extract filename
            # without extension from full path: e.g. path/to/input_dir/folder1/
            # file1.png -> file1
            k = i - len(im_batch) + 1 + j
            filename_root = os.path.splitext(os.path.basename(im_files[k]))[0]
            pref = '_flip' if flip_for_depth else ''
            output_raw = os.path.join(
                output_dirs[k], filename_root + pref + '.npy')
            output_vis = os.path.join(
                output_dirs[k], filename_root + pref + '.png')
            with gfile.Open(output_raw, 'wb') as f:
              np.save(f, est_depth[j])
            util.save_image(output_vis, visualization, file_extension)
          im_batch = []

    # Run egomotion network.
    if egomotion:
      if inference_mode == INFERENCE_MODE_SINGLE:
        # Run regular egomotion inference loop.
        input_image_seq = []
        input_seg_seq = []
        current_sequence_dir = None
        current_output_handle = None
        for i in range(len(im_files)):
          sequence_dir = os.path.dirname(im_files[i])
          if sequence_dir != current_sequence_dir:
            # Assume start of a new sequence, since this image lies in a
            # different directory than the previous ones.
            # Clear egomotion input buffer.
            output_filepath = os.path.join(output_dirs[i], 'egomotion.txt')
            if current_output_handle is not None:
              current_output_handle.close()
            current_sequence_dir = sequence_dir
            logging.info('Writing egomotion sequence to %s.', output_filepath)
            current_output_handle = gfile.Open(output_filepath, 'w')
            input_image_seq = []
          im = util.load_image(im_files[i], resize=(img_width, img_height))
          input_image_seq.append(im)
          if use_masks:
            im_seg_path = im_files[i].replace('.%s' % file_extension,
                                              '-seg.%s' % file_extension)
            if not gfile.Exists(im_seg_path):
              raise ValueError('No segmentation mask %s has been found for '
                               'image %s. If none are available, disable '
                               'use_masks.' % (im_seg_path, im_files[i]))
            input_seg_seq.append(util.load_image(im_seg_path,
                                                 resize=(img_width, img_height),
                                                 interpolation='nn'))

          if len(input_image_seq) < seq_length:  # Buffer not filled yet.
            continue
          if len(input_image_seq) > seq_length:  # Remove oldest entry.
            del input_image_seq[0]
            if use_masks:
              del input_seg_seq[0]

          input_image_stack = np.concatenate(input_image_seq, axis=2)
          input_image_stack = np.expand_dims(input_image_stack, axis=0)
          if use_masks:
            input_image_stack = mask_image_stack(input_image_stack,
                                                 input_seg_seq)
          est_egomotion = np.squeeze(inference_model.inference_egomotion(
              input_image_stack, sess))
          egomotion_str = []
          for j in range(seq_length - 1):
            egomotion_str.append(','.join([str(d) for d in est_egomotion[j]]))
          current_output_handle.write(
              str(i) + ' ' + ' '.join(egomotion_str) + '\n')
        if current_output_handle is not None:
          current_output_handle.close()
      elif inference_mode == INFERENCE_MODE_TRIPLETS:
        written_before = []
        for i in range(len(im_files)):
          im = util.load_image(im_files[i], resize=(img_width * 3, img_height))
          input_image_stack = np.concatenate(
              [im[:, :img_width], im[:, img_width:img_width*2],
               im[:, img_width*2:]], axis=2)
          input_image_stack = np.expand_dims(input_image_stack, axis=0)
          if use_masks:
            im_seg_path = im_files[i].replace('.%s' % file_extension,
                                              '-seg.%s' % file_extension)
            if not gfile.Exists(im_seg_path):
              raise ValueError('No segmentation mask %s has been found for '
                               'image %s. If none are available, disable '
                               'use_masks.' % (im_seg_path, im_files[i]))
            seg = util.load_image(im_seg_path,
                                  resize=(img_width * 3, img_height),
                                  interpolation='nn')
            input_seg_seq = [seg[:, :img_width], seg[:, img_width:img_width*2],
                             seg[:, img_width*2:]]
            input_image_stack = mask_image_stack(input_image_stack,
                                                 input_seg_seq)
          est_egomotion = inference_model.inference_egomotion(
              input_image_stack, sess)
          est_egomotion = np.squeeze(est_egomotion)
          egomotion_1_2 = ','.join([str(d) for d in est_egomotion[0]])
          egomotion_2_3 = ','.join([str(d) for d in est_egomotion[1]])

          output_filepath = os.path.join(output_dirs[i], 'egomotion.txt')
          file_mode = 'w' if output_filepath not in written_before else 'a'
          with gfile.Open(output_filepath, file_mode) as current_output_handle:
            current_output_handle.write(str(i) + ' ' + egomotion_1_2 + ' ' +
                                        egomotion_2_3 + '\n')
          written_before.append(output_filepath)
      logging.info('Done.')
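
For reference, a small sketch of the frame layout INFERENCE_MODE_TRIPLETS assumes: three frames stored side by side in one image of width img_width * 3, sliced and stacked channel-wise before the egomotion pass, as in the loop above.

import numpy as np

img_width, img_height = 416, 128  # defaults used throughout these examples
triplet = np.zeros((img_height, img_width * 3, 3), dtype=np.float32)

# Slice the wide image into its three frames and stack them along channels.
frames = [triplet[:, k * img_width:(k + 1) * img_width] for k in range(3)]
stack = np.expand_dims(np.concatenate(frames, axis=2), axis=0)
assert stack.shape == (1, img_height, img_width, 9)
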