Example #1
# Module-level imports required by this snippet:
import json
import os

import numpy as np
import yaml
from imgaug import augmenters as iaa

import bop_renderer
    def __init__(self,
                 dataset_type,
                 dataset_path,
                 real_path,
                 mesh_path,
                 mesh_info,
                 object_id,
                 batch_size,
                 img_res=(224, 224, 3),
                 is_testing=False):
        self.data_type = dataset_type
        self.img_res = img_res
        self.dataset_path = dataset_path
        self.real_path = [
            os.path.join(real_path, x) for x in os.listdir(real_path)
        ]
        self.batch_size = batch_size
        self.is_testing = is_testing
        self.ply_path = mesh_path
        self.obj_id = int(object_id)

        # Paths to the COCO-style annotation files.
        self.train_info = os.path.join(self.dataset_path, 'annotations',
                                       'instances_train.json')
        self.val_info = os.path.join(self.dataset_path, 'annotations',
                                     'instances_val.json')
        self.mesh_info = mesh_info
        with open(self.train_info, 'r') as js:
            data = json.load(js)
        image_ann = data["images"]
        anno_ann = data["annotations"]
        self.image_ids = []
        self.Anns = []

        # Initialize the offscreen renderer (takes < 11 ms).
        self.ren = bop_renderer.Renderer()
        self.ren.init(640, 480)
        self.ren.add_object(self.obj_id, self.ply_path)

        # Read the object diameter from the mesh info file.
        with open(self.mesh_info, 'r') as stream:
            for key, value in yaml.safe_load(stream).items():
                # Note: keys are matched against obj_id + 1 (one-based ids).
                if int(key) == self.obj_id + 1:
                    self.model_dia = value['diameter']

        for ann in anno_ann:
            # Center and side length of an enlarged square crop around the bbox.
            y_mean = ann['bbox'][0] + ann['bbox'][2] * 0.5
            x_mean = ann['bbox'][1] + ann['bbox'][3] * 0.5
            max_side = np.max(ann['bbox'][2:])
            x_min = int(x_mean - max_side * 0.75)
            x_max = int(x_mean + max_side * 0.75)
            y_min = int(y_mean - max_side * 0.75)
            y_max = int(y_mean + max_side * 0.75)
            # Keep only sufficiently visible annotations of the target category
            # whose enlarged crop lies fully inside the 640x480 image.
            if (ann['category_id'] != 2 or ann['feature_visibility'] < 0.5
                    or x_min < 0 or x_max > 639 or y_min < 0 or y_max > 479):
                continue
            self.Anns.append(ann)
            # Zero-pad the image id to 11 digits to build the file name.
            img_id = str(ann['image_id'])
            name = img_id.zfill(11) + '_rgb.png'
            img_path = os.path.join(self.dataset_path, 'images',
                                    self.data_type, name)
            self.image_ids.append(img_path)

        # Camera intrinsics, taken from the first image record.
        self.fx = image_ann[0]["fx"]
        self.fy = image_ann[0]["fy"]
        self.cx = image_ann[0]["cx"]
        self.cy = image_ann[0]["cy"]

        # Shuffle annotations and image paths jointly so pairs stay aligned.
        c = list(zip(self.Anns, self.image_ids))
        np.random.shuffle(c)
        self.Anns, self.image_ids = zip(*c)

        self.img_seq = iaa.Sequential(
            [
                # blur
                iaa.SomeOf((0, 2), [
                    iaa.GaussianBlur((0.0, 2.0)),
                    iaa.AverageBlur(k=(3, 7)),
                    iaa.MedianBlur(k=(3, 7)),
                    iaa.BilateralBlur(d=(1, 7)),
                    iaa.MotionBlur(k=(3, 7))
                ]),
                # color
                iaa.SomeOf((0, 2), [
                    iaa.AddToHueAndSaturation((-15, 15)),
                    iaa.Grayscale(alpha=(0.0, 0.2))
                ]),
                # brightness
                iaa.OneOf([
                    iaa.Sequential([
                        iaa.Add((-10, 10), per_channel=0.5),
                        iaa.Multiply((0.75, 1.25), per_channel=0.5)
                    ]),
                    iaa.Add((-10, 10), per_channel=0.5),
                    iaa.Multiply((0.75, 1.25), per_channel=0.5),
                    iaa.FrequencyNoiseAlpha(exponent=(-4, 0),
                                            first=iaa.Multiply(
                                                (0.75, 1.25), per_channel=0.5),
                                            second=iaa.LinearContrast(
                                                (0.7, 1.3), per_channel=0.5))
                ]),
                # contrast
                iaa.SomeOf((0, 2), [
                    iaa.GammaContrast((0.75, 1.25), per_channel=0.5),
                    iaa.SigmoidContrast(
                        gain=(0, 10), cutoff=(0.25, 0.75), per_channel=0.5),
                    iaa.LogContrast(gain=(0.75, 1), per_channel=0.5),
                    iaa.LinearContrast(alpha=(0.7, 1.3), per_channel=0.5)
                ]),
            ],
            random_order=True)
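        # Note: elsewhere in this class the pipeline above is typically applied
        # per batch, e.g. batch_aug = self.img_seq(images=batch) for a uint8
        # array of shape (N, H, W, 3) (hypothetical usage, not in this snippet).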

        self.n_batches = int(np.floor(len(self.image_ids) / self.batch_size))
        self.on_epoch_end()
        self.dataset_length = len(self.image_ids)
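
The constructor above only initializes the renderer; the actual render calls live elsewhere in the generator. Below is a minimal sketch of what such a call might look like, assuming bop_renderer's `render_object`/`get_color_image`/`get_depth_image` bindings; the method name `render_example` and the pose values are hypothetical, not from this project.

    def render_example(self, R, t):
        # R: row-major 3x3 rotation flattened to 9 floats;
        # t: translation in millimeters, e.g. R = identity, t = [0, 0, 400].
        self.ren.render_object(self.obj_id, R, t,
                               self.fx, self.fy, self.cx, self.cy)
        rgb = self.ren.get_color_image(self.obj_id)
        depth = self.ren.get_depth_image(self.obj_id)
        return rgb, depth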
Example #2
# Module-level imports required by this snippet:
import os

import bop_renderer
if __name__ == "__main__":

    data_path = '/home/stefan/data/renderings/CIT_render_250/patches'
    mesh_path = '/home/stefan/data/Meshes/CIT_color/'
    target = '/home/stefan/data/train_data/CIT_PBR/'

    visu = True
    resX = 640
    resY = 480
    fx = 623.1298104626079  # focal length computed in Blender
    fy = 617.1590544390115  # focal length computed in Blender
    cx = 320.0
    cy = 240.0
    K = [fx, 0.0, cx, 0.0, fy, cy, 0.0, 0.0, 1.0]  # 3x3 intrinsics, row-major

    ren = bop_renderer.Renderer()
    ren.init(resX, resY)
    mesh_id = 1
    light_pose = [0.0, 0.0, 0.0]
    light_color = [1.0, 0.0, 0.0]
    light_ambient_weight = 1.0
    light_diffuse_weight = 1.0
    light_spec_weight = 0.0
    light_spec_shine = 1.0
    ren.set_light(light_pose, light_color, light_ambient_weight,
                  light_diffuse_weight, light_spec_weight, light_spec_shine)
    categories = []

    for mesh_now in os.listdir(mesh_path):
        mesh_path_now = os.path.join(mesh_path, mesh_now)
        if not mesh_now.endswith('.ply'):
            continue  # skip non-mesh files
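
The listing is cut off at this point. A rough sketch of how the loop body might continue, assuming each mesh is registered under an incrementing id and rendered with the light configured above; the pose values are illustrative, not from the original script.

        # Hypothetical continuation of the truncated loop body:
        ren.add_object(mesh_id, mesh_path_now)  # register this mesh
        R = [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0]  # row-major 3x3
        t = [0.0, 0.0, 500.0]  # translation in mm
        ren.render_object(mesh_id, R, t, fx, fy, cx, cy)
        rgb = ren.get_color_image(mesh_id)
        categories.append(mesh_id)
        mesh_id += 1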
Example #3
    def __init__(self, width, height):
        """See base class."""
        super(RendererCpp, self).__init__(width, height)
        self.renderer = bop_renderer.Renderer()
        self.renderer.init(width, height)
        self._set_light()
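
`_set_light()` is defined elsewhere in the class. A minimal sketch of such a helper, assuming the same `set_light` binding used in Example #2; the light values here are placeholders, not the ones used by the real class.

    def _set_light(self):
        # Placeholder light settings (position, color, ambient weight,
        # diffuse weight, specular weight, specular shininess).
        self.renderer.set_light([0.0, 0.0, 0.0], [1.0, 1.0, 1.0],
                                0.5, 1.0, 0.0, 1.0)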
Example #4
File: infer.py Project: zebrajack/epos
# Module-level imports needed by this snippet; project-specific modules
# (common, config, datagen, misc, model, inout, dataset_params, FLAGS) come
# from the epos code base and the BOP toolkit.
import os
import time

import numpy as np
import tensorflow as tf

import bop_renderer
def main(unused_argv):
  tf.logging.set_verbosity(tf.logging.INFO)

  # Model folder.
  model_dir = os.path.join(config.TF_MODELS_PATH, FLAGS.model)

  # Update flags with parameters loaded from the model folder.
  common.update_flags(os.path.join(model_dir, common.PARAMS_FILENAME))

  # Print the flag values.
  common.print_flags()

  # Folder from which the latest model checkpoint will be loaded.
  checkpoint_dir = os.path.join(model_dir, 'train')

  # Folder for the inference output.
  infer_dir = os.path.join(model_dir, 'infer')
  tf.gfile.MakeDirs(infer_dir)

  # Folder for the visualization output.
  vis_dir = os.path.join(model_dir, 'vis')
  tf.gfile.MakeDirs(vis_dir)

  # TFRecord files to run the inference on.
  tfrecord_names = FLAGS.infer_tfrecord_names
  if not isinstance(FLAGS.infer_tfrecord_names, list):
    tfrecord_names = [FLAGS.infer_tfrecord_names]

  # Stride of the final output.
  if FLAGS.upsample_logits:
    # The stride is 1 if the logits are upsampled to the input resolution.
    output_stride = 1
  else:
    assert len(FLAGS.decoder_output_stride) == 1
    output_stride = FLAGS.decoder_output_stride[0]

  with tf.Graph().as_default():

    return_gt_orig = np.any([
      FLAGS.task_type == common.LOCALIZATION,
      FLAGS.vis_gt_poses])

    return_gt_maps = np.any([
      FLAGS.vis_pred_obj_labels,
      FLAGS.vis_pred_obj_confs,
      FLAGS.vis_pred_frag_fields])

    # Dataset provider.
    dataset = datagen.Dataset(
      dataset_name=FLAGS.dataset,
      tfrecord_names=tfrecord_names,
      model_dir=model_dir,
      model_variant=FLAGS.model_variant,
      batch_size=1,
      max_height_before_crop=FLAGS.infer_max_height_before_crop,
      crop_size=list(map(int, FLAGS.infer_crop_size)),
      num_frags=FLAGS.num_frags,
      min_visib_fract=None,
      gt_knn_frags=1,
      output_stride=output_stride,
      is_training=False,
      return_gt_orig=return_gt_orig,
      return_gt_maps=return_gt_maps,
      should_shuffle=False,
      should_repeat=False,
      prepare_for_projection=FLAGS.project_to_surface,
      data_augmentations=None)

    # Initialize a renderer for visualization.
    renderer = None
    if FLAGS.vis_gt_poses or FLAGS.vis_pred_poses:
      tf.logging.info('Initializing renderer for visualization...')

      renderer = bop_renderer.Renderer()
      renderer.init(dataset.crop_size[0], dataset.crop_size[1])

      model_type_vis = 'eval'
      dp_model = dataset_params.get_model_params(
        config.BOP_PATH, dataset.dataset_name, model_type=model_type_vis)
      for obj_id in dp_model['obj_ids']:
        path = dp_model['model_tpath'].format(obj_id=obj_id)
        renderer.add_object(obj_id, path)

      tf.logging.info('Renderer initialized.')

    # Inputs.
    samples = dataset.get_one_shot_iterator().get_next()

    # A map from output type to the number of associated channels.
    outputs_to_num_channels = common.get_outputs_to_num_channels(
      dataset.num_objs, dataset.model_store.num_frags)

    # Options of the neural network model.
    model_options = common.ModelOptions(
        outputs_to_num_channels=outputs_to_num_channels,
        crop_size=list(map(int, FLAGS.infer_crop_size)),
        atrous_rates=FLAGS.atrous_rates,
        encoder_output_stride=FLAGS.encoder_output_stride)

    # Construct the inference graph.
    predictions = model.predict(
        images=samples[common.IMAGE],
        model_options=model_options,
        upsample_logits=FLAGS.upsample_logits,
        image_pyramid=FLAGS.image_pyramid,
        num_objs=dataset.num_objs,
        num_frags=dataset.num_frags,
        frag_cls_agnostic=FLAGS.frag_cls_agnostic,
        frag_loc_agnostic=FLAGS.frag_loc_agnostic)

    # Global step.
    tf.train.get_or_create_global_step()

    # Get path to the model checkpoint.
    if FLAGS.checkpoint_name is None:
      checkpoint_path = tf.train.latest_checkpoint(checkpoint_dir)
    else:
      checkpoint_path = os.path.join(checkpoint_dir, FLAGS.checkpoint_name)

    time_str = time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime())
    tf.logging.info('Starting inference at: {}'.format(time_str))
    tf.logging.info('Inference with model: {}'.format(checkpoint_path))

    # Scaffold for initialization.
    scaffold = tf.train.Scaffold(
      init_op=tf.global_variables_initializer(),
      saver=tf.train.Saver(var_list=misc.get_variable_dict()))

    # TensorFlow configuration.
    if FLAGS.cpu_only:
      tf_config = tf.ConfigProto(device_count={'GPU': 0})
    else:
      tf_config = tf.ConfigProto()
      # Allocate all GPU memory upfront; set allow_growth = True to allocate
      # only as much as needed.
      tf_config.gpu_options.allow_growth = False

    # Nodes that can use multiple threads to parallelize their execution will
    # schedule the individual pieces into this pool.
    tf_config.intra_op_parallelism_threads = 10

    # All ready nodes are scheduled in this pool.
    tf_config.inter_op_parallelism_threads = 10

    poses_all = []
    first_im_poses_num = 0

    session_creator = tf.train.ChiefSessionCreator(
        config=tf_config,
        scaffold=scaffold,
        master=FLAGS.master,
        checkpoint_filename_with_path=checkpoint_path)
    with tf.train.MonitoredSession(
          session_creator=session_creator, hooks=None) as sess:

      im_ind = 0
      while not sess.should_stop():

        # Estimate object poses for the current image.
        poses, run_times = process_image(
            sess=sess,
            samples=samples,
            predictions=predictions,
            im_ind=im_ind,
            crop_size=dataset.crop_size,
            output_scale=(1.0 / output_stride),
            model_store=dataset.model_store,
            renderer=renderer,
            task_type=FLAGS.task_type,
            infer_name=FLAGS.infer_name,
            infer_dir=infer_dir,
            vis_dir=vis_dir)

        # Note that the first image takes longer (because of TF initialization).
        tf.logging.info(
          'Image: {}, prediction: {:.3f}, establish_corr: {:.3f}, '
          'fitting: {:.3f}, total time: {:.3f}'.format(
            im_ind, run_times['prediction'], run_times['establish_corr'],
            run_times['fitting'], run_times['total']))

        poses_all += poses
        if im_ind == 0:
          first_im_poses_num = len(poses)
        im_ind += 1

    # Set the time of pose estimates from the first image to the average time,
    # since TensorFlow takes much longer on the first image (initialization).
    time_avg = 0.0
    for pose in poses_all:
      time_avg += pose['time']
    if len(poses_all) > 0:
      time_avg /= float(len(poses_all))
    for i in range(first_im_poses_num):
      poses_all[i]['time'] = time_avg

    # Save the estimated poses in the BOP format:
    # https://bop.felk.cvut.cz/challenges/bop-challenge-2020/#formatofresults
    if FLAGS.save_estimates:
      suffix = ''
      if FLAGS.infer_name is not None:
        suffix = '_{}'.format(FLAGS.infer_name)
      poses_path = os.path.join(
        infer_dir, 'estimated-poses{}.csv'.format(suffix))
      tf.logging.info('Saving estimated poses to: {}'.format(poses_path))
      inout.save_bop_results(poses_path, poses_all, version='bop19')

    time_str = time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime())
    tf.logging.info('Finished inference at: {}'.format(time_str))
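
The listing ends with the final logging call. For completeness, a script structured like this is typically launched through TensorFlow's flag-parsing entry point; a sketch assuming the standard TF 1.x pattern, not shown in the excerpt:

if __name__ == '__main__':
  # Parses command-line flags and invokes main(unused_argv).
  tf.app.run(main=main)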