示例#1
0
 def inference(self, image):
   """Predict a disparity map for one image.

   Corresponds to the disparity net of the struct2depth model. The image is
   scaled, resized for the network, run through the session, and the
   prediction is resized back and multiplied by the ratio
   ``self.image_size[0] / self.input_size[0]``.

   Args:
     image (numpy.array): Image array, dim = (h, w, c).

   Return:
     disparity (numpy.array): Disparity map as produced by
       ``self._resize(..., mode='restore')``; also stored in
       ``self.disparity``.
       NOTE(review): the network output is squeezed before the restore
       resize, so the exact output rank depends on ``_resize`` -- confirm.

   """
   # The unpack doubles as a check that the input has exactly three axes.
   _height, _width, _channels = image.shape
   # intensity scaling (0-255 -> 0-1) first, then resize to the network size
   net_input = self._resize(self._scale(image), mode='inference')
   net_input = np.expand_dims(net_input, axis=0)  # add the batch axis
   # look up the graph endpoints
   image_in = self.graph.get_tensor_by_name('depth_prediction/raw_input:0')
   disp_out = self.graph.get_tensor_by_name('depth_prediction/add_3:0')
   # forward pass, timed
   with self.graph.as_default():
     started = datetime.now()
     raw_disp = self.sess.run(disp_out, feed_dict={image_in: net_input})
     finished = datetime.now()
     self._log_info('*TF Disparity*: {}'.format(get_tdiff(started, finished)))
     # drop singleton axes and bring the map back to the original resolution
     self.disparity = self._resize(np.squeeze(raw_disp), mode='restore')
     # compensate disparity values for the change in image size
     self.disparity *= self.image_size[0]/self.input_size[0]
   return self.disparity
示例#2
0
 def get_egomotion(self, image_list):
   """Estimate egomotion from a triplet of images.

   Each image is resized for the network, the images are concatenated along
   axis 2, scaled, and fed to the egomotion net. The first three columns of
   the squeezed output are multiplied by ``self.pixel2meter_scale``.

   Args:
     image_list (numpy.array): List of images, dim = (3, h, w, c).

   Return:
     egomotion (numpy.array): Egomotion values ordered
       [tx, ty, tz, rx, ry, rz] along the last axis; also stored in
       ``self.egomotion``.
       NOTE(review): the result is indexed with two axes (``[:, :3]``), so
       it is presumably one 6-vector per image pair rather than a single
       (6,) vector -- confirm against the model output.

   """
   resized_frames = []
   for frame in image_list:
     resized_frames.append(self._resize(frame, mode='inference'))
   # stack the frames along axis 2, then scale from 0-255 to 0-1
   stacked = self._scale(np.concatenate(resized_frames, axis=2))
   stacked_batch = np.expand_dims(stacked, axis=0)  # add the batch axis
   # look up the graph endpoints
   stack_in = self.graph.get_tensor_by_name('raw_input:0')
   ego_out = self.graph.get_tensor_by_name('egomotion_prediction/pose_exp_net/pose/concat:0')
   # forward pass, timed
   with self.graph.as_default():
     started = datetime.now()
     raw_ego = self.sess.run(ego_out, feed_dict={stack_in: stacked_batch})
     finished = datetime.now()
     self._log_info('*TF Egomotion*: {}'.format(get_tdiff(started, finished)))
     self.egomotion = np.squeeze(raw_ego)
     # only the translation columns are rescaled; rotations stay untouched
     self.egomotion[:,:3] *= self.pixel2meter_scale
   return self.egomotion
示例#3
0
 def inference(self, image, score_threshold=None):
     """Run object detection on a single image.

     This method is the main function of this class. It takes in an image
     and gives out detection results: the image is fed to the detection
     graph, padded results are trimmed to the reported number of
     detections, low-score detections are dropped, and the boxes are
     converted to pixel coordinates.

     Args:
       image (numpy.array): Image array, dim = (h, w, c).
       score_threshold (float, optional): Threshold for a detected object
         to be reported. Falls back to ``self.score_threshold`` when None.
         Default: None.

     Return:
       boxes (numpy.array): Bounding boxes in pixels (int32), columns
         reordered from the network layout by index (1, 0, 3, 2) so that
         x comes before y, dim = (n, 4).
       scores (numpy.array): List of confidence scores, dim = (n,).
       classes (numpy.array): List of labels, dim = (n,).

     """
     h, w, _ = image.shape
     image_batch = np.expand_dims(image, axis=0)
     # get operators from graph
     image_tensor = self.graph.get_tensor_by_name('image_tensor:0')
     fetches = [
         self.graph.get_tensor_by_name('detection_boxes:0'),
         self.graph.get_tensor_by_name('detection_scores:0'),
         self.graph.get_tensor_by_name('detection_classes:0'),
         self.graph.get_tensor_by_name('num_detections:0'),
     ]
     # run inference
     with self.graph.as_default():
         t0 = datetime.now()
         boxes, scores, classes, num = self.sess.run(
             fetches, feed_dict={image_tensor: image_batch})
         t1 = datetime.now()
         self._log_info('*TF Detection*: {}'.format(get_tdiff(t0, t1)))
     # post processing ...
     # Strip only the batch axis. np.squeeze would also collapse a
     # detection axis of length one, turning boxes into shape (4,) and
     # breaking the 2-D slicing below.
     boxes, scores, classes = boxes[0], scores[0], classes[0]
     # num comes back as a length-one array; pull out the element explicitly
     # because int() on an array with ndim > 0 is deprecated in NumPy.
     num = int(np.ravel(num)[0])
     # take only valid results
     boxes, scores, classes = boxes[:num, :], scores[:num], classes[:num]
     # score threshold
     if score_threshold is None:
         score_threshold = self.score_threshold
     keep = scores > score_threshold  # computed once, reused for all three
     boxes, scores, classes = boxes[keep, :], scores[keep], classes[keep]
     self._log_info('{} objects found'.format(scores.shape[0]))
     # x-y reorder
     boxes = boxes[:, np.array([1, 0, 3, 2])]
     # transform from 0-1 to 0-w and 0-h
     boxes = np.multiply(boxes, np.array([w, h, w, h])).astype(np.int32)
     return boxes, scores, classes
示例#4
0
    def run_frame(self, image):
        """Process one frame: main pipeline, triplet build-up, trajectory.

        The frame counter is advanced, the main pipeline is run on the image,
        the image is appended to a rolling sequence capped at three entries,
        and once three images are available the trajectory pipeline updates
        the accumulated egomotion matrix. Each stage's duration is logged.

        Args:
          image (numpy.array): Image array, dim = (h, w, c).

        Return:
          frame_idx (int): Frame index.
          disp (numpy.array): Disparity map returned by ``main_pipeline``,
                              for visualization.
          egomo_trmat (numpy.array): Accumulated egomotion transformation
                                     matrix, for visualization, dim = (4, 4).
          t_list (list of tracker): List of trackers for visualization.

        """
        self.frame_idx += 1

        # stage 1: main pipeline
        started = datetime.now()
        disp = self.main_pipeline(image)
        finished = datetime.now()
        logging.info('main pipeline: {}'.format(get_tdiff(started, finished)))

        # stage 2: rolling window holding at most the three latest images
        started = datetime.now()
        self.image_seq.append(image)
        window_overflows = len(self.image_seq) > 3
        if window_overflows:
            del self.image_seq[0]  # discard the oldest image
        finished = datetime.now()
        logging.info('image stack: {}'.format(get_tdiff(started, finished)))

        # stage 3: trajectory pipeline, only once a full triplet is available
        started = datetime.now()
        triplet_ready = len(self.image_seq) >= 3
        if triplet_ready:
            self.egomo_trmat = self.traj_pipeline(prev_trmat=self.egomo_trmat)
        finished = datetime.now()
        logging.info('traj pipeline: {}'.format(get_tdiff(started, finished)))

        return self.frame_idx, disp, self.egomo_trmat, self.t_list
示例#5
0
    def main_pipeline(self, image):
        """Main pipeline of tracking-by-detection.

        For one image: detect objects (boxes, scores, labels), estimate the
        disparity map and a depth per box, advance every tracker with
        ``predict``, associate detections with trackers, update matched
        trackers, refresh tracker status and drop the ones reported dead,
        and finally start a new tracker for every unmatched detection. The
        duration of each stage is logged.

        Args:
          image (numpy.array): Image array, dim = (h, w, c).

        Return:
          disp (numpy.array): Disparity map returned by
                              ``self.est.inference``, for visualization.

        """
        # --- detection ---
        started = datetime.now()
        bbox_list, score_list, label_list = self.det.inference(image)
        finished = datetime.now()
        logging.info('main pipeline (det): {}'.format(
            get_tdiff(started, finished)))

        # --- disparity / depth estimation ---
        started = datetime.now()
        disp = self.est.inference(image)
        depth_list = self.est.calc_depth(bbox_list)
        finished = datetime.now()
        logging.info('main pipeline (est): {}'.format(
            get_tdiff(started, finished)))

        # --- tracker prediction ---
        started = datetime.now()
        for trk in self.t_list:
            trk.predict()
        finished = datetime.now()
        logging.info('main pipeline (trk_pred): {}'.format(
            get_tdiff(started, finished)))

        # --- data association ---
        started = datetime.now()
        association = associate(bbox_list, label_list, self.t_list)
        matched_pair, unmatched_bbox_list, _ = association
        finished = datetime.now()
        logging.info('main pipeline (da_solver): {}'.format(
            get_tdiff(started, finished)))

        # --- tracker update ---
        started = datetime.now()

        # matched pairs: element 0 indexes the detection, element 1 the tracker
        for pair in matched_pair:
            det_idx = pair[0]
            trk = self.t_list[pair[1]]
            trk.update(
                self.frame_idx,
                bbox_list[det_idx],
                depth_list[det_idx],
                {'label': label_list[det_idx], 'score': score_list[det_idx]})

        # refresh the in-track status of every tracker
        for trk in self.t_list:
            trk.update_status(self.frame_idx)

        # keep only the trackers that still report a truthy status
        survivors = []
        for trk in self.t_list:
            if trk.get_status():
                survivors.append(trk)
        self.t_list = survivors

        # unmatched detections each start a new tracker with a fresh id
        for det_idx in unmatched_bbox_list:
            new_est = {'label': label_list[det_idx],
                       'score': score_list[det_idx]}
            new_trk = tracker(self.t_cfg, self.tid_new, bbox_list[det_idx],
                              depth_list[det_idx], new_est)
            self.t_list.append(new_trk)
            self.tid_new += 1

        finished = datetime.now()
        logging.info('main pipeline (trk_upd): {}'.format(
            get_tdiff(started, finished)))

        # disparity map for display
        return disp