def inference(self, image): """Inference method. This method corresponds to the disparity net of the struct2depth model. Args: image (numpy.array): Image array, dim = (h, w, c). Return: disparity (numpy.array): Disparity map, dim = (h, w, c). """ h, w, c = image.shape # scale from 0-255 to 0-1 image = self._scale(image) # image resize image_res = self._resize(image, mode='inference') image_batch = np.expand_dims(image_res, axis=0) # get operators from graph input_image = self.graph.get_tensor_by_name('depth_prediction/raw_input:0') output_disp = self.graph.get_tensor_by_name('depth_prediction/add_3:0') # run inference with self.graph.as_default(): t0 = datetime.now() disp_batch = self.sess.run(output_disp, feed_dict={input_image: image_batch}) t1 = datetime.now() self._log_info('*TF Disparity*: {}'.format(get_tdiff(t0, t1))) self.disparity = self._resize(np.squeeze(disp_batch), mode='restore') disp_scale = self.image_size[0]/self.input_size[0] self.disparity *= disp_scale return self.disparity
def get_egomotion(self, image_list):
    """Obtaining egomotion vector from a triplet of images.

    Args:
        image_list (numpy.array): List of images, dim = (3, h, w, c).

    Return:
        egomotion (numpy.array): Egomotion vector
            [tx, ty, tz, rx, ry, rz], dim = (6,).
    """
    image_list_res = [self._resize(i, mode='inference') for i in image_list]
    image_stack = np.concatenate(image_list_res, axis=2)
    image_stack = self._scale(image_stack)  # scale from 0-255 to 0-1
    image_stack_batch = np.expand_dims(image_stack, axis=0)
    # get operators from graph
    input_image_stack = self.graph.get_tensor_by_name('raw_input:0')
    output_egomotion = self.graph.get_tensor_by_name(
        'egomotion_prediction/pose_exp_net/pose/concat:0')
    # run inference
    with self.graph.as_default():
        t0 = datetime.now()
        egomotion_batch = self.sess.run(
            output_egomotion,
            feed_dict={input_image_stack: image_stack_batch})
        t1 = datetime.now()
    self._log_info('*TF Egomotion*: {}'.format(get_tdiff(t0, t1)))
    self.egomotion = np.squeeze(egomotion_batch)
    self.egomotion[:, :3] *= self.pixel2meter_scale
    return self.egomotion
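
# The trajectory pipeline accumulates egomotion into a 4x4 homogeneous
# transform (`egomo_trmat` in `run_frame` below). A minimal sketch of turning
# the 6-DoF vector into such a matrix follows; interpreting [rx, ry, rz] as
# Euler angles composed in z-y-x order is an assumption, not confirmed by the
# source.
def egomotion_to_trmat_sketch(egomotion):
    """Convert [tx, ty, tz, rx, ry, rz] into a 4x4 transformation matrix."""
    tx, ty, tz, rx, ry, rz = egomotion
    cx, sx = np.cos(rx), np.sin(rx)
    cy, sy = np.cos(ry), np.sin(ry)
    cz, sz = np.cos(rz), np.sin(rz)
    rot_x = np.array([[1, 0, 0], [0, cx, -sx], [0, sx, cx]])
    rot_y = np.array([[cy, 0, sy], [0, 1, 0], [-sy, 0, cy]])
    rot_z = np.array([[cz, -sz, 0], [sz, cz, 0], [0, 0, 1]])
    trmat = np.eye(4)
    trmat[:3, :3] = rot_z @ rot_y @ rot_x  # rotation block
    trmat[:3, 3] = [tx, ty, tz]            # translation block
    return trmat
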
def inference(self, image, score_threshold=None):
    """Inference method.

    This method is the main function of this class. It takes in an image
    and gives out detection results.

    Args:
        image (numpy.array): Image array, dim = (h, w, c).
        score_threshold (float, optional): Threshold for a detected object
            to be reported. Default: None.

    Return:
        boxes (numpy.array): List of bounding boxes, dim = (n, 4).
        scores (numpy.array): List of confidence scores, dim = (n,).
        classes (numpy.array): List of labels, dim = (n,).
    """
    h, w, c = image.shape
    image_batch = np.expand_dims(image, axis=0)
    # get operators from graph
    image_tensor = self.graph.get_tensor_by_name('image_tensor:0')
    detection_boxes = self.graph.get_tensor_by_name('detection_boxes:0')
    detection_scores = self.graph.get_tensor_by_name('detection_scores:0')
    detection_classes = self.graph.get_tensor_by_name('detection_classes:0')
    num_detections = self.graph.get_tensor_by_name('num_detections:0')
    # run inference
    with self.graph.as_default():
        t0 = datetime.now()
        (boxes, scores, classes, num) = self.sess.run(
            [detection_boxes, detection_scores, detection_classes,
             num_detections],
            feed_dict={image_tensor: image_batch})
        t1 = datetime.now()
    num = int(num)
    self._log_info('*TF Detection*: {}'.format(get_tdiff(t0, t1)))
    # post processing: purge the useless batch dimension
    boxes, scores, classes = (np.squeeze(boxes), np.squeeze(scores),
                              np.squeeze(classes))
    # take only valid results
    boxes, scores, classes = boxes[:num, :], scores[:num], classes[:num]
    # apply score threshold
    if score_threshold is None:
        score_threshold = self.score_threshold
    boxes = boxes[scores > score_threshold, :]
    classes = classes[scores > score_threshold]
    scores = scores[scores > score_threshold]
    num = scores.shape[0]
    self._log_info('{} objects found'.format(num))
    # x-y reorder: [y1, x1, y2, x2] -> [x1, y1, x2, y2]
    boxes = boxes[:, np.array([1, 0, 3, 2])]
    # transform from normalized 0-1 coordinates to 0-w and 0-h pixels
    boxes = np.multiply(boxes, np.array([w, h, w, h])).astype(np.int32)
    return boxes, scores, classes
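
# The tensor names above ('image_tensor:0', 'detection_boxes:0', ...) follow
# the standard TF1 Object Detection API frozen-graph export. A minimal sketch
# of loading such a graph, assuming TF 1.x; the function name and path are
# placeholders, not from the source.
import tensorflow as tf  # TF 1.x, matching the session-based code above

def load_frozen_graph_sketch(pb_path):
    """Load a frozen inference graph (.pb) into a new tf.Graph."""
    graph = tf.Graph()
    with graph.as_default():
        graph_def = tf.GraphDef()
        with tf.gfile.GFile(pb_path, 'rb') as f:
            graph_def.ParseFromString(f.read())
        tf.import_graph_def(graph_def, name='')
    return graph
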
def run_frame(self, image):
    """Frame routine, including the main pipeline, triplet build-up, and
    the trajectory pipeline.

    Args:
        image (numpy.array): Image array, dim = (h, w, c).

    Return:
        frame_idx (int): Frame index.
        disp (numpy.array): Disparity map, for visualization,
            dim = (h, w, c).
        egomo_trmat (numpy.array): Accumulated egomotion transformation
            matrix, for visualization, dim = (4, 4).
        t_list (list of tracker): List of trackers, for visualization.
    """
    self.frame_idx += 1

    # run main pipeline
    t0 = datetime.now()
    disp = self.main_pipeline(image)
    t1 = datetime.now()
    logging.info('main pipeline: {}'.format(get_tdiff(t0, t1)))

    # prepare image sequence of 3 for trajectory pipeline
    t0 = datetime.now()
    self.image_seq.append(image)
    if len(self.image_seq) > 3:
        del self.image_seq[0]
    t1 = datetime.now()
    logging.info('image stack: {}'.format(get_tdiff(t0, t1)))

    # run trajectory pipeline once the triplet is complete
    t0 = datetime.now()
    if len(self.image_seq) >= 3:
        self.egomo_trmat = self.traj_pipeline(prev_trmat=self.egomo_trmat)
    t1 = datetime.now()
    logging.info('traj pipeline: {}'.format(get_tdiff(t0, t1)))

    return self.frame_idx, disp, self.egomo_trmat, self.t_list
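
# A minimal driver sketch for the per-frame routine. The OpenCV capture
# source and the `Pipeline` wrapper name are assumptions, not from the
# source:
#
#   import cv2
#   cap = cv2.VideoCapture('input.mp4')
#   pipe = Pipeline(...)  # hypothetical pipeline object exposing run_frame
#   while cap.isOpened():
#       ok, frame = cap.read()
#       if not ok:
#           break
#       frame_idx, disp, egomo_trmat, t_list = pipe.run_frame(frame)
#   cap.release()
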
def main_pipeline(self, image):
    """Main pipeline of tracking-by-detection.

    From one image, we obtain a list of detected objects along with their
    bounding boxes, labels, and depths. Objects are tracked with the data
    association solver and a list of trackers.

    Args:
        image (numpy.array): Image array, dim = (h, w, c).

    Return:
        disp (numpy.array): Disparity map, for visualization,
            dim = (h, w, c).
    """
    # detection
    t0 = datetime.now()
    bbox_list, score_list, label_list = self.det.inference(image)
    t1 = datetime.now()
    logging.info('main pipeline (det): {}'.format(get_tdiff(t0, t1)))

    # depth estimation
    t0 = datetime.now()
    disp = self.est.inference(image)
    depth_list = self.est.calc_depth(bbox_list)
    t1 = datetime.now()
    logging.info('main pipeline (est): {}'.format(get_tdiff(t0, t1)))

    # tracker predict
    t0 = datetime.now()
    for t in self.t_list:
        t.predict()
    t1 = datetime.now()
    logging.info('main pipeline (trk_pred): {}'.format(get_tdiff(t0, t1)))

    # associate detections with trackers
    t0 = datetime.now()
    matched_pair, unmatched_bbox_list, _ = associate(
        bbox_list, label_list, self.t_list)
    t1 = datetime.now()
    logging.info('main pipeline (da_solver): {}'.format(get_tdiff(t0, t1)))

    t0 = datetime.now()
    # update trackers with their matched detections
    for m in matched_pair:
        t = self.t_list[m[1]]
        bbox = bbox_list[m[0]]
        depth = depth_list[m[0]]
        est_dict = {'label': label_list[m[0]], 'score': score_list[m[0]]}
        t.update(self.frame_idx, bbox, depth, est_dict)
    # update in-track status of all trackers
    for t in self.t_list:
        t.update_status(self.frame_idx)
    # purge dead trackers
    self.t_list = [t for t in self.t_list if t.get_status()]
    # create new trackers for unmatched detections
    for b_idx in unmatched_bbox_list:
        bbox = bbox_list[b_idx]
        depth = depth_list[b_idx]
        est_dict = {'label': label_list[b_idx], 'score': score_list[b_idx]}
        self.t_list.append(
            tracker(self.t_cfg, self.tid_new, bbox, depth, est_dict))
        self.tid_new += 1
    t1 = datetime.now()
    logging.info('main pipeline (trk_upd): {}'.format(get_tdiff(t0, t1)))

    # disparity map for display
    return disp
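
# The `associate` solver's body is not shown in this section. The overlap
# measure it would typically rely on is intersection-over-union between a
# detection box and a tracker's predicted box; a minimal sketch follows.
# This is an illustration only, not the source's `associate` implementation
# (which may pair boxes with e.g. the Hungarian algorithm on an IoU cost).
def iou_sketch(a, b):
    """IoU of two pixel boxes in [x1, y1, x2, y2] order."""
    x1, y1 = max(a[0], b[0]), max(a[1], b[1])
    x2, y2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0, x2 - x1) * max(0, y2 - y1)
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    return inter / float(area_a + area_b - inter + 1e-6)
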