def find_gt_bboxes(id, data_folder, segment_name, start_frame, end_frame):
    """ Find the gt bboxes of a tracklet, transformed into the world coordinate
    Args:
        id (str): id of the tracklet
        data_folder (str): root for data storage
        segment_name (str): the segment to look at
        start_frame (int): first frame of the tracklet
        end_frame (int): last frame of the tracklet
    Return:
        list of gt bboxes on frames start_frame + 1 .. end_frame
    """
    gt_info = np.load(os.path.join(data_folder, 'gt_info', '{:}.npz'.format(segment_name)),
                      allow_pickle=True)
    ego_info = np.load(os.path.join(data_folder, 'ego_info', '{:}.npz'.format(segment_name)),
                       allow_pickle=True)
    gts = list()
    # the first frame needs no evaluation, so begin from start_frame + 1
    for i in range(start_frame + 1, end_frame + 1):
        frame_bboxes = gt_info['bboxes'][i]
        frame_ids = gt_info['ids'][i]
        index = frame_ids.index(id)
        bbox = frame_bboxes[index]
        bbox = BBox.array2bbox(bbox)
        # move the gt bbox from the ego coordinate to the world coordinate
        ego_matrix = ego_info[str(i)]
        bbox = BBox.bbox2world(ego_matrix, bbox)
        gts.append(bbox)
    return gts
def find_bboxes(id, data_folder, segment_name, start_frame, end_frame):
    """ In SOT, the beginning frame provides a GT BBox.
        This function finds the starting gt bbox and the gt bboxes of every frame.
    Args:
        id (str): id of the tracklet
        data_folder (str): root for data storage
        segment_name (str): the segment to look at
        start_frame (int): first frame of the tracklet
        end_frame (int): last frame of the tracklet
    Return:
        start_bbox (numpy array): [x, y, z, yaw, l, w, h]
        gts: list of gt bboxes on frames start_frame .. end_frame (world coordinate)
    """
    gt_info = np.load(os.path.join(data_folder, 'gt_info', '{:}.npz'.format(segment_name)),
                      allow_pickle=True)
    ego_info = np.load(os.path.join(data_folder, 'ego_info', '{:}.npz'.format(segment_name)),
                       allow_pickle=True)
    bboxes, ids = gt_info['bboxes'][start_frame], gt_info['ids'][start_frame]
    index = ids.index(id)
    start_bbox = bboxes[index]

    gts = list()
    for i in range(start_frame, end_frame + 1):
        frame_bboxes = gt_info['bboxes'][i]
        frame_ids = gt_info['ids'][i]
        index = frame_ids.index(id)
        bbox = frame_bboxes[index]
        bbox = BBox.array2bbox(bbox)
        ego_matrix = ego_info[str(i)]
        bbox = BBox.bbox2world(ego_matrix, bbox)
        gts.append(bbox)
    return start_bbox, gts
def jac(self, params):
    # gradient of the quadratic detection loss w.r.t. the motion parameters
    if self.tgt_det is None:
        return np.zeros_like(params)
    det_bbox_array = BBox.bbox2array(self.tgt_det)[:4]
    prev_bbox_array = BBox.bbox2array(self.prev_bbox)[:4]
    dist = prev_bbox_array + params - det_bbox_array
    derivative = 2 * dist
    return derivative
def loss(self, params):
    # squared distance between the motion-compensated previous bbox
    # and the associated detection
    if self.tgt_det is None:
        return 0
    det_bbox_array = BBox.bbox2array(self.tgt_det)[:4]
    prev_bbox_array = BBox.bbox2array(self.prev_bbox)[:4]
    dist = prev_bbox_array + params - det_bbox_array
    loss = np.sum(dist * dist)
    return loss
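# A standalone numpy sketch of the quadratic objective above together with a
# finite-difference check of its gradient; the 4-vectors stand in for the
# [x, y, z, yaw] states that loss/jac extract from the bboxes and are
# hypothetical values:
def _det_loss_gradient_check():
    prev = np.array([1.0, 2.0, 0.5, 0.10])  # previous bbox state
    det = np.array([1.5, 2.2, 0.5, 0.15])   # associated detection state
    params = np.zeros(4)                     # motion parameters being optimized
    residual = prev + params - det
    loss = np.sum(residual * residual)
    grad = 2 * residual                      # same form as jac above
    eps = 1e-6
    for k in range(4):
        step = np.zeros(4)
        step[k] = eps
        shifted = prev + params + step - det
        numeric = (np.sum(shifted * shifted) - loss) / eps
        assert abs(numeric - grad[k]) < 1e-4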
def iou2d(box_a, box_b):
    # intersection-over-union of two boxes in bird's-eye view
    boxa_corners = np.array(BBox.box2corners2d(box_a))[:, :2]
    boxb_corners = np.array(BBox.box2corners2d(box_b))[:, :2]
    reca, recb = Polygon(boxa_corners), Polygon(boxb_corners)
    overlap = reca.intersection(recb).area
    area_a = reca.area
    area_b = recb.area
    # small epsilon guards against division by zero for degenerate boxes
    iou = overlap / (area_a + area_b - overlap + 1e-10)
    return iou
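# A minimal standalone check of the iou2d formula, using shapely rectangles
# directly instead of the project's BBox type (coordinates are hypothetical):
def _iou2d_check():
    rect_a = Polygon([(0, 0), (2, 0), (2, 2), (0, 2)])  # a 2 x 2 square
    rect_b = Polygon([(1, 0), (3, 0), (3, 2), (1, 2)])  # shifted by 1 along x
    overlap = rect_a.intersection(rect_b).area          # = 2.0
    iou = overlap / (rect_a.area + rect_b.area - overlap)
    assert abs(iou - 1.0 / 3.0) < 1e-9                  # 2 / (4 + 4 - 2)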
def frame_result_visualization(frame_result, pc):
    visualizer = Visualizer2D(figsize=(12, 12))
    bbox0, bbox1 = frame_result['bbox0'], frame_result['bbox1']
    gt_bbox0, gt_bbox1 = frame_result['gt_bbox0'], frame_result['gt_bbox1']
    bbox1, gt_bbox1 = BBox.array2bbox(bbox1), BBox.array2bbox(gt_bbox1)
    # prediction in light blue, gt in red
    visualizer.handler_box(bbox1, color='light_blue')
    visualizer.handler_box(gt_bbox1, color='red')
    vis_pc = utils.pc_in_box_2D(gt_bbox1, pc, 4.0)
    visualizer.handler_pc(vis_pc)
    visualizer.show()
    visualizer.close()
def main(bench_list, result_folder, name, data_folder, token, process):
    pred_shape_folder = os.path.join(result_folder, name, 'shapes')
    results = list()
    for tracklet_index, tracklet_info in enumerate(bench_list):
        # simple multiprocessing split: each worker handles the tracklets
        # whose index is congruent to its token
        if tracklet_index % process != token:
            continue
        print('{:} {:} / {:}'.format(HARDNESS, tracklet_index + 1, len(bench_list)))
        id = tracklet_info['id']
        segment_name = tracklet_info['segment_name']
        frame_range = tracklet_info['frame_range']

        # load the per-frame tracking results and sort them by frame number
        tracklet_results = json.load(open(
            os.path.join(result_folder, name, 'summary', '{:}.json'.format(id))))
        frame_keys = list(tracklet_results.keys())
        frame_keys = sorted(str_list_to_int(frame_keys))
        preds = list()
        for key in frame_keys:
            bbox = tracklet_results[str(key)]['bbox1']
            bbox = BBox.array2bbox(bbox)
            preds.append(bbox)
        preds.insert(0, BBox.array2bbox(tracklet_results[str(frame_keys[0])]['bbox0']))

        # reuse the cached shape if it exists, otherwise aggregate it
        # from the raw point clouds and the predicted bboxes
        shape_file_path = os.path.join(pred_shape_folder, '{:}.npz'.format(id))
        if os.path.exists(shape_file_path):
            shape = np.load(shape_file_path, allow_pickle=True)['shape']
        else:
            scene_pcs = np.load(os.path.join(data_folder, 'pc', 'raw_pc',
                                             '{:}.npz'.format(segment_name)),
                                allow_pickle=True)
            ego_infos = np.load(os.path.join(data_folder, 'ego_info',
                                             '{:}.npz'.format(segment_name)),
                                allow_pickle=True)
            pcs = [pc2world(ego_infos[str(i)], scene_pcs[str(i)])
                   for i in range(frame_range[0], frame_range[1] + 1)]
            shape = create_shapes(pcs, preds)
            np.savez_compressed(shape_file_path, shape=shape)

        # compare the aggregated shape against the gt shape
        gt_shape = np.load(os.path.join(data_folder, 'gt_shapes', '{:}.npz'.format(id)),
                           allow_pickle=True)['pc']
        cd = compute_cd_loss(shape, gt_shape)
        results.append(cd)
    return results
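# A standalone sketch of a symmetric Chamfer distance between two point
# clouds, which is what compute_cd_loss appears to measure judging by its
# name (the real implementation may differ, e.g. in normalization):
def _chamfer_distance_sketch(pc_a, pc_b):
    from scipy.spatial import cKDTree
    dist_ab, _ = cKDTree(pc_b).query(pc_a)  # each point of a to its nearest in b
    dist_ba, _ = cKDTree(pc_a).query(pc_b)  # each point of b to its nearest in a
    return dist_ab.mean() + dist_ba.mean()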
def pre_optim_step(self, optim_data: sot_3d.OptimData, frame_indexes):
    prev_bbox = optim_data.bboxes[frame_indexes[0]]
    pred_bbox = optim_data.bboxes[frame_indexes[1]]
    self.shape_state = BBox.bbox2array(prev_bbox)[:4]
    self.state = BBox.bbox2array(pred_bbox)[:4]

    self.shape_pc = optim_data.shape_pcs[frame_indexes[0]]
    self.pc = optim_data.pcs[frame_indexes[1]]
    self.pc = utils.pc_in_box(pred_bbox, self.pc, self.box_scaling_next)

    # move both point clouds to be relative to the shape state center
    self.shape_pc -= self.shape_state[:3]
    self.pc -= self.shape_state[:3]

    # find correspondences under the current motion estimate
    reference_motion = self.state - self.shape_state
    self.shape_pc, self.pc = self.find_nearest_neighbor(
        self.shape_pc, self.pc, reference_motion)
def pre_optim_step(self, optim_data: sot_3d.OptimData, frame_indexes):
    """ prepare the loss computation for icp loss
    Args:
        optim_data (sot_3d.OptimData): data for optimization
        frame_indexes ((frame0, frame1)): which two frames to compute
    """
    self.pc_a = optim_data.pcs[frame_indexes[0]]
    self.pc_b = optim_data.pcs[frame_indexes[1]]
    box_a = optim_data.bboxes[frame_indexes[0]]
    box_b = optim_data.bboxes[frame_indexes[1]]

    # prepare relevant LiDAR points
    self.pc_a = utils.pc_in_box(box_a, self.pc_a, self.box_scaling_prev)
    self.pc_b = utils.pc_in_box(box_b, self.pc_b, self.box_scaling_next)
    if self.agg_subshape:
        subshape_pc = optim_data.subshape_pcs[frame_indexes[0]]
        if subshape_pc is not None:
            self.pc_a = np.vstack((self.pc_a, subshape_pc))

    # subtracting the center of objects
    # so that we fit the computation of rigid motion
    point_center = BBox.bbox2array(box_a)[:3]
    self.pc_a -= point_center[:3]
    self.pc_b -= point_center[:3]

    # find correspondences
    reference_motion = utils.agg_motion(
        optim_data.motions, (frame_indexes[0] + 1, frame_indexes[1]))
    self.pc_a, self.pc_b = self.find_nearest_neighbor(
        self.pc_a, self.pc_b, reference_motion)

    # set frame interval
    self.frame_range = (frame_indexes[0], frame_indexes[1])
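# A standalone sketch of what a correspondence search like find_nearest_neighbor
# has to do: warp pc_a by the current motion estimate, then match each warped
# point to its closest point in pc_b. The real method may differ (e.g. it may
# reject far-away matches); here only the translational part of the
# [dx, dy, dz, dyaw] reference motion is applied:
def _find_nearest_neighbor_sketch(pc_a, pc_b, reference_motion):
    from scipy.spatial import cKDTree
    warped_a = pc_a + reference_motion[:3]
    _, indexes = cKDTree(pc_b).query(warped_a)
    return pc_a, pc_b[indexes]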
def sequence_eval(obj_list, result_folder, name, data_folder, iou):
    """ evaluate each tracklet and return the tracklet-level metrics """
    results = list()
    for tracklet_info in obj_list:
        tracklet_results = json.load(open(
            os.path.join(result_folder, name, 'summary',
                         '{:}.json'.format(tracklet_info['id']))))
        frame_keys = list(tracklet_results.keys())
        frame_keys = sorted(str_list_to_int(frame_keys))
        if iou:
            # ious have already been computed during tracking
            ious = list()
            for key in frame_keys:
                ious.append(tracklet_results[str(key)]['iou3d'])
            tracklet_metrics = metrics_from_ious(ious)
        else:
            # otherwise, recompute the metrics from the gt and predicted bboxes
            gts = find_gt_bboxes(id=tracklet_info['id'],
                                 data_folder=data_folder,
                                 segment_name=tracklet_info['segment_name'],
                                 start_frame=tracklet_info['frame_range'][0] + 1,
                                 end_frame=tracklet_info['frame_range'][1])
            preds = list()
            for key in frame_keys:
                bbox = tracklet_results[str(key)]['bbox1']
                bbox = BBox.array2bbox(bbox)
                preds.append(bbox)
            tracklet_metrics = metrics_from_bboxes(preds, gts)
        results.append(tracklet_metrics)
    return results
def pre_optim_step(self):
    """ Preparation before each step of optimization.
        Extract the related data from: motion model, data buffer, shape model.
        Then build up the optim_data and feed it into loss_func to
        construct the loss function.
    """
    # bbox and motion
    optim_data_motion_model = self.motion_model.pre_optim_step()
    # get the pc and ego information
    optim_data_data_buffer = self.data_buffer.pre_optim_step()
    # get the pred states and shape information
    pred_states = np.zeros((self.motion_model.size, 4))
    for i in range(self.motion_model.size):
        pred_bbox = optim_data_motion_model[i]['bbox']
        pred_states[i, :] = BBox.bbox2array(pred_bbox)[:4]
    optim_data_shape_map = self.shape_map.pre_optim_step(pred_states)

    # merge them and create the per-frame data
    optim_data_list = [{**optim_data_data_buffer[i],
                        **optim_data_motion_model[i],
                        **optim_data_shape_map[i]}
                       for i in range(len(optim_data_motion_model))]
    optim_data = OptimData.dict2optim_data(optim_data_list)
    # send the optim data into the loss factors
    self.loss_func.pre_optim_step(optim_data)
    return
def preprocess(self):
    """ Some data need explicit transformation to the world coordinate:
        point cloud, detection bboxes
    """
    self.pc = utils.pc2world(self.ego, self.pc)
    if self.dets is not None:
        self.dets = [BBox.bbox2world(self.ego, det) for det in self.dets]
    return
def iou3d(box_a, box_b):
    # bird's-eye-view iou first
    boxa_corners = np.array(BBox.box2corners2d(box_a))[:, :2]
    boxb_corners = np.array(BBox.box2corners2d(box_b))[:, :2]
    reca, recb = Polygon(boxa_corners), Polygon(boxb_corners)
    overlap_area = reca.intersection(recb).area
    iou_2d = overlap_area / (reca.area + recb.area - overlap_area)

    # then extend to 3d using the overlap along the z-axis
    ha, hb = box_a.h, box_b.h
    za, zb = box_a.z, box_b.z
    overlap_height = max(0, min((za + ha / 2) - (zb - hb / 2),
                                (zb + hb / 2) - (za - ha / 2)))
    overlap_volume = overlap_area * overlap_height
    union_volume = box_a.w * box_a.l * ha + box_b.w * box_b.l * hb - overlap_volume
    iou_3d = overlap_volume / union_volume
    return iou_2d, iou_3d
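# Sanity check of the vertical-overlap term in iou3d (numbers are
# hypothetical): two boxes of height 2.0 centered at z = 0.0 and z = 0.5
# overlap by 1.5 along the z-axis.
def _overlap_height_check():
    za, ha = 0.0, 2.0
    zb, hb = 0.5, 2.0
    overlap_height = max(0, min((za + ha / 2) - (zb - hb / 2),
                                (zb + hb / 2) - (za - ha / 2)))
    assert abs(overlap_height - 1.5) < 1e-9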
def handler_box(self, box: BBox, message: str = '', color='red'):
    # draw the bird's-eye-view outline of the box, closing the polygon
    corners = np.array(BBox.box2corners2d(box))[:, :2]
    corners = np.concatenate([corners, corners[0:1, :2]])
    plt.plot(corners[:, 0], corners[:, 1], color=self.COLOR_MAP[color])
    plt.text(corners[0, 0] - 1, corners[0, 1] - 1, message,
             color=self.COLOR_MAP['black'])
def viewable_corner(bbox: BBox, ego):
    # the corner nearest to the ego vehicle is the viewable one
    ego_location = ego[:2]
    corners = np.array(BBox.box2corners2d(bbox))[:, :2]
    dist = corners - ego_location
    dist = np.sum(dist * dist, axis=1)
    corner_index = np.argmin(dist)
    corner_coordinate = corners[corner_index]
    return corner_index, corner_coordinate
def post_optim_step(self, optim_motions):
    """ Each time an optimized motion arrives, update the
        corresponding motions and predicted bbox locations
    """
    if len(optim_motions.shape) == 1:
        optim_motions = optim_motions.reshape((-1, 4))
    for i in range(1, self.size):
        self.motion_buffer.set_val(i, optim_motions[i - 1])
        # chain the optimized motion onto the previous bbox
        prev_bbox = self.state_buffer.access(i - 1)
        next_bbox = BBox.motion2bbox(prev_bbox, optim_motions[i - 1])
        self.state_buffer.set_val(i, next_bbox)
    return
def add_pc(self, pc, bbox: BBox):
    # crop the points inside the (scaled) bbox as the template
    template_pc = utils.pc_in_box(bbox, pc, self.box_scaling)
    bbox_state = BBox.bbox2array(bbox)[:4]
    if self.shape_pc is None:
        # first frame: initialize the shape with the template
        self.shape_pc = template_pc
        self.shape_state = bbox_state
    else:
        # later frames: align the new points to the stored shape location
        new_pc = ObjShape.trans2tgt_location(template_pc, bbox_state,
                                             self.shape_state)
        self.shape_pc = np.vstack((self.shape_pc, new_pc))
    if self.downsample:
        self.shape_pc = utils.downsample(self.shape_pc,
                                         voxel_size=self.resolution)
def add_pc(self, pc, bbox: BBox):
    """ find the current frame pc, append it to the buffer,
        then update the subshape
    """
    cur_frame_pc = utils.pc_in_box(bbox, pc, self.box_scaling)
    bbox_state = BBox.bbox2array(bbox)[:4]
    if self.downsample:
        cur_frame_pc = utils.downsample(cur_frame_pc, self.resolution)
    self.pc_buffer.push(cur_frame_pc)
    self.state_buffer.push(bbox_state)
    self.size = self.pc_buffer.size
    self.update_subshape()
    return
def summary_result(self):
    """ return the result of the whole tracklet
        {
            1: estimated result on frame 1,
            2: estimated result on frame 2,
            ...
            n: estimated result on frame n,
        }
    """
    tracklet_result = dict()
    for i in range(1, self.cur_frame):
        frame_result = self.result_log[i]
        # take the latest result for online setting by default
        latest_key = max(list(frame_result.keys()))
        latest_result = frame_result[latest_key]
        bbox0 = BBox.bbox2array(latest_result['bbox0']).tolist()
        bbox1 = BBox.bbox2array(latest_result['bbox1']).tolist()
        tracklet_result[i] = {
            'motion': latest_result['motion'],
            'bbox0': bbox0,
            'bbox1': bbox1}
    return tracklet_result
def create_gt_shape(pcs, bboxes, resolution=0.05):
    """ create the gt shape from the input point clouds and gt bboxes,
        aggregated in the canonical (center-subtracted, yaw-aligned) frame
    """
    shape_pc = np.zeros((0, 3))
    for i, (pc, bbox) in enumerate(zip(pcs, bboxes)):
        pc = sot_3d.utils.pc_in_box(bbox, pc, 1.0)
        bbox_state = BBox.bbox2array(bbox)[:4]
        # move to the box center, then rotate by -yaw to undo the heading
        pc -= bbox_state[:3]
        pc = sot_3d.utils.apply_motion_to_points(pc, np.array([0, 0, 0, -bbox.o]))
        shape_pc = np.vstack((shape_pc, pc))
    shape_pc = sot_3d.utils.downsample(shape_pc, voxel_size=resolution)
    return shape_pc
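# A standalone sketch of the de-rotation step above, assuming a motion vector
# of the form [dx, dy, dz, dyaw] (the same 4-dim state used by the motion
# model); the actual sot_3d.utils.apply_motion_to_points may order the
# rotation and the translation differently:
def _apply_motion_sketch(pc, motion):
    dx, dy, dz, dyaw = motion
    cos, sin = np.cos(dyaw), np.sin(dyaw)
    out = pc.copy()
    # rotate around the z-axis, then translate
    out[:, 0] = cos * pc[:, 0] - sin * pc[:, 1]
    out[:, 1] = sin * pc[:, 0] + cos * pc[:, 1]
    out += np.array([dx, dy, dz])
    return out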
def find_bboxes(id, data_folder, segment_name, start_frame, end_frame):
    """ Find the gt bboxes of a tracklet on frames start_frame .. end_frame
        (without the transformation to the world coordinate)
    """
    gt_info = np.load(os.path.join(data_folder, 'gt_info', '{:}.npz'.format(segment_name)),
                      allow_pickle=True)
    gts = list()
    for i in range(start_frame, end_frame + 1):
        frame_bboxes = gt_info['bboxes'][i]
        frame_ids = gt_info['ids'][i]
        index = frame_ids.index(id)
        bbox = frame_bboxes[index]
        bbox = BBox.array2bbox(bbox)
        gts.append(bbox)
    return gts
def __init__(self, ego_info, pc, start_bbox=None, terrain=None, dets=None):
    """ the input of a frame should contain several fields,
        some compulsory, some optional
    Args:
        ego_info (4 * 4 matrix): ego matrix
        pc (N * 3 array): point cloud
        start_bbox (BBox, optional): gt bbox on the first frame of the
            tracklet. Defaults to None.
        terrain (N * 3 array, optional): ground point cloud. Defaults to None.
        dets (BBoxes, optional): detection bboxes. Defaults to None.
    """
    self.ego = ego_info
    self.pc = pc
    self.terrain = terrain
    self.dets = None
    if dets is not None:
        self.dets = [BBox.array2bbox(det) for det in dets]
    self.start_bbox = start_bbox
    self.preprocess()
    return
def pre_frame_optim(self, pred_motion):
    """ before optimizing each frame, the motion model takes in
        a roughly predicted motion
    Args:
        pred_motion (np.ndarray): a rough prediction of the motion
    """
    if self.size == 0:
        # at frame 0, push the start_bbox and the dummy pred_motion
        # into the buffers
        self.state_buffer.push(self.start_state)
        self.motion_buffer.push(pred_motion)
    else:
        # at later frames, apply the predicted motion to the previous
        # bbox to get the predicted bbox
        prev_bbox = self.state_buffer.last()
        pred_bbox = BBox.motion2bbox(prev_bbox, pred_motion)
        self.state_buffer.push(pred_bbox)
        self.motion_buffer.push(pred_motion)
    self.size = self.state_buffer.size
    return
def compare_to_gt(cur_frame_idx, frame_result, gts=None):
    """ For the details of frame_result, refer to get_frame_result in tracker.py """
    result = deepcopy(frame_result)
    max_frame_key = max(list(frame_result.keys()))
    for i in range(max_frame_key + 1):
        # map the in-window index i back to the global frame index
        frame_idx = cur_frame_idx - (max_frame_key - i)
        bbox0, bbox1 = frame_result[i]['bbox0'], frame_result[i]['bbox1']
        result[i]['bbox0'] = BBox.bbox2array(bbox0).tolist()
        result[i]['bbox1'] = BBox.bbox2array(bbox1).tolist()
        if gts:
            gt_bbox0, gt_bbox1 = gts[frame_idx - 1], gts[frame_idx]
            iou_2d, iou_3d = sot_3d.utils.iou3d(bbox1, gt_bbox1)
            result[i]['gt_bbox0'] = BBox.bbox2array(gt_bbox0).tolist()
            result[i]['gt_bbox1'] = BBox.bbox2array(gt_bbox1).tolist()
            result[i]['gt_motion'] = (BBox.bbox2array(gt_bbox1) -
                                      BBox.bbox2array(gt_bbox0))[:4].tolist()
            result[i]['iou2d'] = iou_2d
            result[i]['iou3d'] = iou_3d
    return result
def tracker_api(configs, id, start_bbox, start_frame, data_loader,
                track_len, gts=None, visualize=False):
    """ api for the tracker
    Args:
        configs: model configuration read from config.yaml
        id (str): each tracklet has an id
        start_bbox ([x, y, z, yaw, l, w, h]): the beginning location of this id
        start_frame (int): the frame number of the beginning frame
        data_loader (an iterator): iterator returning data of each incoming frame
        track_len (int): number of frames to track
        gts (list of BBox, optional): gt bboxes for comparison. Defaults to None.
        visualize (bool, optional): visualize each frame result. Defaults to False.
    Return:
        {
            frame_number0: pred_bbox0,
            frame_number1: pred_bbox1,
            ...
            frame_numberN: pred_bboxN
        }
    """
    tracker = sot_3d.Tracker(id=id, configs=configs, start_bbox=start_bbox,
                             start_frame=start_frame, track_len=track_len)
    tracklet_result = dict()
    for frame_index in range(track_len):
        print('////////////////////////////////////////')
        print('Processing {:} {:} / {:}'.format(id, frame_index + 1, track_len))
        # fetch the data of the incoming frame
        frame_data = next(data_loader)
        # on the first frame, add the start_bbox (moved to the world coordinate)
        input_bbox = None
        if frame_index == 0:
            input_bbox = BBox.bbox2world(frame_data['ego'],
                                         BBox.array2bbox(start_bbox))
        input_data = sot_3d.FrameData(ego_info=frame_data['ego'],
                                      pc=frame_data['pc'],
                                      start_bbox=input_bbox,
                                      terrain=frame_data['terrain'],
                                      dets=frame_data['dets'])

        # run the frame-level tracking
        frame_output = tracker.track(input_data)
        # frame 0 may produce no output
        if not frame_output:
            continue
        # if gt is available, compare our prediction with it
        frame_result = compare_to_gt(frame_index, frame_output, gts)

        max_frame_key = max(list(frame_result.keys()))
        for i in range(max_frame_key + 1):
            print('BBox0 : {:}'.format(frame_result[i]['bbox0']))
            print('BBox1 : {:}'.format(frame_result[i]['bbox1']))
            print('Motion : {:}'.format(frame_result[i]['motion']))
            if gts:
                print('GT BBox0 : {:}'.format(frame_result[i]['gt_bbox0']))
                print('GT BBox1 : {:}'.format(frame_result[i]['gt_bbox1']))
                print('GT Motion: {:}'.format(frame_result[i]['gt_motion']))
                print('IOUS : {:} {:}'.format(frame_result[i]['iou2d'],
                                              frame_result[i]['iou3d']))
        print('\n')
        tracklet_result[frame_index + start_frame] = frame_result[max_frame_key]

        if visualize:
            frame_result_visualization(frame_result[max_frame_key],
                                       tracker.input_data.pc)
    return tracklet_result
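# A minimal usage sketch for tracker_api; the per-frame dict layout, the
# start_bbox values, and the helper below are hypothetical, not part of
# the repo:
def _tracker_api_usage_sketch(configs, frames):
    def example_data_loader():
        for frame in frames:
            # each yielded dict must carry the keys tracker_api reads:
            # 'ego', 'pc', 'terrain', 'dets'
            yield {'ego': frame['ego'], 'pc': frame['pc'],
                   'terrain': None, 'dets': None}
    return tracker_api(configs, id='tracklet-0',
                       start_bbox=[0, 0, 0, 0, 4.0, 2.0, 1.5],
                       start_frame=0, data_loader=example_data_loader(),
                       track_len=len(frames))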