def encode(self, label_boxes_3d, label_boxes_2d, p2, image_info):
    """Encode 3D box labels as per-corner 2D targets with a visibility flag.

    The 3D boxes are projected to 2D corners, split into the two planes
    given by ``Order.planes()`` and each plane is encoded against the 2D
    label boxes with ``self.encode_with_bbox``.

    Args:
        label_boxes_3d: 3D box labels, one row per instance.
        label_boxes_2d: 2D box labels used as the encoding reference.
        p2: projection matrix passed through to ``geometry_utils``.
        image_info: indexable; ``image_info[0]`` / ``image_info[1]`` are
            used as the window's last / third entries (presumably
            height / width -- confirm against the data loader).

    Returns:
        Tensor flattened to ``(num_instances, -1)`` holding, per corner,
        the encoded point followed by its visibility flag as a float.
    """
    projected = geometry_utils.torch_boxes_3d_to_corners_2d(
        label_boxes_3d, p2)

    planes = Order.planes()
    front = projected[:, planes[0]]
    rear = projected[:, planes[1]]

    front_code, front_reordered = self.encode_with_bbox(front, label_boxes_2d)
    rear_code, rear_reordered = self.encode_with_bbox(rear, label_boxes_2d)
    point_codes = torch.cat([front_code, rear_code], dim=1)

    # Window covering the image, as a single (1, 4) row.
    window = torch.tensor(
        [0, 0, image_info[1], image_info[0]]).type_as(projected).view(1, 4)

    # Visibility is evaluated on the *reordered* corners so that each flag
    # lines up with the encoded point it is attached to.
    reordered = torch.cat([front_reordered, rear_reordered], dim=1)
    visible = geometry_utils.torch_window_filter(
        reordered, window, deltas=200)

    combined = torch.cat(
        [point_codes, visible.unsqueeze(-1).float()], dim=-1)
    return combined.view(combined.shape[0], -1)
def encode(label_boxes_3d, label_boxes_2d, p2):
    """Encode 3D boxes as projected center, depth and bottom corners.

    Args:
        label_boxes_3d: shape(N, K); the first three columns are read as
            the 3D location, whose third component is used as depth.
        label_boxes_2d: unused here; kept for a uniform coder interface.
        p2: projection matrix passed through to ``geometry_utils``.

    Returns:
        A single tensor concatenated along the last dim, one row per
        instance: the projected 2D center, the instance depth (one
        column) and the flattened 2D projections of corners 0-3.
    """
    num_samples = label_boxes_3d.shape[0]
    location = label_boxes_3d[:, :3]
    C_2d = geometry_utils.torch_points_3d_to_points_2d(location, p2)

    # Slice with 2:3 to keep a trailing dim: torch.cat needs every input
    # to have the same number of dimensions as the 2D tensors beside it.
    instance_depth = location[:, 2:3]

    # Corners 0-3 are taken as the bottom face (both sides are encoded).
    corners_2d = geometry_utils.torch_boxes_3d_to_corners_2d(
        label_boxes_3d, p2)
    bottom_corners = corners_2d[:, [0, 1, 2, 3]]

    return torch.cat(
        [C_2d, instance_depth, bottom_corners.view(num_samples, -1)],
        dim=-1)
def encode(label_boxes_3d, label_boxes_2d, p2, image_info):
    """Encode 3D boxes as 2D corner targets with depth and visibility.

    Each projected corner is extended with the last coordinate of its 3D
    corner (used as depth), the two planes from ``Order.planes()`` are
    encoded against the 2D label boxes, and a visibility flag is
    appended per corner. The box center depth is appended as the last
    column of the result.

    Args:
        label_boxes_3d: 3D box labels; column 2 is read as center depth.
        label_boxes_2d: 2D box labels used as the encoding reference.
        p2: projection matrix passed through to ``geometry_utils``.
        image_info: indexable; ``image_info[0]`` / ``image_info[1]`` fill
            the window's last / third entries (presumably height / width
            -- confirm against the data loader).

    Returns:
        Tensor with one row per instance: flattened per-corner codes and
        visibility flags, followed by the center depth.
    """
    box_corners_3d = geometry_utils.torch_boxes_3d_to_corners_3d(
        label_boxes_3d)
    projected = geometry_utils.torch_boxes_3d_to_corners_2d(
        label_boxes_3d, p2)

    center_depth = label_boxes_3d[:, 2]

    # Raw corner depth is used as-is (no offset or squashing applied).
    corner_depth = box_corners_3d[..., -1]
    corners_with_depth = torch.cat(
        [projected, corner_depth.unsqueeze(-1)], dim=-1)

    planes = Order.planes()
    front = corners_with_depth[:, planes[0]]
    rear = corners_with_depth[:, planes[1]]

    front_code, front_reordered = Corner2DNearestCoder.encode_with_bbox(
        front, label_boxes_2d)
    rear_code, rear_reordered = Corner2DNearestCoder.encode_with_bbox(
        rear, label_boxes_2d)
    point_codes = torch.cat([front_code, rear_code], dim=1)

    window = torch.tensor([0, 0, image_info[1], image_info[0]])
    window = window.type_as(corners_with_depth).view(1, 4)

    # Visibility is computed on the reordered corners so the flags match
    # the encoded points; the depth channel is dropped before filtering.
    reordered = torch.cat([front_reordered, rear_reordered], dim=1)
    visible = geometry_utils.torch_window_filter(
        reordered[:, :, :-1], window, deltas=200)

    combined = torch.cat(
        [point_codes, visible.unsqueeze(-1).float()], dim=-1)
    flattened = combined.view(combined.shape[0], -1)

    # Center depth goes last, unencoded.
    return torch.cat([flattened, center_depth.unsqueeze(-1)], dim=-1)
def _generate_keypoint(self, label_boxes_3d, p2, image_info):
    """Build keypoints from projected 3D box corners plus visibility.

    Args:
        label_boxes_3d: 3D box labels to project.
        p2: projection matrix passed through to ``geometry_utils``.
        image_info: indexable; ``image_info[0]`` / ``image_info[1]`` fill
            the window's last / third entries (presumably height / width
            -- confirm against the data loader).

    Returns:
        Tensor of projected 2D corners with the window-filter result
        appended as a float in the last dim.
    """
    # Keypoints are the projected box corners.
    projected = geometry_utils.torch_boxes_3d_to_corners_2d(
        label_boxes_3d, p2)

    # Visibility comes from a window filter against the image extent.
    window = torch.tensor(
        [0, 0, image_info[1], image_info[0]]).type_as(projected).view(1, 4)
    visible = geometry_utils.torch_window_filter(
        projected, window, deltas=200)

    return torch.cat([projected, visible.unsqueeze(-1).float()], dim=-1)
def encode(label_boxes_3d, proposals, p2):
    """Encode orientation targets normalised by the projected box size.

    The 3D boxes are projected to 2D corners; the 2D box enclosing them
    provides the size used to normalise columns 1:3 of the orientation
    labels produced by ``OrientsCoder._generate_orients``.

    Args:
        label_boxes_3d: 3D box labels.
        proposals: 2D proposals handed to ``_generate_orients``.
        p2: projection matrix passed through to ``geometry_utils``.

    Returns:
        Tensor whose first column is column 0 of the generated orients
        (unchanged) followed by the two normalised regression columns.
    """
    corners = geometry_utils.torch_boxes_3d_to_corners_2d(label_boxes_3d, p2)

    # 2D box around the projected corners, converted to xywh; the
    # converter is called with a leading dim added and removed again.
    enclosing_xyxy = geometry_utils.torch_corners_2d_to_boxes_2d(corners)
    enclosing_xywh = geometry_utils.torch_xyxy_to_xywh(
        enclosing_xyxy.unsqueeze(0)).squeeze(0)

    # shape(N, 2, 2)
    side = OrientsCoder._get_center_side(corners)
    orients = OrientsCoder._generate_orients(side, proposals)

    # Normalise the regression part by the size columns of the xywh box.
    normalised = orients[:, 1:3] / enclosing_xywh[:, 2:]
    return torch.cat([orients[:, :1], normalised], dim=-1)
def loss(self, prediction_dict, feed_dict):
    """Add the 3D-box losses to the parent class' loss dict.

    Predicted center depth, 2D center deltas, yaw and dimensions are
    decoded into 3D boxes, projected back to 2D corners and compared
    with the ground truth.

    Args:
        prediction_dict: must provide ``constants.KEY_TARGETS`` (indexed
            per stage; entry ``[2]`` is a dict with 'pred', 'target' and
            'weight') and ``constants.KEY_PROPOSALS``.
        feed_dict: must provide ``constants.KEY_STEREO_CALIB_P2`` and
            ``constants.KEY_IMAGE_INFO``.

    Returns:
        The parent's loss dict updated with 'corners_2d_loss',
        'center_depth_loss', 'location_loss' and 'dims_loss'.
    """
    loss_dict = super().loss(prediction_dict, feed_dict)
    targets = prediction_dict[constants.KEY_TARGETS]
    proposals = prediction_dict[constants.KEY_PROPOSALS]
    p2 = feed_dict[constants.KEY_STEREO_CALIB_P2]
    image_info = feed_dict[constants.KEY_IMAGE_INFO]

    corners_2d_loss = 0
    center_depth_loss = 0
    location_loss = 0
    dims_loss = 0

    mean_dims = torch.tensor([1.8, 1.8, 3.7]).type_as(proposals)
    # Loop-invariant: proposals do not change between stages.
    proposals_xywh = geometry_utils.torch_xyxy_to_xywh(proposals)

    # NOTE(review): each stage overwrites the losses, so only the last
    # stage's values reach loss_dict -- confirm whether they should be
    # accumulated instead.
    for stage_ind in range(self.num_stages):
        corners_target = targets[stage_ind][2]
        preds = corners_target['pred']
        N, M = preds.shape[:2]
        # Use a separate name here: rebinding `targets` would break the
        # per-stage lookup above on the next iteration.
        stage_targets = corners_target['target']
        weights = corners_target['weight'].unsqueeze(-1)

        # Ground truth layout: 16 corner coords, 3 location, then dims.
        corners_2d_gt = stage_targets[:, :, :16]
        location_gt = stage_targets[:, :, 16:19]
        dims_gt = stage_targets[:, :, 19:]
        center_depth_gt = location_gt[:, :, -1:]

        # Prediction layout: depth, 2D center deltas, ry, then dims.
        center_depth_preds = preds[:, :, :1]
        center_2d_deltas_preds = preds[:, :, 1:3]
        ry_preds = preds[:, :, 3:4]
        dims_preds = torch.exp(preds[:, :, 4:]) * mean_dims

        # Decode the 2D center from deltas relative to the proposals.
        center_2d_preds = (
            center_2d_deltas_preds * proposals_xywh[:, :, 2:] +
            proposals_xywh[:, :, :2])

        # Lift the 2D center to 3D with the predicted depth, per sample.
        location_preds = torch.stack([
            geometry_utils.torch_points_2d_to_points_3d(
                center_2d_preds[batch_ind], center_depth_preds[batch_ind],
                p2[batch_ind]) for batch_ind in range(N)
        ], dim=0)

        # Dims are detached so the projection loss does not update them.
        boxes_3d_preds = torch.cat(
            [location_preds, dims_preds.detach(), ry_preds], dim=-1)
        corners_2d_preds = torch.stack([
            geometry_utils.torch_boxes_3d_to_corners_2d(
                boxes_3d_preds[batch_ind], p2[batch_ind])
            for batch_ind in range(N)
        ], dim=0).view(N, M, -1)

        center_depth_loss = self.l1_loss(center_depth_preds,
                                         center_depth_gt) * weights
        location_loss = self.l1_loss(location_preds, location_gt) * weights
        dims_loss = self.smooth_l1_loss(dims_preds, dims_gt) * weights

        # Projection loss, masked by a window filter on the gt corners.
        zeros = torch.zeros_like(image_info[:, 0])
        image_shape = torch.stack(
            [zeros, zeros, image_info[:, 1], image_info[:, 0]], dim=-1)
        image_shape = image_shape.type_as(corners_2d_gt).view(-1, 4)
        image_filter = geometry_utils.torch_window_filter(
            corners_2d_gt.contiguous().view(N, -1, 2),
            image_shape,
            deltas=200).float().view(N, M, -1)

        corners_2d_loss = self.l1_loss(corners_2d_preds,
                                       corners_2d_gt) * weights
        corners_2d_loss = (corners_2d_loss.view(N, M, 8, 2) *
                           image_filter.unsqueeze(-1)).view(N, M, -1)

    loss_dict.update({
        'corners_2d_loss': corners_2d_loss,
        'center_depth_loss': center_depth_loss * 10,
        'location_loss': location_loss * 10,
        'dims_loss': dims_loss
    })
    return loss_dict
def encode(label_boxes_3d, proposals, p2, image_info, label_boxes_2d):
    """Encode the visible side of a 3D box relative to 2D proposals.

    Three bottom corners chosen by ``Corner3DCoder.find_visible_side``
    are ordered by projected x. Their outer x coordinates are clamped to
    the 2D label box, and for instances where the middle corner is not
    usable it is merged into one of the outer corners.

    Args:
        label_boxes_3d: 3D box labels, one row per instance.
        proposals: 2D proposals; column 0 is the reference x for the
            ratio (presumably xyxy, as they go through xyxy_to_xywh).
        p2: projection matrix passed through to ``geometry_utils``.
        image_info: unused here; kept for a uniform coder interface.
        label_boxes_2d: 2D label boxes; columns 0 and 2 are used as the
            lower and upper x bounds.

    Returns:
        Tensor with one row per instance, concatenating the flattened
        encoded bottom corners, the encoded heights and the clamped
        ratio of the middle corner's x within the proposal.
    """

    def _merge_mid_corner(corners, rows, merge_left, merge_right):
        # Replace the middle corner of the selected rows with the left
        # (index 0) or right (index 2) corner.
        selected = corners[rows]
        merged_left = torch.stack(
            [selected[:, 0], selected[:, 0], selected[:, 2]], dim=1)
        merged_right = torch.stack(
            [selected[:, 0], selected[:, 2], selected[:, 2]], dim=1)
        selected[merge_left] = merged_left[merge_left]
        selected[merge_right] = merged_right[merge_right]
        corners[rows] = selected

    num_instances = label_boxes_3d.shape[0]

    corners_2d = geometry_utils.torch_boxes_3d_to_corners_2d(
        label_boxes_3d, p2)
    proposals_xywh = geometry_utils.torch_xyxy_to_xywh(
        proposals.unsqueeze(0)).squeeze(0)
    wh = proposals_xywh[:, 2:].unsqueeze(1)
    xy = proposals_xywh[:, :2].unsqueeze(1)

    corners_3d = geometry_utils.torch_boxes_3d_to_corners_3d(label_boxes_3d)
    bottom_corners_3d = corners_3d[:, [0, 1, 2, 3]]
    visible_index = Corner3DCoder.find_visible_side(bottom_corners_3d)
    visible_corners_3d = tensor_utils.multidim_index(bottom_corners_3d,
                                                     visible_index)
    visible_side_line_2d = geometry_utils.torch_points_3d_to_points_2d(
        visible_corners_3d.contiguous().view(-1, 3),
        p2).view(num_instances, -1, 2)

    # True when corners 1 and 2 project on opposite sides of corner 0
    # along x.
    visible_cond = (
        visible_side_line_2d[:, 1, 0] - visible_side_line_2d[:, 0, 0]
    ) * (visible_side_line_2d[:, 2, 0] - visible_side_line_2d[:, 0, 0]) < 0

    # Reorder the selected corners left-to-right by projected x.
    _, order = torch.sort(
        visible_side_line_2d[..., 0], dim=-1, descending=False)
    visible_index = tensor_utils.multidim_index(
        visible_index.unsqueeze(-1), order).squeeze(-1)

    bottom_corners = tensor_utils.multidim_index(
        corners_2d[:, [0, 1, 2, 3]], visible_index)
    top_corners = tensor_utils.multidim_index(
        corners_2d[:, [4, 5, 6, 7]], visible_index)
    bottom_corners_3d = tensor_utils.multidim_index(bottom_corners_3d,
                                                    visible_index)

    # Merge towards the outer corner that is nearer to the origin.
    dist = torch.norm(bottom_corners_3d, dim=-1)
    merge_left_cond = dist[:, 0] < dist[:, 2]

    # Truncated boxes: clamp the outer corners' x into the 2D label box.
    x_min = label_boxes_2d[:, 0]
    x_max = label_boxes_2d[:, 2]
    for corners in (bottom_corners, top_corners):
        for side in (0, 2):
            corners[:, side, 0] = torch.min(corners[:, side, 0], x_max)
            corners[:, side, 0] = torch.max(corners[:, side, 0], x_min)

    in_box_cond = (bottom_corners[:, 1, 0] < x_max) & (
        bottom_corners[:, 1, 0] > x_min)
    visibility = visible_cond.float() * in_box_cond.float()

    # Rows whose middle corner is unusable get it merged into a side.
    index = torch.nonzero(visibility <= 0).view(-1)
    merge_left_cond = merge_left_cond[index]
    merge_right_cond = ~merge_left_cond
    _merge_mid_corner(bottom_corners, index, merge_left_cond,
                      merge_right_cond)
    # Top corners must be merged the same way as the bottom ones;
    # otherwise the heights below pair a merged bottom corner with an
    # unmerged top corner.
    _merge_mid_corner(top_corners, index, merge_left_cond, merge_right_cond)

    # Encode relative to the proposals.
    encoded_bottom_corners = (bottom_corners - xy) / wh
    encoded_heights = (
        bottom_corners[..., 1] - top_corners[..., 1]) / wh[..., 1]

    mid_x = bottom_corners[:, 1, 0]
    ratio = (mid_x - proposals[:, 0]) / wh[:, 0, 0]
    ratio = ratio.clamp(min=0, max=1)

    return torch.cat([
        encoded_bottom_corners.contiguous().view(num_instances, -1),
        encoded_heights.contiguous().view(num_instances, -1),
        ratio.view(num_instances, -1)
    ], dim=-1)