def torch_xywh_to_xyxy(boxes):
    """Convert center/size boxes to min/max corner boxes.

    Args:
        boxes: float tensor, shape(N, M, 4), last axis laid out as
            (x_center, y_center, width, height).

    Returns:
        Tensor of shape(N, M, 4), last axis laid out as
        (xmin, ymin, xmax, ymax).
    """
    format_checker.check_tensor_shape(boxes, [None, None, 4])
    format_checker.check_tensor_type(boxes, 'float')

    centers = boxes[:, :, :2]
    half_sizes = boxes[:, :, 2:4] / 2
    return torch.cat([centers - half_sizes, centers + half_sizes], dim=-1)
def torch_xyxy_to_xywh(boxes):
    """Convert min/max corner boxes to center/size boxes.

    Args:
        boxes: float tensor, shape(N, M, 4), last axis laid out as
            (xmin, ymin, xmax, ymax).

    Returns:
        Tensor of shape(N, M, 4), last axis laid out as
        (x_center, y_center, width, height).
    """
    format_checker.check_tensor_shape(boxes, [None, None, 4])
    format_checker.check_tensor_type(boxes, 'float')

    mins = boxes[:, :, :2]
    maxs = boxes[:, :, 2:4]
    centers = (mins + maxs) / 2
    sizes = maxs - mins
    return torch.cat([centers, sizes], dim=-1)
def encode_ray(lines, proposals):
    """Encode line segments as an encoded point plus a unit direction.

    Args:
        lines: shape(N, 2, 2); ``lines[:, 0]`` and ``lines[:, 1]`` are the
            two endpoints of each segment.
        proposals: boxes handed to ``encode_points`` and to
            ``geometry_utils.torch_xyxy_to_xywh`` (after adding a leading
            batch axis) -- presumably shape(N, 4) in xyxy layout; confirm
            against the caller.

    Returns:
        ``encode_points(lines[:, 0], proposals)`` concatenated along the
        last axis with the (cos, sin) pair of the size-normalized direction.
        A segment with zero direction produces NaN in the (cos, sin) pair
        because the direction is divided by its own norm.
    """
    format_checker.check_tensor_shape(lines, [None, 2, 2])

    first_point = lines[:, 0]
    encoded_points = encode_points(first_point, proposals)

    # The direction points from the second endpoint towards the first one.
    direction = first_point - lines[:, 1]

    # pooling_size should be the same in x and y direction
    proposal_sizes = geometry_utils.torch_xyxy_to_xywh(
        proposals.unsqueeze(0))[0][:, 2:]
    scaled = direction / proposal_sizes

    length = torch.norm(scaled, dim=-1)
    unit_direction = torch.stack(
        [scaled[:, 0] / length, scaled[:, 1] / length], dim=-1)

    return torch.cat([encoded_points, unit_direction], dim=-1)
def torch_ry_to_rotation_matrix(rotation_y):
    """Build one 3x3 rotation matrix about the y axis per angle.

    Args:
        rotation_y: shape(N,), angles in radians (fed to torch.cos/sin).

    Returns:
        rotation_matrix: shape(N, 3, 3), each matrix being
            [[ cos, 0, sin],
             [   0, 1,   0],
             [-sin, 0, cos]]
    """
    format_checker.check_tensor_shape(rotation_y, [None])

    cos_ry = torch.cos(rotation_y)
    sin_ry = torch.sin(rotation_y)
    zero = torch.zeros_like(rotation_y)
    one = torch.ones_like(rotation_y)

    # Row-major list of the nine matrix entries, each of shape(N,).
    entries = [
        cos_ry, zero, sin_ry,
        zero, one, zero,
        -sin_ry, zero, cos_ry,
    ]
    return torch.stack(entries, dim=-1).reshape(-1, 3, 3)
def torch_xyxy_to_corner_4c(label_boxes_2d):
    """Expand xyxy boxes into their four corner points.

    Args:
        label_boxes_2d: shape(N, M, 4), last axis laid out as
            (xmin, ymin, xmax, ymax).

    Returns:
        boxes_4c: shape(N, M, 4, 2). The corner axis is ordered
            (xmax, ymax), (xmin, ymax), (xmin, ymin), (xmax, ymin).
    """
    format_checker.check_tensor_shape(label_boxes_2d, [None, None, 4])

    # (x column, y column) picked from the xyxy layout for every corner.
    corner_columns = (
        [2, 3],  # right_down
        [0, 3],  # left_down
        [0, 1],  # left_top
        [2, 1],  # right_top
    )
    label_boxes_4c = torch.stack(
        [label_boxes_2d[:, :, columns] for columns in corner_columns], dim=2)

    format_checker.check_tensor_shape(label_boxes_4c, [None, None, 4, 2])
    return label_boxes_4c
def decode_batch(encoded_corners_2d_all, final_boxes_2d):
    """Decode eight 2D corners per box, keeping the best-scored candidate.

    Every corner comes with four candidate offsets (one per anchor returned
    by ``geometry_utils.torch_xyxy_to_corner_4c``) and four scores; the
    candidate with the highest score is decoded as
    ``offset * (w, h) + anchor``.

    Args:
        encoded_corners_2d_all: tensor whose first two dims are (N, M) and
            whose remaining dims flatten to 8 * (4 * 2 + 4) = 96 values.
            The first 64 are read as offsets of shape(8, 4, 2), the last 32
            as scores of shape(8, 4).
        final_boxes_2d: shape(N, M, 4), passed to the xyxy helpers of
            ``geometry_utils``.

    Returns:
        corners_2d: shape(N, M, 8, 2)
    """
    N, M = encoded_corners_2d_all.shape[:2]
    flat_encoding = encoded_corners_2d_all.view(N, M, -1)

    offsets = flat_encoding[..., :64].contiguous().view(N, M, 8, 4, 2)
    scores = flat_encoding[..., 64:].contiguous().view(N, M, 8, 4)
    _, best_candidate = scores.max(dim=-1)

    format_checker.check_tensor_shape(final_boxes_2d, [None, None, 4])

    box_sizes = geometry_utils.torch_xyxy_to_xywh(final_boxes_2d)[:, :, 2:]
    box_corners = geometry_utils.torch_xyxy_to_corner_4c(final_boxes_2d)

    # Sizes broadcast as (N, M, 1, 1, 2) and anchors as (N, M, 1, 4, 2)
    # over the eight corners.
    candidates = (offsets * box_sizes.unsqueeze(2).unsqueeze(2) +
                  box_corners.unsqueeze(2))

    # Pick, for every (box, corner) pair, the candidate with the top score.
    pair_index = torch.arange(
        best_candidate.numel()).type_as(best_candidate)
    selected = candidates.view(-1, 4, 2)[pair_index, best_candidate.view(-1)]
    return selected.view(N, M, 8, 2)
def decode_batch_new(encoded_corners_2d_all, final_boxes_2d, p2):
    """Decode eight 2D corners encoded relative to the box center.

    Args:
        encoded_corners_2d_all: shape(N, M, C); only the first 16 channels
            are used and read as 8 (x, y) offsets.
        final_boxes_2d: shape(N, M, 4), passed to
            ``geometry_utils.torch_xyxy_to_xywh``.
        p2: not used by this function.

    Returns:
        corners_2d: shape(N, M, 8, 2), computed as
            ``offset * (w, h) + (x_center, y_center)``.
    """
    offsets = encoded_corners_2d_all[:, :, :16]
    format_checker.check_tensor_shape(offsets, [None, None, 16])
    format_checker.check_tensor_shape(final_boxes_2d, [None, None, 4])

    batch_size, num_boxes = offsets.shape[:2]

    boxes_xywh = geometry_utils.torch_xyxy_to_xywh(final_boxes_2d)
    # Extra axis so center and size broadcast over the eight corners.
    box_center = boxes_xywh[:, :, :2].unsqueeze(2)
    box_size = boxes_xywh[:, :, 2:].unsqueeze(2)

    offsets = offsets.view(batch_size, num_boxes, 8, 2)
    return offsets * box_size + box_center
def torch_points_3d_to_points_2d(points_3d, p2):
    """Project 3D points with a 3x4 projection matrix.

    Args:
        points_3d: shape(N, 3)
        p2: shape(3, 4)

    Returns:
        points_2d: shape(N, 2), the first two homogeneous coordinates
            divided by the third one.
    """
    format_checker.check_tensor_shape(points_3d, [None, 3])
    format_checker.check_tensor_shape(p2, [3, 4])

    ones = torch.ones_like(points_3d[:, -1:])
    homogeneous_3d = torch.cat((points_3d, ones), dim=-1)

    # (3, 4) @ (4, N) -> (3, N), transposed back to one point per row.
    projected = torch.matmul(p2, homogeneous_3d.t()).t()

    # Perspective divide by the last homogeneous coordinate.
    projected = projected / projected[:, -1:]
    return projected[:, :2]
def decode_ray(encoded_lines, proposals, p2):
    """Decode encoded rays into the direction computed by geometry_utils.

    Args:
        encoded_lines: shape(N, 4); the first two channels are an encoded
            point, the last two a direction that is re-normalized to unit
            length here.
        proposals: shape(N, 4), passed to ``decode_points`` and to
            ``geometry_utils.torch_xyxy_to_xywh``.
        p2: projection matrix; a leading batch axis is added before it is
            handed to ``geometry_utils.torch_pts_2d_to_dir_3d`` --
            presumably shape(3, 4), confirm against the caller.

    Returns:
        ry: first batch entry of ``torch_pts_2d_to_dir_3d`` with a trailing
            axis appended.
    """
    format_checker.check_tensor_shape(encoded_lines, [None, 4])
    format_checker.check_tensor_shape(proposals, [None, 4])

    encoded_points = encoded_lines[:, :2]
    raw_direction = encoded_lines[:, 2:]

    length = torch.norm(raw_direction, dim=-1)
    unit_direction = torch.stack(
        [raw_direction[:, 0] / length, raw_direction[:, 1] / length], dim=-1)

    proposal_sizes = geometry_utils.torch_xyxy_to_xywh(
        proposals.unsqueeze(0))[0][:, 2:]
    deltas = unit_direction * proposal_sizes

    # Rebuild both endpoints of the segment in image coordinates.
    first_point = decode_points(encoded_points, proposals)
    second_point = first_point - deltas
    segments = torch.cat([first_point, second_point], dim=-1)

    directions = geometry_utils.torch_pts_2d_to_dir_3d(
        segments.unsqueeze(0), p2.unsqueeze(0))[0]
    return directions.unsqueeze(-1)
def multidim_index(tensor, index):
    """Gather rows of a 3D tensor with one row of indices per batch entry.

    For every ``i < S`` and ``t < T`` the result holds
    ``tensor[i, index[i, t]]``.

    Args:
        tensor: shape(N, M, K)
        index: shape(S, T), integer positions along the M axis. S must not
            exceed N, otherwise the flattened lookup runs out of range.

    Returns:
        indexed_tensor: shape(S, T, K)
    """
    format_checker.check_tensor_shape(tensor, [None, None, None])
    format_checker.check_tensor_shape(index, [None, None])

    tensor = tensor.contiguous()
    index = index.contiguous()

    _, rows_per_batch, channels = tensor.shape
    num_index_rows = index.shape[0]

    # Shift each index row so it addresses its own batch entry once the
    # first two axes of ``tensor`` are flattened together.
    batch_offset = torch.arange(
        0, num_index_rows, device=tensor.device) * rows_per_batch
    flat_index = index + batch_offset.view(num_index_rows, 1).type_as(index)

    flat_tensor = tensor.view(-1, channels)
    return flat_tensor[flat_index.view(-1)].view(num_index_rows, -1, channels)
def torch_points_2d_to_points_3d(points_2d, depth, p2):
    """Back-project image points with known depth into 3D.

    ``p2`` is split into ``K = p2[:, :3]`` and ``kt = p2[:, 3]``; the result
    is ``K^-1 @ (depth * [u, v, 1]) - K^-1 @ kt``.

    Args:
        points_2d: shape(N, 2)
        depth: shape(N, ) or shape(N, 1)
        p2: shape(3, 4)

    Returns:
        points_3d: shape(N, 3)
    """
    if depth.dim() == 1:
        depth = depth.unsqueeze(-1)
    format_checker.check_tensor_shape(points_2d, [None, 2])
    format_checker.check_tensor_shape(depth, [None, 1])
    format_checker.check_tensor_shape(p2, [3, 4])

    points_2d_homo = torch.cat(
        [points_2d, torch.ones_like(points_2d[:, -1:])], dim=-1)

    # Invert the 3x3 part once and reuse it for both the translation and
    # the back-projection.
    K_inv = torch.inverse(p2[:3, :3])
    T = torch.matmul(K_inv, p2[:, 3])

    points_3d = torch.matmul(
        K_inv, (depth * points_2d_homo).permute(1, 0)).permute(1, 0)

    # no rotation
    return points_3d - T
def decode_batch(encoded_corners_2d_all, final_boxes_2d, p2):
    """Decode a 3D location and an orientation from box-relative encodings.

    Args:
        encoded_corners_2d_all: shape(N, M, 2 + 1 + 8). Channels 0-1 are a
            2D point, channel 2 is its depth and channels 3-10 are four
            bottom corners (4 * 2). The 2D values are offsets from the box
            top-left corner expressed in units of the box width/height.
        final_boxes_2d: shape(N, M, 4), last axis laid out as
            (xmin, ymin, xmax, ymax).
        p2: projection matrix forwarded unchanged to the ``geometry_utils``
            helpers.

    Returns:
        ``torch.stack([location, ry], dim=-1)`` where ``location`` comes from
        ``geometry_utils.torch_points_2d_to_points_3d`` and ``ry`` is the
        mean of the two directions computed from the bottom corners.
    """
    N, M, _ = encoded_corners_2d_all.shape

    left_top = final_boxes_2d[:, :, :2]
    wh = geometry_utils.torch_xyxy_to_xywh(final_boxes_2d)[:, :, 2:]

    C_2d = encoded_corners_2d_all[:, :, :2] * wh + left_top
    depth_instance = encoded_corners_2d_all[:, :, 2:3]
    bottom_corners = encoded_corners_2d_all[:, :, 3:]

    # Validate the raw slices before they are reshaped or consumed.
    format_checker.check_tensor_shape(C_2d, [None, None, 2])
    format_checker.check_tensor_shape(depth_instance, [None, None, 1])
    format_checker.check_tensor_shape(bottom_corners, [None, None, 8])

    location = geometry_utils.torch_points_2d_to_points_3d(
        C_2d, depth_instance, p2)

    # get orients
    # Split into (corner, xy) first so the per-box size and origin broadcast
    # over the four corners; an 8-channel tensor cannot be multiplied by the
    # 2-channel ``wh`` directly.
    bottom_corners = bottom_corners.reshape(N, M, 4, 2)
    bottom_corners = bottom_corners * wh.unsqueeze(2) + left_top.unsqueeze(2)

    ry_left = geometry_utils.torch_pts_2d_to_dir_3d(
        bottom_corners[:, :, [0, 3]], p2)
    ry_right = geometry_utils.torch_pts_2d_to_dir_3d(
        bottom_corners[:, :, [1, 2]], p2)
    ry = (ry_left + ry_right) / 2

    # NOTE(review): torch.stack needs ``location`` and ``ry`` to have the
    # same shape -- confirm what the geometry_utils helpers return here.
    return torch.stack([location, ry], dim=-1)
def torch_window_filter(points_2d, window_shape, deltas=0):
    """Mark the points that fall inside a (possibly enlarged) window.

    Args:
        points_2d: shape(N, M, 2)
        window_shape: shape(N, 4), each item is like
            (xmin, ymin, xmax, ymax)
        deltas: soft interval added on every side of the window

    Returns:
        Boolean mask of shape(N, M); True where the point lies within the
        window bounds (inclusive) widened by ``deltas``.
    """
    format_checker.check_tensor_shape(points_2d, [None, None, 2])
    format_checker.check_tensor_shape(window_shape, [None, 4])

    # Each bound has shape(N, 1) so it broadcasts over the M points.
    xmin, ymin, xmax, ymax = window_shape.unsqueeze(1).unbind(dim=-1)
    xs = points_2d[:, :, 0]
    ys = points_2d[:, :, 1]

    inside_x = (xs >= xmin - deltas) & (xs <= xmax + deltas)
    inside_y = (ys >= ymin - deltas) & (ys <= ymax + deltas)
    return inside_x & inside_y
def decode_batch(encoded_corners_2d_all, final_boxes_2d, p2):
    """Decode eight corners (front and rear plane) and reorder them.

    Args:
        encoded_corners_2d_all: shape(N, M, 8 * 5 + 1). The trailing channel
            is dropped; the rest is read as 8 corners x 5 values: three
            encoded values (the third is used as depth below) followed by
            two visibility values.
        final_boxes_2d: shape(N, M, 4)
        p2: projection matrices with a leading batch axis of size 1 (this
            is asserted); ``p2[0]`` is passed to
            ``geometry_utils.torch_points_2d_to_points_3d``.

    Returns:
        The decoded corners indexed with ``Order.reorder()`` along the
        corner axis, with the last channel removed.
    """
    N, M = encoded_corners_2d_all.shape[:2]

    # Drop the trailing channel and split every corner into its fields.
    per_corner = encoded_corners_2d_all[:, :, :-1].view(N, M, 8, 5)
    encoded_corners_2d = per_corner[:, :, :, :3].contiguous().view(N, M, -1)
    visibility = per_corner[:, :, :, 3:].contiguous().view(N, M, -1)

    format_checker.check_tensor_shape(encoded_corners_2d, [None, None, 24])
    format_checker.check_tensor_shape(visibility, [None, None, 16])
    format_checker.check_tensor_shape(final_boxes_2d, [None, None, 4])

    encoded_corners_2d = encoded_corners_2d.view(N, M, 8, 3)
    front_plane = Corner2DNearestCoder.decode_with_bbox(
        encoded_corners_2d[:, :, :4], final_boxes_2d)
    rear_plane = Corner2DNearestCoder.decode_with_bbox(
        encoded_corners_2d[:, :, 4:], final_boxes_2d)
    corners_2d = torch.cat([front_plane, rear_plane], dim=2)

    assert p2.shape[0] == 1, 'only one image in a batch'

    depth = corners_2d[:, :, :, 2].view(-1)
    # NOTE(review): corners_3d is computed but never returned or used --
    # confirm whether the 3D corners were meant to be part of the result.
    corners_3d = geometry_utils.torch_points_2d_to_points_3d(
        corners_2d[:, :, :, :2].view(-1, 2), depth, p2[0]).view(N, M, -1, 3)

    return corners_2d[:, :, Order.reorder()][..., :-1]
def decode_batch(self, encoded_corners_2d_all, auxiliary_dict):
    """Decode eight 2D corners (front and rear plane) and reorder them.

    Args:
        encoded_corners_2d_all: tensor whose first two dims are (N, M) and
            which holds 8 * 4 values per box, read as 8 corners x 4 values:
            two encoded coordinates followed by two visibility values.
        auxiliary_dict: mapping that provides the 2D boxes, shape(N, M, 4),
            under ``constants.KEY_BOXES_2D``.

    Returns:
        The front and rear planes decoded by ``self.decode_with_bbox``,
        concatenated along the corner axis and indexed with
        ``Order.reorder()``.
    """
    final_boxes_2d = auxiliary_dict[constants.KEY_BOXES_2D]
    N, M = encoded_corners_2d_all.shape[:2]

    # Split every corner into its coordinate and visibility fields.
    per_corner = encoded_corners_2d_all.view(N, M, 8, 4)
    encoded_corners_2d = per_corner[:, :, :, :2].contiguous().view(N, M, -1)
    visibility = per_corner[:, :, :, 2:].contiguous().view(N, M, -1)

    format_checker.check_tensor_shape(encoded_corners_2d, [None, None, 16])
    format_checker.check_tensor_shape(visibility, [None, None, 16])
    format_checker.check_tensor_shape(final_boxes_2d, [None, None, 4])

    encoded_corners_2d = encoded_corners_2d.view(N, M, 8, 2)

    # Corners 0-3 form the front plane, corners 4-7 the rear plane.
    plane_slices = (slice(None, 4), slice(4, None))
    decoded_planes = [
        self.decode_with_bbox(encoded_corners_2d[:, :, plane], final_boxes_2d)
        for plane in plane_slices
    ]
    corners_2d = torch.cat(decoded_planes, dim=2)
    return corners_2d[:, :, Order.reorder()]