def forward(self, seg_feats, part_feats):
    """Predict class scores and box regressions from pooled RoI features.

    The dense pooled grids are converted to sparse tensors, passed through
    the part and segmentation branches, merged, down-sampled and finally
    fed to the shared / classification / regression heads.

    Args:
        seg_feats (torch.Tensor): Point-wise semantic features.
        part_feats (torch.Tensor): Point-wise part prediction features.

    Returns:
        tuple[torch.Tensor]: Score of class and bbox predictions.
    """
    # part_feats: (B * N, out_x, out_y, out_z, 4)
    num_rois = part_feats.shape[0]
    grid_shape = part_feats.shape[1:4]

    # Active sites are the voxels whose part features do not sum to zero.
    # (non_empty_num, 4) ==> [bs_idx, x_idx, y_idx, z_idx]
    active = part_feats.sum(dim=-1).nonzero(as_tuple=False)
    bs_idx, x_idx, y_idx, z_idx = (active[:, col] for col in range(4))
    indices = active.int()

    part_sp = spconv.SparseConvTensor(
        part_feats[bs_idx, x_idx, y_idx, z_idx], indices, grid_shape,
        num_rois)
    seg_sp = spconv.SparseConvTensor(
        seg_feats[bs_idx, x_idx, y_idx, z_idx], indices, grid_shape,
        num_rois)

    # Both branches run on the same active sites, so their feature rows
    # can be concatenated channel-wise.
    part_out = self.part_conv(part_sp)
    seg_out = self.seg_conv(seg_sp)
    merged = torch.cat((seg_out.features, part_out.features), dim=1)  # (N, C)
    merged_sp = spconv.SparseConvTensor(merged, indices, grid_shape,
                                        num_rois)

    down = self.conv_down(merged_sp)
    # Flatten every RoI to a single column before the shared head.
    flat = self.shared_fc(down.dense().view(num_rois, -1, 1))

    cls_score = self.conv_cls(flat).transpose(1, 2)
    cls_score = cls_score.contiguous().squeeze(dim=1)  # (B, 1)
    bbox_pred = self.conv_reg(flat).transpose(1, 2)
    bbox_pred = bbox_pred.contiguous().squeeze(dim=1)  # (B, C)
    return cls_score, bbox_pred
def forward(self, voxel_features, coors, batch_size):
    """Encode voxels with the sparse backbone and flatten the result.

    Args:
        voxel_features (torch.float32): Voxel features in shape (N, C).
        coors (torch.int32): Coordinates in shape (N, 4), the columns in
            the order of (batch_idx, z_idx, y_idx, x_idx).
        batch_size (int): Batch size.

    Returns:
        torch.Tensor: Densified output of ``conv_out`` with its second and
            third axes merged, i.e. viewed as (N, C * D, H, W).
    """
    sp_input = spconv.SparseConvTensor(voxel_features, coors.int(),
                                       self.sparse_shape, batch_size)
    feat = self.conv_input(sp_input)

    # Keep every stage output; only the last one is consumed below.
    stage_outputs = []
    for layer in self.encoder_layers:
        feat = layer(feat)
        stage_outputs.append(feat)

    # for detection head
    # [200, 176, 5] -> [200, 176, 2]
    dense = self.conv_out(stage_outputs[-1]).dense()
    batch, channels, depth, height, width = dense.shape
    return dense.view(batch, channels * depth, height, width)
def test_model_fn(batch, model, epoch):
    """Run ``model`` on one batch and collect its predictions (no loss).

    Reads ``use_coords``, ``mode``, ``batch_size`` and ``prepare_epochs``
    from the module-level ``cfg``.

    :param batch: dict with the keys 'locs', 'voxel_locs', 'p2v_map',
        'v2p_map', 'locs_float', 'feats', 'offsets' and 'spatial_shape'
    :param model: callable invoked as
        ``model(input_, p2v_map, coords_float, batch_idxs, batch_offsets, epoch)``
    :param epoch: current epoch; proposal outputs are only read when
        ``epoch > cfg.prepare_epochs``
    :return: dict with 'semantic' and 'pt_offsets', plus 'score' and
        'proposals' once past the preparation epochs
    """
    locs = batch['locs'].cuda()  # (N, 1 + 3), long, cuda, dimension 0 for batch_idx
    voxel_locs = batch['voxel_locs'].cuda()  # (M, 1 + 3), long, cuda
    p2v_map = batch['p2v_map'].cuda()  # (N), int, cuda
    v2p_map = batch['v2p_map'].cuda()  # (M, 1 + maxActive), int, cuda
    locs_float = batch['locs_float'].cuda()  # (N, 3), float32, cuda
    point_feats = batch['feats'].cuda()  # (N, C), float32, cuda
    offsets = batch['offsets'].cuda()  # (B + 1), int, cuda
    spatial_shape = batch['spatial_shape']

    if cfg.use_coords:
        point_feats = torch.cat((point_feats, locs_float), 1)
    voxel_feats = pointgroup_ops.voxelization(point_feats, v2p_map, cfg.mode)  # (M, C), float, cuda

    sparse_input = spconv.SparseConvTensor(voxel_feats, voxel_locs.int(),
                                           spatial_shape, cfg.batch_size)
    ret = model(sparse_input, p2v_map, locs_float, locs[:, 0].int(),
                offsets, epoch)

    semantic_scores = ret['semantic_scores']  # (N, nClass) float32, cuda
    pt_offsets = ret['pt_offsets']  # (N, 3), float32, cuda
    has_proposals = epoch > cfg.prepare_epochs
    if has_proposals:
        scores, proposals_idx, proposals_offset = ret['proposal_scores']

    ##### preds
    with torch.no_grad():
        preds = {'semantic': semantic_scores, 'pt_offsets': pt_offsets}
        if has_proposals:
            preds['score'] = scores
            preds['proposals'] = (proposals_idx, proposals_offset)

    return preds
def test_SparseBasicBlock():
    """Check SparseBasicBlock's layer construction and forward shape.

    Builds a 4 -> 4 channel block with submanifold convolutions and verifies
    the type and channel counts of both convolutions, the BN hyper-parameters
    of ``bn1``, and that a forward pass over four active voxels yields a
    (4, 4) feature matrix.
    """
    voxel_features = torch.tensor(
        [[6.56126, 0.9648336, -1.7339306, 0.315],
         [6.8162713, -2.480431, -1.3616394, 0.36],
         [11.643568, -4.744306, -1.3580885, 0.16],
         [23.482342, 6.5036807, 0.5806964, 0.35]],
        dtype=torch.float32)  # n, point_features
    coordinates = torch.tensor(
        [[0, 12, 819, 131], [0, 16, 750, 136], [1, 16, 705, 232],
         [1, 35, 930, 469]],
        dtype=torch.int32)  # n, 4(batch, ind_x, ind_y, ind_z)

    # test
    input_sp_tensor = spconv.SparseConvTensor(voxel_features, coordinates,
                                              [41, 1600, 1408], 2)
    # Named `block` rather than `self`: this is a free function, not a method.
    block = SparseBasicBlock(
        4,
        4,
        conv_cfg=dict(type='SubMConv3d', indice_key='subm1'),
        norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01))

    # test conv and bn layer
    assert isinstance(block.conv1, spconv.conv.SubMConv3d)
    assert block.conv1.in_channels == 4
    assert block.conv1.out_channels == 4
    assert isinstance(block.conv2, spconv.conv.SubMConv3d)
    # The original asserted conv2.out_channels twice and never checked
    # conv2.in_channels; check both sides of the second convolution.
    assert block.conv2.in_channels == 4
    assert block.conv2.out_channels == 4
    assert block.bn1.eps == 1e-3
    assert block.bn1.momentum == 0.01

    out_features = block(input_sp_tensor)
    assert out_features.features.shape == torch.Size([4, 4])
def forward(self, input):
    """Apply the conv branch and add the identity branch onto its features.

    A second sparse tensor is built from ``input``'s own fields so the
    identity branch receives its own tensor object rather than ``input``.

    :param input: sparse tensor exposing ``features``, ``indices``,
        ``spatial_shape`` and ``batch_size``
    :return: the conv-branch output with the identity-branch features added
        in place to ``output.features``
    """
    shortcut = spconv.SparseConvTensor(
        input.features,
        input.indices,
        input.spatial_shape,
        input.batch_size,
    )

    output = self.conv_branch(input)
    shortcut_feats = self.i_branch(shortcut).features
    # In-place accumulation, as in the original.
    output.features += shortcut_feats
    return output
def clusters_voxelization(self, clusters_idx, clusters_offset, feats, coords, fullscale, scale, mode):
    '''
    Voxelize every cluster into its own ``fullscale``-sized grid.

    Each cluster's points are centred on the cluster mean, scaled so the
    cluster extent fits the grid (the scale factor is capped at ``scale``),
    shifted by a random offset inside the grid and then voxelized.

    :param clusters_idx: (SumNPoint, 2), int, dim 0 for cluster_id, dim 1 for corresponding point idxs in N, cpu
    :param clusters_offset: (nCluster + 1), int, cpu
    :param feats: (N, C), float, cuda
    :param coords: (N, 3), float, cuda
    :param fullscale: side length of the cubic voxel grid; used as the upper coordinate bound and as
        every entry of the output spatial shape
    :param scale: upper bound applied to the per-cluster scale factor
    :param mode: forwarded unchanged to ``pointgroup_ops.voxelization_idx`` and ``pointgroup_ops.voxelization``
    :return: (voxelization_feats, inp_map) -- a ``spconv.SparseConvTensor`` holding the voxelized cluster
        features with one batch entry per cluster, and the input map (sumNPoint, int) returned by
        ``pointgroup_ops.voxelization_idx``
    '''
    # Convert the index / offset tensors once instead of on every use.
    point_ids = clusters_idx[:, 1].cuda().long()
    cluster_ids = clusters_idx[:, 0].cuda().long()
    offsets_cuda = clusters_offset.cuda()

    clusters_feats = feats[point_ids]
    clusters_coords = coords[point_ids]

    # Centre every cluster on its own mean.
    clusters_coords_mean = pointgroup_ops.sec_mean(clusters_coords, offsets_cuda)  # (nCluster, 3), float
    clusters_coords_mean = torch.index_select(clusters_coords_mean, 0, cluster_ids)  # (sumNPoint, 3), float
    clusters_coords -= clusters_coords_mean

    clusters_coords_min = pointgroup_ops.sec_min(clusters_coords, offsets_cuda)  # (nCluster, 3), float
    clusters_coords_max = pointgroup_ops.sec_max(clusters_coords, offsets_cuda)  # (nCluster, 3), float

    # Scale so the longest side of the cluster fits the grid, capped at `scale`.
    clusters_scale = 1 / ((clusters_coords_max - clusters_coords_min) / fullscale).max(1)[0] - 0.01  # (nCluster), float
    clusters_scale = torch.clamp(clusters_scale, min=None, max=scale)

    min_xyz = clusters_coords_min * clusters_scale.unsqueeze(-1)  # (nCluster, 3), float
    max_xyz = clusters_coords_max * clusters_scale.unsqueeze(-1)

    clusters_scale = torch.index_select(clusters_scale, 0, cluster_ids)
    clusters_coords = clusters_coords * clusters_scale.unsqueeze(-1)

    # Random shift of each cluster inside the grid. Named `extent` so the
    # builtin `range` is not shadowed. The two torch.rand draws stay in
    # their original order.
    extent = max_xyz - min_xyz
    offset = (
        -min_xyz
        + torch.clamp(fullscale - extent - 0.001, min=0) * torch.rand(3).cuda()
        + torch.clamp(fullscale - extent + 0.001, max=0) * torch.rand(3).cuda()
    )
    offset = torch.index_select(offset, 0, cluster_ids)
    clusters_coords += offset
    # Every coordinate must land inside [0, fullscale). Kept as an assert so
    # the exception type seen by callers is unchanged.
    assert clusters_coords.shape.numel() == ((clusters_coords >= 0) * (clusters_coords < fullscale)).sum()

    clusters_coords = clusters_coords.long()
    clusters_coords = torch.cat([clusters_idx[:, 0].view(-1, 1).long(), clusters_coords.cpu()], 1)  # (sumNPoint, 1 + 3)

    # The id in the last row plus one is used as the number of clusters.
    num_clusters = int(clusters_idx[-1, 0]) + 1
    out_coords, inp_map, out_map = pointgroup_ops.voxelization_idx(clusters_coords, num_clusters, mode)
    # output_coords: M * (1 + 3) long
    # input_map: sumNPoint int
    # output_map: M * (maxActive + 1) int

    out_feats = pointgroup_ops.voxelization(clusters_feats, out_map.cuda(), mode)  # (M, C), float, cuda

    spatial_shape = [fullscale] * 3
    voxelization_feats = spconv.SparseConvTensor(out_feats, out_coords.int().cuda(), spatial_shape, num_clusters)

    return voxelization_feats, inp_map
def test_make_sparse_convmodule():
    """Check the layer order and settings produced by make_sparse_convmodule.

    Two modules are built: a SubMConv3d module ordered conv -> norm -> act,
    which is also run forward, and a SparseInverseConv3d module ordered
    norm -> act -> conv, which is only inspected.
    """
    from mmdet3d.ops import make_sparse_convmodule

    feats = torch.tensor(
        [[6.56126, 0.9648336, -1.7339306, 0.315],
         [6.8162713, -2.480431, -1.3616394, 0.36],
         [11.643568, -4.744306, -1.3580885, 0.16],
         [23.482342, 6.5036807, 0.5806964, 0.35]],
        dtype=torch.float32)  # n, point_features
    coords = torch.tensor(
        [[0, 12, 819, 131], [0, 16, 750, 136], [1, 16, 705, 232],
         [1, 35, 930, 469]],
        dtype=torch.int32)  # n, 4(batch, ind_x, ind_y, ind_z)

    # test
    sp_input = spconv.SparseConvTensor(feats, coords, [41, 1600, 1408], 2)

    conv_first = make_sparse_convmodule(
        4,
        16,
        3,
        'test0',
        stride=1,
        padding=0,
        conv_type='SubMConv3d',
        norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
        order=('conv', 'norm', 'act'))
    conv_layer = conv_first[0]
    norm_layer = conv_first[1]
    act_layer = conv_first[2]
    assert isinstance(conv_layer, spconv.SubMConv3d)
    assert conv_layer.in_channels == 4
    assert conv_layer.out_channels == 16
    assert isinstance(norm_layer, torch.nn.BatchNorm1d)
    assert norm_layer.eps == 0.001
    assert norm_layer.momentum == 0.01
    assert isinstance(act_layer, torch.nn.ReLU)

    # test forward
    result = conv_first(sp_input)
    assert result.features.shape == torch.Size([4, 16])

    conv_last = make_sparse_convmodule(
        4,
        16,
        3,
        'test1',
        stride=1,
        padding=0,
        conv_type='SparseInverseConv3d',
        norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
        order=('norm', 'act', 'conv'))
    assert isinstance(conv_last[0], torch.nn.BatchNorm1d)
    assert isinstance(conv_last[1], torch.nn.ReLU)
    assert isinstance(conv_last[2], spconv.SparseInverseConv3d)
def forward(self, voxel_features, coors, batch_size):
    """Run the sparse encoder followed by the decoder.

    Args:
        voxel_features (torch.float32): Voxel features in shape [N, C].
        coors (torch.int32): Coordinates in shape [N, 4],
            the columns in the order of (batch_idx, z_idx, y_idx, x_idx).
        batch_size (int): Batch size.

    Returns:
        dict[str, torch.Tensor]: ``spatial_features`` (densified encoder
            output viewed as (N, C * D, H, W)) and ``seg_features`` (the
            ``features`` of the last decoder stage).
    """
    sp_input = spconv.SparseConvTensor(voxel_features, coors.int(),
                                       self.sparse_shape, batch_size)
    feat = self.conv_input(sp_input)

    # Every encoder stage output is kept: the decoder reads them back as
    # lateral inputs.
    stage_outputs = []
    for layer in self.encoder_layers:
        feat = layer(feat)
        stage_outputs.append(feat)

    # for detection head
    # [200, 176, 5] -> [200, 176, 2]
    dense = self.conv_out(stage_outputs[-1]).dense()
    batch, channels, depth, height, width = dense.shape
    spatial_features = dense.view(batch, channels * depth, height, width)

    # for segmentation head, with output shape:
    # [400, 352, 11] <- [200, 176, 5]
    # [800, 704, 21] <- [400, 352, 11]
    # [1600, 1408, 41] <- [800, 704, 21]
    # [1600, 1408, 41] <- [1600, 1408, 41]
    decoded = []
    feat = stage_outputs[-1]
    for stage in range(self.stage_num, 0, -1):
        lateral = getattr(self, f'lateral_layer{stage}')
        merge = getattr(self, f'merge_layer{stage}')
        upsample = getattr(self, f'upsample_layer{stage}')
        feat = self.decoder_layer_forward(stage_outputs[stage - 1], feat,
                                          lateral, merge, upsample)
        decoded.append(feat)

    return dict(
        spatial_features=spatial_features,
        seg_features=decoded[-1].features)
def forward(self, input):
    """Run one level of the U-shaped sparse network.

    ``self.blocks`` is always applied. When ``self.nPlanes`` has more than
    one entry, the result also goes through ``conv`` -> ``u`` -> ``deconv``;
    the features from before and after that path are concatenated
    channel-wise and passed to ``blocks_tail``.

    :param input: sparse tensor passed to ``self.blocks``
    :return: sparse tensor produced by this level
    """
    output = self.blocks(input)
    # Separate tensor object holding the features as they are before the
    # down / up path runs.
    skip = spconv.SparseConvTensor(output.features, output.indices,
                                   output.spatial_shape, output.batch_size)

    if len(self.nPlanes) <= 1:
        return output

    decoded = self.deconv(self.u(self.conv(output)))
    output.features = torch.cat((skip.features, decoded.features), dim=1)
    return self.blocks_tail(output)
def forward(self, voxel_features, coors, batch_size, img_feats, img_metas, is_test=False):
    """Forward of the sparse encoder with per-stage image feature fusion.

    Args:
        voxel_features (torch.float32): Voxel features in shape (N, C).
            The first three columns are copied into ``points_mean[:, 1:]``,
            which only fits when C == 4.
        coors (torch.int32): Coordinates in shape (N, 4), the columns in
            the order of (batch_idx, z_idx, y_idx, x_idx).
        batch_size (int): Batch size.
        img_feats (Sequence[torch.Tensor]): Image feature maps, indexed by
            encoder stage (``img_feats[i]`` is used for stage ``i``).
        img_metas (Sequence): Image meta information; only ``img_metas[0]``
            is read.
        is_test (bool): When True, return only the spatial features and
            skip the auxiliary point-wise heads. Defaults to False.

    Returns:
        torch.Tensor | tuple: ``spatial_features`` viewed as
            (N, C * D, H, W) when ``is_test`` is True; otherwise
            ``(spatial_features, (points_mean, point_cls, point_reg))``.
    """
    # Column 0 holds the batch index, the remaining columns the first three
    # voxel feature channels. Filled before `coors` is cast to int, as in
    # the original.
    points_mean = torch.zeros_like(voxel_features)
    points_mean[:, 0] = coors[:, 0]
    points_mean[:, 1:] = voxel_features[:, :3]

    coors = coors.int()
    input_sp_tensor = spconv.SparseConvTensor(voxel_features, coors,
                                              self.sparse_shape, batch_size)
    x = self.conv_input(input_sp_tensor)

    encode_features = []
    middle = []
    for i, encoder_layer in enumerate(self.encoder_layers):
        x = encoder_layer(x)
        vx_feat, vx_nxyz = tensor2points(i, x, (0, -40., -3.))
        point_img = sample_single(img_feats[i], vx_nxyz, img_metas[0])
        fusion_features = self.Fusion_Conv[i](vx_feat, point_img)
        # NOTE(review): the fused features are only collected in `middle`
        # for the point-wise heads; they are not written back into `x`, so
        # later encoder stages see un-fused features. Confirm this is
        # intended.
        middle.append((vx_nxyz, fusion_features))
        encode_features.append(x)

    # for detection head
    # [200, 176, 5] -> [200, 176, 2]
    out = self.conv_out(encode_features[-1])
    spatial_features = out.dense()
    N, C, D, H, W = spatial_features.shape
    spatial_features = spatial_features.view(N, C * D, H, W)

    if is_test:
        return spatial_features

    # Interpolate the fused features of the first four stages back onto the
    # input points. Explicit indexing still raises IndexError when fewer
    # than four stages exist, as the original did.
    point_feats = [
        nearest_neighbor_interpolate(points_mean, vx_nxyz,
                                     vx_feat.contiguous())
        for vx_nxyz, vx_feat in (middle[stage] for stage in range(4))
    ]
    pointwise = self.point_fc(torch.cat(point_feats, dim=-1))
    point_cls = self.point_cls(pointwise)
    point_reg = self.point_reg(pointwise)
    return spatial_features, (points_mean, point_cls, point_reg)
def model_fn(batch, model, epoch):
    """Run one training step's forward pass and compute the loss.

    Reads ``use_coords``, ``mode``, ``batch_size`` and ``prepare_epochs``
    from the module-level ``cfg`` and delegates the loss to ``loss_fn``.

    :param batch: dict with the keys 'locs', 'voxel_locs', 'p2v_map', 'v2p_map', 'locs_float', 'feats',
        'labels', 'instance_labels', 'instance_info', 'instance_pointnum', 'offsets' and 'spatial_shape'
    :param model: callable invoked as
        ``model(input_, p2v_map, coords_float, batch_idxs, batch_offsets, epoch)``
    :param epoch: current epoch; proposal outputs are only used when ``epoch > cfg.prepare_epochs``
    :return: (loss, preds, visual_dict, meter_dict)
    """
    ##### prepare input and forward
    locs = batch['locs'].cuda()  # (N, 1 + 3), long, cuda, dimension 0 for batch_idx
    voxel_locs = batch['voxel_locs'].cuda()  # (M, 1 + 3), long, cuda
    p2v_map = batch['p2v_map'].cuda()  # (N), int, cuda
    v2p_map = batch['v2p_map'].cuda()  # (M, 1 + maxActive), int, cuda
    locs_float = batch['locs_float'].cuda()  # (N, 3), float32, cuda
    point_feats = batch['feats'].cuda()  # (N, C), float32, cuda
    labels = batch['labels'].cuda()  # (N), long, cuda
    instance_labels = batch['instance_labels'].cuda()  # (N), long, cuda, 0~total_nInst, -100
    instance_info = batch['instance_info'].cuda()  # (N, 9), float32, cuda, (meanxyz, minxyz, maxxyz)
    instance_pointnum = batch['instance_pointnum'].cuda()  # (total_nInst), int, cuda
    offsets = batch['offsets'].cuda()  # (B + 1), int, cuda
    spatial_shape = batch['spatial_shape']

    if cfg.use_coords:
        point_feats = torch.cat((point_feats, locs_float), 1)
    voxel_feats = pointgroup_ops.voxelization(point_feats, v2p_map, cfg.mode)  # (M, C), float, cuda

    sparse_input = spconv.SparseConvTensor(voxel_feats, voxel_locs.int(),
                                           spatial_shape, cfg.batch_size)
    ret = model(sparse_input, p2v_map, locs_float, locs[:, 0].int(),
                offsets, epoch)

    semantic_scores = ret['semantic_scores']  # (N, nClass) float32, cuda
    pt_offsets = ret['pt_offsets']  # (N, 3), float32, cuda
    has_proposals = epoch > cfg.prepare_epochs
    if has_proposals:
        scores, proposals_idx, proposals_offset = ret['proposal_scores']
        # scores: (nProposal, 1) float, cuda
        # proposals_idx: (sumNPoint, 2), int, cpu, dim 0 for cluster_id, dim 1 for corresponding point idxs in N
        # proposals_offset: (nProposal + 1), int, cpu

    loss_inp = {
        'semantic_scores': (semantic_scores, labels),
        'pt_offsets': (pt_offsets, locs_float, instance_info, instance_labels),
    }
    if has_proposals:
        loss_inp['proposal_scores'] = (scores, proposals_idx, proposals_offset, instance_pointnum)

    loss, loss_out, _infos = loss_fn(loss_inp, epoch)

    ##### accuracy / visual_dict / meter_dict
    with torch.no_grad():
        preds = {'semantic': semantic_scores, 'pt_offsets': pt_offsets}
        if has_proposals:
            preds['score'] = scores
            preds['proposals'] = (proposals_idx, proposals_offset)

        visual_dict = {'loss': loss}
        meter_dict = {'loss': (loss.item(), locs.shape[0])}
        # The first element of each loss_out entry is the value that gets
        # logged; the meter additionally keeps the second element.
        for name, entry in loss_out.items():
            visual_dict[name] = entry[0]
            meter_dict[name] = (float(entry[0]), entry[1])

    return loss, preds, visual_dict, meter_dict