def __init__(self, num_layers, ndim, shape, kernel_size, stride, padding,
             dilation):
    super().__init__()
    # Stack num_layers identical sparse max-pool layers.
    layers = [
        spconv.SparseMaxPool3d(kernel_size, stride, padding, dilation)
        for _ in range(num_layers)
    ]
    self.net = spconv.SparseSequential(*layers)
    self.shape = shape
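# Usage sketch for the pooling stack above. The enclosing class name is not
# shown here, so `SparseMaxPoolStack` is a stand-in; spconv's v1.x API
# (SparseConvTensor(features, indices, spatial_shape, batch_size)) and a
# CUDA device are assumed.
#
#   import torch
#   import spconv
#
#   net = SparseMaxPoolStack(num_layers=2, ndim=3, shape=[40, 40, 40],
#                            kernel_size=2, stride=2, padding=0, dilation=1)
#   features = torch.randn(100, 16).cuda()             # 100 active voxels
#   indices = torch.cat([torch.zeros(100, 1),          # batch index 0
#                        torch.randint(0, 40, (100, 3))], dim=1).int().cuda()
#   x = spconv.SparseConvTensor(features, indices, [40, 40, 40], batch_size=1)
#   out = net.net(x)  # each kernel-2/stride-2 pool halves the grid: 40 -> 20 -> 10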
def __init__(self,
             output_shape,
             use_norm=True,
             num_input_features=128,
             num_filters_down1=[64],
             num_filters_down2=[64, 64],
             name='SpMiddleFHDV2'):
    super(SpMiddleFHDV2, self).__init__()
    self.name = name
    if use_norm:
        BatchNorm2d = change_default_args(eps=1e-3, momentum=0.01)(nn.BatchNorm2d)
        BatchNorm1d = change_default_args(eps=1e-3, momentum=0.01)(nn.BatchNorm1d)
        Conv2d = change_default_args(bias=False)(nn.Conv2d)
        SpConv3d = change_default_args(bias=False)(spconv.SparseConv3d)
        SubMConv3d = change_default_args(bias=False)(spconv.SubMConv3d)
        ConvTranspose2d = change_default_args(bias=False)(nn.ConvTranspose2d)
    else:
        BatchNorm2d = Empty
        BatchNorm1d = Empty
        Conv2d = change_default_args(bias=True)(nn.Conv2d)
        SpConv3d = change_default_args(bias=True)(spconv.SparseConv3d)
        SubMConv3d = change_default_args(bias=True)(spconv.SubMConv3d)
        ConvTranspose2d = change_default_args(bias=True)(nn.ConvTranspose2d)

    sparse_shape = np.array(output_shape[1:4]) + [1, 0, 0]
    # sparse_shape[0] = 11
    print(sparse_shape)
    self.sparse_shape = sparse_shape
    self.voxel_output_shape = output_shape
    # input: [1600, 1200, 41]
    self.middle_conv = spconv.SparseSequential(
        SubMConv3d(num_input_features, 16, 3, indice_key="subm0"),
        BatchNorm1d(16),
        nn.ReLU(),
        SubMConv3d(16, 16, 3, indice_key="subm0"),
        BatchNorm1d(16),
        nn.ReLU(),
        SpConv3d(16, 32, 3, 2, padding=1),  # [1600, 1200, 41] -> [800, 600, 21]
        BatchNorm1d(32),
        nn.ReLU(),
        SubMConv3d(32, 32, 3, indice_key="subm1"),
        BatchNorm1d(32),
        nn.ReLU(),
        SubMConv3d(32, 32, 3, indice_key="subm1"),
        BatchNorm1d(32),
        nn.ReLU(),
        SpConv3d(32, 64, 3, 2, padding=1),  # [800, 600, 21] -> [400, 300, 11]
        BatchNorm1d(64),
        nn.ReLU(),
        SubMConv3d(64, 64, 3, indice_key="subm2"),
        BatchNorm1d(64),
        nn.ReLU(),
        SubMConv3d(64, 64, 3, indice_key="subm2"),
        BatchNorm1d(64),
        nn.ReLU(),
        SubMConv3d(64, 64, 3, indice_key="subm2"),
        BatchNorm1d(64),
        nn.ReLU(),
        SpConv3d(64, 64, 3, 2, padding=[0, 1, 1]),  # [400, 300, 11] -> [200, 150, 5]
        BatchNorm1d(64),
        nn.ReLU(),
        SubMConv3d(64, 64, 3, indice_key="subm3"),
        BatchNorm1d(64),
        nn.ReLU(),
        SubMConv3d(64, 64, 3, indice_key="subm3"),
        BatchNorm1d(64),
        nn.ReLU(),
        SubMConv3d(64, 64, 3, indice_key="subm3"),
        BatchNorm1d(64),
        nn.ReLU(),
        SpConv3d(64, 64, (3, 1, 1), (2, 1, 1)),  # [200, 150, 5] -> [200, 150, 2]
        BatchNorm1d(64),
        nn.ReLU(),
        spconv.SparseMaxPool3d([2, 1, 1]),
    )
    self.max_batch_size = 6
    self.grid = torch.full([self.max_batch_size, *sparse_shape], -1,
                           dtype=torch.int32).cuda()
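# Shape walk-through for SpMiddleFHDV2 (a sketch; assumes output_shape is
# [batch, z, y, x, channels] as in SECOND, e.g. output_shape[1:4] = [40, 1600, 1200],
# so sparse_shape = [41, 1600, 1200]; z listed first here, reversed relative
# to the inline comments above). With out = floor((in + 2p - k) / s) + 1:
#
#   SpConv3d(16, 32, 3, 2, padding=1):          [41, 1600, 1200] -> [21, 800, 600]
#   SpConv3d(32, 64, 3, 2, padding=1):          [21, 800, 600]   -> [11, 400, 300]
#   SpConv3d(64, 64, 3, 2, padding=[0, 1, 1]):  [11, 400, 300]   -> [5, 200, 150]
#   SpConv3d(64, 64, (3, 1, 1), (2, 1, 1)):     [5, 200, 150]    -> [2, 200, 150]
#   SparseMaxPool3d([2, 1, 1]):                 [2, 200, 150]    -> [1, 200, 150]
#
# The final z-depth of 1 lets the caller densify the tensor and fold z into
# the channel dimension to obtain a 2-D BEV feature map.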
def __init__(self,
             num_point_features,  # 16
             rcnn_cfg,
             **kwargs):
    super().__init__(rcnn_target_config=rcnn_cfg.TARGET_CONFIG)
    self.SA_modules = nn.ModuleList()
    block = self.post_act_block

    self.conv_part = spconv.SparseSequential(
        block(4, 64, 3, padding=1, indice_key='rcnn_subm1'),
        block(64, 64, 3, padding=1, indice_key='rcnn_subm1_1'),
    )
    self.conv_rpn = spconv.SparseSequential(
        block(num_point_features, 64, 3, padding=1, indice_key='rcnn_subm2'),
        block(64, 64, 3, padding=1, indice_key='rcnn_subm1_2'),
    )
    self.conv_down = spconv.SparseSequential(  # [14, 14, 14] -> [7, 7, 7]
        block(128, 128, 3, padding=1, indice_key='rcnn_subm2'),
        block(128, 128, 3, padding=1, indice_key='rcnn_subm2'),
        spconv.SparseMaxPool3d(kernel_size=2, stride=2),
        block(128, 128, 3, padding=1, indice_key='rcnn_subm3'),
        block(128, rcnn_cfg.SHARED_FC[0], 3, padding=1, indice_key='rcnn_subm3'),
    )

    # Shared fully connected layers: a stack of 1-D convolutions, each
    # roughly conv1d -> bn -> relu -> dropout(0.3); the channel progression
    # is (SHARED_FC[0] * 7 * 7 * 7, 512) -> (512, 512) -> (512, 512).
    shared_fc_list = []
    pool_size = rcnn_cfg.ROI_AWARE_POOL_SIZE // 2
    pre_channel = rcnn_cfg.SHARED_FC[0] * pool_size * pool_size * pool_size  # 512 * 7 * 7 * 7
    for k in range(1, len(rcnn_cfg.SHARED_FC)):
        shared_fc_list.append(
            pt_utils.Conv1d(pre_channel, rcnn_cfg.SHARED_FC[k], bn=True))
        pre_channel = rcnn_cfg.SHARED_FC[k]
        if k != len(rcnn_cfg.SHARED_FC) - 1 and rcnn_cfg.DP_RATIO > 0:
            shared_fc_list.append(nn.Dropout(rcnn_cfg.DP_RATIO))
    self.shared_fc_layer = nn.Sequential(*shared_fc_list)

    channel_in = rcnn_cfg.SHARED_FC[-1]

    # Classification head, roughly:
    # (0) conv1d -> bn -> relu -> dropout(0.3)  (512, 256)
    # (1) conv1d -> bn -> relu                  (256, 256)
    # (2) conv1d                                (256, 1)
    cls_channel = 1
    cls_layers = []
    pre_channel = channel_in
    for k in range(len(rcnn_cfg.CLS_FC)):
        cls_layers.append(
            pt_utils.Conv1d(pre_channel, rcnn_cfg.CLS_FC[k], bn=True))
        pre_channel = rcnn_cfg.CLS_FC[k]
    cls_layers.append(
        pt_utils.Conv1d(pre_channel, cls_channel, activation=None))
    if rcnn_cfg.DP_RATIO >= 0:
        cls_layers.insert(1, nn.Dropout(rcnn_cfg.DP_RATIO))
    self.cls_layer = nn.Sequential(*cls_layers)

    # Regression head, roughly:
    # (0) conv1d -> bn -> relu  (512, 256)
    # (1) dropout(0.3)
    # (2) conv1d -> bn -> relu  (256, 256)
    # (3) conv1d                (256, 7)
    reg_layers = []
    pre_channel = channel_in
    for k in range(len(rcnn_cfg.REG_FC)):
        reg_layers.append(
            pt_utils.Conv1d(pre_channel, rcnn_cfg.REG_FC[k], bn=True))
        pre_channel = rcnn_cfg.REG_FC[k]
    reg_layers.append(
        pt_utils.Conv1d(pre_channel, self.box_coder.code_size, activation=None))
    if rcnn_cfg.DP_RATIO >= 0:
        reg_layers.insert(1, nn.Dropout(rcnn_cfg.DP_RATIO))
    self.reg_layer = nn.Sequential(*reg_layers)

    # RoI-aware pooling layer; ROI_AWARE_POOL_SIZE = 14.
    self.roiaware_pool3d_layer = roiaware_pool3d_utils.RoIAwarePool3d(
        out_size=rcnn_cfg.ROI_AWARE_POOL_SIZE, max_pts_each_voxel=128)
    self.init_weights(weight_init='xavier')
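# Forward sketch for the heads above (shapes are assumptions, following the
# Part-A2-style RoI head this resembles). pt_utils.Conv1d wraps nn.Conv1d,
# so it acts on (batch, channels, length) tensors; each RoI pooled by
# conv_down to a 7^3 grid is flattened into a length-1 "sequence":
#
#   x = pooled_features.view(num_rois, -1, 1)    # (N, SHARED_FC[0]*7*7*7, 1)
#   feat = self.shared_fc_layer(x)               # (N, SHARED_FC[-1], 1)
#   rcnn_cls = self.cls_layer(feat).squeeze(-1)  # (N, 1)
#   rcnn_reg = self.reg_layer(feat).squeeze(-1)  # (N, code_size)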
def __init__(self, num_point_features, rcnn_cfg, **kwargs):
    super().__init__(rcnn_target_config=cfg.MODEL.RCNN.TARGET_CONFIG)
    self.SA_modules = nn.ModuleList()
    block = self.post_act_block

    self.conv_part = spconv.SparseSequential(
        block(4, 64, 3, padding=1, indice_key='rcnn_subm1'),
        block(64, 64, 3, padding=1, indice_key='rcnn_subm1_1'),
    )
    self.conv_rpn = spconv.SparseSequential(
        block(num_point_features, 64, 3, padding=1, indice_key='rcnn_subm2'),
        block(64, 64, 3, padding=1, indice_key='rcnn_subm1_2'),
    )
    self.conv_down = spconv.SparseSequential(  # [14, 14, 14] -> [7, 7, 7]
        block(128, 128, 3, padding=1, indice_key='rcnn_subm2'),
        block(128, 128, 3, padding=1, indice_key='rcnn_subm2'),
        spconv.SparseMaxPool3d(kernel_size=2, stride=2),
        block(128, 128, 3, padding=1, indice_key='rcnn_subm3'),
        block(128, rcnn_cfg.SHARED_FC[0], 3, padding=1, indice_key='rcnn_subm3'),
    )

    shared_fc_list = []
    pool_size = rcnn_cfg.ROI_AWARE_POOL_SIZE // 2
    pre_channel = rcnn_cfg.SHARED_FC[0] * pool_size * pool_size * pool_size
    for k in range(1, len(rcnn_cfg.SHARED_FC)):
        shared_fc_list.append(
            pt_utils.Conv1d(pre_channel, rcnn_cfg.SHARED_FC[k], bn=True))
        pre_channel = rcnn_cfg.SHARED_FC[k]
        if k != len(rcnn_cfg.SHARED_FC) - 1 and rcnn_cfg.DP_RATIO > 0:
            shared_fc_list.append(nn.Dropout(rcnn_cfg.DP_RATIO))
    self.shared_fc_layer = nn.Sequential(*shared_fc_list)

    channel_in = rcnn_cfg.SHARED_FC[-1]

    # Classification layer
    cls_channel = 1
    cls_layers = []
    pre_channel = channel_in
    for k in range(len(rcnn_cfg.CLS_FC)):
        cls_layers.append(
            pt_utils.Conv1d(pre_channel, rcnn_cfg.CLS_FC[k], bn=True))
        pre_channel = rcnn_cfg.CLS_FC[k]
    cls_layers.append(
        pt_utils.Conv1d(pre_channel, cls_channel, activation=None))
    if rcnn_cfg.DP_RATIO >= 0:
        cls_layers.insert(1, nn.Dropout(rcnn_cfg.DP_RATIO))
    self.cls_layer = nn.Sequential(*cls_layers)

    # Regression layer
    reg_layers = []
    pre_channel = channel_in
    for k in range(len(rcnn_cfg.REG_FC)):
        reg_layers.append(
            pt_utils.Conv1d(pre_channel, rcnn_cfg.REG_FC[k], bn=True))
        pre_channel = rcnn_cfg.REG_FC[k]
    reg_layers.append(
        pt_utils.Conv1d(pre_channel, self.box_coder.code_size, activation=None))
    if rcnn_cfg.DP_RATIO >= 0:
        reg_layers.insert(1, nn.Dropout(rcnn_cfg.DP_RATIO))
    self.reg_layer = nn.Sequential(*reg_layers)

    self.roiaware_pool3d_layer = roiaware_pool3d_utils.RoIAwarePool3d(
        out_size=rcnn_cfg.ROI_AWARE_POOL_SIZE, max_pts_each_voxel=128)
    self.init_weights(weight_init='xavier')
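# Minimal config sketch listing every rcnn_cfg field this constructor reads
# (values are illustrative, not the project's defaults; note the parent's
# target config comes from the global cfg.MODEL.RCNN.TARGET_CONFIG here):
#
#   from easydict import EasyDict
#
#   rcnn_cfg = EasyDict(
#       SHARED_FC=[512, 512, 512],   # SHARED_FC[0] is also conv_down's output width
#       CLS_FC=[256, 256],
#       REG_FC=[256, 256],
#       DP_RATIO=0.3,
#       ROI_AWARE_POOL_SIZE=14,
#   )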