def _make_deblock(self, num_out_filters, idx):
    """Build the upsampling ("deblock") stage for feature level ``idx``.

    Levels up to ``LAST_SPARSE_IDX`` are built from Minkowski sparse ops
    and end with a ``ToDense`` conversion; deeper levels use dense torch
    ops.  Removed: a leftover debug print and a duplicated
    ``np.round(stride)`` in the dense branch (the stride is already
    rounded once for both branches).

    Args:
        num_out_filters: channel count of the feature map being upsampled.
        idx: absolute stage index; offset by ``self._upsample_start_idx``
            to index the upsample configuration lists.

    Returns:
        A ``Sequential`` deconv/BN/ReLU stage.
    """
    stride = self._upsample_strides[idx - self._upsample_start_idx]
    num_up_filters = self._num_upsample_filters[idx - self._upsample_start_idx]
    if self._use_norm:
        if self._use_groupnorm:
            SparseBatchNorm2d = change_default_args(
                num_groups=self._num_groups, eps=1e-3)(GroupNorm)
            DenseBatchNorm2d = change_default_args(
                num_groups=self._num_groups, eps=1e-3)(GroupNorm)
        else:
            SparseBatchNorm2d = change_default_args(
                eps=1e-3, momentum=0.01)(ME.MinkowskiBatchNorm)
            DenseBatchNorm2d = change_default_args(
                eps=1e-3, momentum=0.01)(nn.BatchNorm2d)
        SparseConvTranspose2d = change_default_args(bias=False, dimension=2)(
            ME.MinkowskiConvolutionTranspose)
        DenseConvTranspose2d = change_default_args(bias=False)(
            nn.ConvTranspose2d)
    else:
        # No normalization: convs carry a bias instead.
        SparseBatchNorm2d = Empty
        DenseBatchNorm2d = Empty
        SparseConvTranspose2d = change_default_args(bias=True, dimension=2)(
            ME.MinkowskiConvolutionTranspose)
        DenseConvTranspose2d = change_default_args(bias=True)(
            nn.ConvTranspose2d)
    ReLU = ME.MinkowskiReLU()
    stride = np.round(stride).astype(np.int64)
    if idx <= LAST_SPARSE_IDX:
        # Sparse stage: upsample sparsely, then densify for fusion.
        deblock = Sequential(
            SparseConvTranspose2d(
                num_out_filters, num_up_filters, stride, stride=stride),
            SparseBatchNorm2d(num_up_filters),
            ReLU,
            ME.ToDense(),
        )
    else:
        deblock = Sequential(
            DenseConvTranspose2d(
                num_out_filters, num_up_filters, stride, stride=stride),
            DenseBatchNorm2d(num_up_filters),
            ReLU,
        )
    return deblock
def _make_layer(self, inplanes, planes, num_blocks, idx, stride=1):
    """Build one sparse (Minkowski) downsampling stage.

    The stage is a strided 2x2 convolution followed by ``num_blocks``
    3x3 convolutions, each with optional batch norm and a shared
    MinkowskiReLU.  Removed: a debug print, a stray trailing-comma
    expression after one ``block.add`` call, an unused
    ``ConvTranspose2d`` local, and commented-out debug layers.

    Args:
        inplanes: input channel count.
        planes: output channel count for every conv in the stage.
        num_blocks: number of 3x3 conv/BN/ReLU repetitions.
        idx: stage index (kept for interface compatibility; unused here).
        stride: stride of the leading 2x2 convolution.

    Returns:
        Tuple of (stage ``Sequential``, output channel count).
    """
    if self._use_norm:
        BatchNorm2d = change_default_args(
            eps=1e-3, momentum=0.01)(ME.MinkowskiBatchNorm)
        Conv2d = change_default_args(bias=False, dimension=2)(
            ME.MinkowskiConvolution)
        SubMConv2d = change_default_args(bias=False, dimension=2)(
            ME.MinkowskiConvolution)
    else:
        BatchNorm2d = Empty
        Conv2d = change_default_args(bias=True, dimension=2)(
            ME.MinkowskiConvolution)
        SubMConv2d = change_default_args(bias=True, dimension=2)(
            ME.MinkowskiConvolution)
    ReLU = ME.MinkowskiReLU()
    block = Sequential(
        Conv2d(inplanes, planes, 2, stride=stride),
        BatchNorm2d(planes),
        ReLU,
    )
    for _ in range(num_blocks):
        block.add(SubMConv2d(planes, planes, 3))
        block.add(BatchNorm2d(planes))
        block.add(ReLU)
    return block, planes
def _make_layer(self, inplanes, planes, num_blocks, stride=1):
    """Create a dense conv stage.

    The stage opens with a zero-pad plus strided 3x3 conv, then repeats
    ``num_blocks`` unit-stride 3x3 conv / BN / ReLU triples.

    Returns:
        Tuple of (stage ``Sequential``, output channel count).
    """
    use_bias = not self._use_norm
    if self._use_norm:
        if self._use_groupnorm:
            BatchNorm2d = change_default_args(
                num_groups=self._num_groups, eps=1e-3)(GroupNorm)
        else:
            BatchNorm2d = change_default_args(
                eps=1e-3, momentum=0.01)(nn.BatchNorm2d)
    else:
        # Without normalization the convolutions keep their bias terms.
        BatchNorm2d = Empty
    Conv2d = change_default_args(bias=use_bias)(nn.Conv2d)
    ConvTranspose2d = change_default_args(bias=use_bias)(nn.ConvTranspose2d)
    block = Sequential(
        nn.ZeroPad2d(1),
        Conv2d(inplanes, planes, 3, stride=stride),
        BatchNorm2d(planes),
        nn.ReLU(),
    )
    for _ in range(num_blocks):
        block.add(Conv2d(planes, planes, 3, padding=1))
        block.add(BatchNorm2d(planes))
        block.add(nn.ReLU())
    return block, planes
def _make_deblock(self, num_out_filters, idx):
    """Build the upsampling stage for feature level ``idx``.

    An upsample stride >= 1 maps to a transposed convolution with that
    (rounded) stride; a fractional stride maps to a strided convolution
    that downsamples by round(1/stride).  Removed: two debug prints and
    the duplicated ``Sequential`` construction across the two branches.

    Args:
        num_out_filters: channel count of the incoming feature map.
        idx: absolute stage index; offset by ``self._upsample_start_idx``
            to index the upsample configuration lists.

    Returns:
        A ``Sequential`` conv/BN/ReLU stage.
    """
    stride = self._upsample_strides[idx - self._upsample_start_idx]
    num_up_filters = self._num_upsample_filters[idx - self._upsample_start_idx]
    if self._use_norm:
        BatchNorm2d = change_default_args(
            eps=1e-3, momentum=0.01)(ME.MinkowskiBatchNorm)
        Conv2d = change_default_args(bias=False, dimension=2)(
            ME.MinkowskiConvolution)
        ConvTranspose2d = change_default_args(bias=False, dimension=2)(
            ME.MinkowskiConvolutionTranspose)
    else:
        BatchNorm2d = Empty
        Conv2d = change_default_args(bias=True, dimension=2)(
            ME.MinkowskiConvolution)
        ConvTranspose2d = change_default_args(bias=True, dimension=2)(
            ME.MinkowskiConvolutionTranspose)
    ReLU = ME.MinkowskiReLU()
    if stride >= 1:
        stride = np.round(stride).astype(np.int64)
        conv = ConvTranspose2d(
            num_out_filters, num_up_filters, stride, stride=stride)
    else:
        # Fractional "upsample" stride actually downsamples.
        stride = np.round(1 / stride).astype(np.int64)
        conv = Conv2d(num_out_filters, num_up_filters, stride, stride=stride)
    return Sequential(conv, BatchNorm2d(num_up_filters), ReLU)
def __init__(self,
             output_shape,
             use_norm=True,
             num_input_features=128,
             num_filters_down1=[64],
             num_filters_down2=[64, 64],
             name='MiddleExtractor'):
    """Dense 3D middle extractor.

    Stacks three padded 3D convolutions; the first and last compress the
    depth axis with stride (2, 1, 1).  ``num_filters_down1`` and
    ``num_filters_down2`` are accepted for interface compatibility but
    not used by this dense variant.
    """
    super(MiddleExtractor, self).__init__()
    self.name = name
    if use_norm:
        BatchNorm3d = change_default_args(
            eps=1e-3, momentum=0.01)(nn.BatchNorm3d)
        Conv3d = change_default_args(bias=False)(nn.Conv3d)
    else:
        BatchNorm3d = Empty
        Conv3d = change_default_args(bias=True)(nn.Conv3d)
    self.voxel_output_shape = output_shape
    layers = []
    # (pad, conv channels, stride) for the three conv stages.
    plan = [
        (ZeroPad3d(1), num_input_features, 64, (2, 1, 1)),
        (ZeroPad3d([1, 1, 1, 1, 0, 0]), 64, 64, 1),
        (ZeroPad3d(1), 64, 64, (2, 1, 1)),
    ]
    for pad, cin, cout, stride in plan:
        layers.append(pad)
        layers.append(Conv3d(cin, cout, 3, stride=stride))
        layers.append(BatchNorm3d(cout))
        layers.append(nn.ReLU())
    self.middle_conv = Sequential(*layers)
def __init__(self,
             output_shape,
             use_norm=True,
             num_input_features=128,
             num_filters_down1=[64],
             num_filters_down2=[64, 64],
             name='SparseMiddleExtractor'):
    """Sparse (SparseConvNet) middle extractor.

    Two stacks of submanifold 3x3x3 convolutions (widths taken from
    ``num_filters_down1`` then ``num_filters_down2``), each followed by
    a (3, 1, 1) stride-(2, 1, 1) convolution that shrinks the depth
    axis, ending in ``SparseToDense``.  Removed: a debug print of the
    sparse shape and two unused layer factories (``BatchNorm1d`` /
    ``Linear``); ``use_norm`` is kept for interface compatibility — the
    scn ``BatchNormReLU`` layers are always applied.
    """
    super(SparseMiddleExtractor, self).__init__()
    self.name = name
    # +1 on the depth axis leaves room for the stride-2 depth convs.
    sparse_shape = np.array(output_shape[1:4]) + [1, 0, 0]
    self.scn_input = scn.InputLayer(3, sparse_shape.tolist())
    self.voxel_output_shape = output_shape
    middle_layers = []
    num_filters = [num_input_features] + num_filters_down1
    filters_pairs_d1 = [[num_filters[i], num_filters[i + 1]]
                        for i in range(len(num_filters) - 1)]
    for i, o in filters_pairs_d1:
        middle_layers.append(scn.SubmanifoldConvolution(3, i, o, 3, False))
        middle_layers.append(scn.BatchNormReLU(o, eps=1e-3, momentum=0.99))
    middle_layers.append(
        scn.Convolution(
            3,
            num_filters[-1],
            num_filters[-1], (3, 1, 1), (2, 1, 1),
            bias=False))
    middle_layers.append(
        scn.BatchNormReLU(num_filters[-1], eps=1e-3, momentum=0.99))
    if len(num_filters_down1) == 0:
        num_filters = [num_filters[-1]] + num_filters_down2
    else:
        num_filters = [num_filters_down1[-1]] + num_filters_down2
    filters_pairs_d2 = [[num_filters[i], num_filters[i + 1]]
                        for i in range(len(num_filters) - 1)]
    for i, o in filters_pairs_d2:
        middle_layers.append(scn.SubmanifoldConvolution(3, i, o, 3, False))
        middle_layers.append(scn.BatchNormReLU(o, eps=1e-3, momentum=0.99))
    middle_layers.append(
        scn.Convolution(
            3,
            num_filters[-1],
            num_filters[-1], (3, 1, 1), (2, 1, 1),
            bias=False))
    middle_layers.append(
        scn.BatchNormReLU(num_filters[-1], eps=1e-3, momentum=0.99))
    middle_layers.append(scn.SparseToDense(3, num_filters[-1]))
    self.middle_conv = Sequential(*middle_layers)
def _make_layer(self, inplanes, planes, num_blocks, idx, stride=1):
    """Build one downsampling stage of the hybrid backbone.

    Levels up to ``LAST_SPARSE_IDX`` use spconv sparse layers; deeper
    levels use dense torch layers.  Both variants pad, apply a strided
    3x3 convolution, then repeat ``num_blocks`` 3x3 conv/BN/ReLU
    triples.  Removed: a debug print and an unused ``ConvTranspose2d``
    factory.

    Returns:
        Tuple of (stage module, output channel count).
    """
    if self._use_norm:
        if self._use_groupnorm:
            SparseBatchNorm2d = change_default_args(
                num_groups=self._num_groups, eps=1e-3)(GroupNorm)
            DenseBatchNorm2d = change_default_args(
                num_groups=self._num_groups, eps=1e-3)(GroupNorm)
        else:
            # spconv stores features as (N, C); BatchNorm1d matches that.
            SparseBatchNorm2d = change_default_args(
                eps=1e-3, momentum=0.01)(nn.BatchNorm1d)
            DenseBatchNorm2d = change_default_args(
                eps=1e-3, momentum=0.01)(nn.BatchNorm2d)
        SparseConv2d = change_default_args(bias=False)(spconv.SparseConv2d)
        DenseConv2d = change_default_args(bias=False)(nn.Conv2d)
    else:
        SparseBatchNorm2d = Empty
        DenseBatchNorm2d = Empty
        SparseConv2d = change_default_args(bias=True)(spconv.SparseConv2d)
        DenseConv2d = change_default_args(bias=True)(nn.Conv2d)
    if idx <= LAST_SPARSE_IDX:
        block = spconv.SparseSequential(
            SparseZeroPad2d(1),
            SparseConv2d(inplanes, planes, 3, stride=stride),
            SparseBatchNorm2d(planes),
            nn.ReLU(),
        )
        for _ in range(num_blocks):
            block.add(SparseConv2d(planes, planes, 3, padding=1))
            block.add(SparseBatchNorm2d(planes))
            block.add(nn.ReLU())
    else:
        block = Sequential(
            nn.ZeroPad2d(1),
            DenseConv2d(inplanes, planes, 3, stride=stride),
            DenseBatchNorm2d(planes),
            nn.ReLU(),
        )
        for _ in range(num_blocks):
            block.add(DenseConv2d(planes, planes, 3, padding=1))
            block.add(DenseBatchNorm2d(planes))
            block.add(nn.ReLU())
    return block, planes
def __init__(self):
    """Fusion layer: two 1x1-conv MLP stacks plus a column max-pool."""
    super(fusion, self).__init__()
    self.name = 'fusion_layer'

    def mlp_1x1(widths):
        # Chain of 1x1 convs with a ReLU between consecutive convs
        # (no activation after the final conv).
        layers = []
        for cin, cout in zip(widths[:-1], widths[1:]):
            layers.append(nn.Conv2d(cin, cout, 1))
            layers.append(nn.ReLU())
        return Sequential(*layers[:-1])

    # Per-corner feature encoder: 24 -> 48 -> 96 -> 96 -> 4.
    self.corner_points_feature = mlp_1x1([24, 48, 96, 96, 4])
    # 2D/3D score fusion: 4 -> 18 -> 36 -> 36 -> 1.
    self.fuse_2d_3d = mlp_1x1([4, 18, 36, 36, 1])
    # Max over a 200-row column, stride 1.
    self.maxpool = Sequential(nn.MaxPool2d([200, 1], 1))
class RPN_refine(nn.Module):
    """RPN with an extra coarse "refine" head applied to the raw input map.

    Three conv stages downsample the input; each stage is upsampled back
    to a common resolution, the three upsampled maps are summed, and the
    final box/cls/(dir) heads run on that sum.  The coarse heads run on
    the unprocessed input feature map.
    """

    def __init__(self,
                 use_norm=True,
                 num_class=2,
                 layer_nums=(3, 5, 5),
                 layer_strides=(2, 2, 2),
                 num_filters=(128, 128, 256),
                 upsample_strides=(1, 2, 4),
                 num_upsample_filters=(256, 256, 256),
                 num_input_features=128,
                 num_anchor_per_loc=2,
                 encode_background_as_zeros=True,
                 use_direction_classifier=True,
                 use_groupnorm=False,
                 num_groups=32,
                 box_code_size=7,
                 num_direction_bins=2,
                 name='rpn'):
        super(RPN_refine, self).__init__()
        self._num_anchor_per_loc = num_anchor_per_loc
        self._use_direction_classifier = use_direction_classifier
        assert len(layer_nums) == 3
        assert len(layer_strides) == len(layer_nums)
        assert len(num_filters) == len(layer_nums)
        assert len(upsample_strides) == len(layer_nums)
        assert len(num_upsample_filters) == len(layer_nums)
        self._box_code_size = box_code_size
        self._num_class = num_class
        self._num_direction_bins = num_direction_bins
        upsample_strides = [
            np.round(u).astype(np.int64) for u in upsample_strides
        ]
        if use_norm:
            if use_groupnorm:
                BatchNorm2d = change_default_args(
                    num_groups=num_groups, eps=1e-3)(GroupNorm)
            else:
                BatchNorm2d = change_default_args(
                    eps=1e-3, momentum=0.01)(nn.BatchNorm2d)
            Conv2d = change_default_args(bias=False)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=False)(
                nn.ConvTranspose2d)
        else:
            BatchNorm2d = Empty
            Conv2d = change_default_args(bias=True)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=True)(
                nn.ConvTranspose2d)

        # conv2d with "same" padding is not equal to pad-then-conv when
        # stride > 1, so each stage pads explicitly before its strided conv.
        def make_block(in_ch, out_ch, stride, repeats):
            stage = Sequential(
                nn.ZeroPad2d(1),
                Conv2d(in_ch, out_ch, 3, stride=stride),
                BatchNorm2d(out_ch),
                nn.ReLU(),
            )
            for _ in range(repeats):
                stage.add(Conv2d(out_ch, out_ch, 3, padding=1))
                stage.add(BatchNorm2d(out_ch))
                stage.add(nn.ReLU())
            return stage

        def make_deconv(in_ch, out_ch, stride):
            return Sequential(
                ConvTranspose2d(in_ch, out_ch, stride, stride=stride),
                BatchNorm2d(out_ch),
                nn.ReLU(),
            )

        block2_input_filters = num_filters[0]
        self.block1 = make_block(num_input_features, num_filters[0],
                                 layer_strides[0], layer_nums[0])
        self.deconv1 = make_deconv(num_filters[0], num_upsample_filters[0],
                                   upsample_strides[0])
        self.block2 = make_block(block2_input_filters, num_filters[1],
                                 layer_strides[1], layer_nums[1])
        self.deconv2 = make_deconv(num_filters[1], num_upsample_filters[1],
                                   upsample_strides[1])
        self.block3 = make_block(num_filters[1], num_filters[2],
                                 layer_strides[2], layer_nums[2])
        self.deconv3 = make_deconv(num_filters[2], num_upsample_filters[2],
                                   upsample_strides[2])
        if encode_background_as_zeros:
            num_cls = num_anchor_per_loc * num_class
        else:
            num_cls = num_anchor_per_loc * (num_class + 1)
        # Coarse heads consume the raw input feature map; the final heads
        # consume the sum of the three upsampled stage outputs.
        self.conv_cls_coarse = nn.Conv2d(num_upsample_filters[0], num_cls, 1)
        self.conv_box_coarse = nn.Conv2d(
            num_upsample_filters[0], num_anchor_per_loc * box_code_size, 1)
        self.conv_cls = nn.Conv2d(num_upsample_filters[0], num_cls, 1)
        self.conv_box = nn.Conv2d(num_upsample_filters[0],
                                  num_anchor_per_loc * box_code_size, 1)
        if use_direction_classifier:
            self.conv_dir_cls = nn.Conv2d(
                num_upsample_filters[0],
                num_anchor_per_loc * num_direction_bins, 1)

    def forward(self, x):
        """Run coarse heads on ``x``, then the 3-stage FPN and final heads.

        Returns:
            Tuple of (fused feature map, dict with ``box_refine``,
            ``cls_constraint``, ``box_preds``, ``cls_preds`` and, when
            enabled, ``dir_cls_preds``); all predictions are reshaped to
            (N, num_anchor_per_loc, H, W, C).
        """
        H, W = x.shape[2:]
        box_refine = self.conv_box_coarse(x)
        box_refine = box_refine.view(
            -1, self._num_anchor_per_loc, self._box_code_size, H,
            W).permute(0, 1, 3, 4, 2).contiguous()
        cls_constraint = self.conv_cls_coarse(x)
        cls_constraint = cls_constraint.view(
            -1, self._num_anchor_per_loc, self._num_class, H,
            W).permute(0, 1, 3, 4, 2).contiguous()
        x = self.block1(x)
        up1 = self.deconv1(x)
        x = self.block2(x)
        up2 = self.deconv2(x)
        x = self.block3(x)
        up3 = self.deconv3(x)
        x = up1 + up2 + up3
        box_preds = self.conv_box(x)
        cls_preds = self.conv_cls(x)
        # [N, C, y(H), x(W)] -> [N, anchors, H, W, C]
        box_preds = box_preds.view(
            -1, self._num_anchor_per_loc, self._box_code_size, H,
            W).permute(0, 1, 3, 4, 2).contiguous()
        cls_preds = cls_preds.view(
            -1, self._num_anchor_per_loc, self._num_class, H,
            W).permute(0, 1, 3, 4, 2).contiguous()
        ret_dict = {
            "box_refine": box_refine,
            "cls_constraint": cls_constraint,
            "box_preds": box_preds,
            "cls_preds": cls_preds,
        }
        if self._use_direction_classifier:
            dir_cls_preds = self.conv_dir_cls(x)
            dir_cls_preds = dir_cls_preds.view(
                -1, self._num_anchor_per_loc, self._num_direction_bins, H,
                W).permute(0, 1, 3, 4, 2).contiguous()
            ret_dict["dir_cls_preds"] = dir_cls_preds
        return x, ret_dict
def __init__(self,
             use_norm=True,
             num_class=2,
             layer_nums=[3, 3, 3, 3],
             layer_strides=[2, 2, 2, 2],
             num_filters=[64, 128, 256, 512],
             upsample_strides=[1, 2, 4, 4],
             num_upsample_filters=[64, 128, 256, 256, 448],
             num_input_filters=128,
             num_anchor_per_loc=2,
             encode_background_as_zeros=True,
             use_direction_classifier=True,
             use_groupnorm=False,
             num_groups=32,
             use_bev=False,
             box_code_size=7,
             name='det_net',
             **kwargs):
    """Detection head over four same-width conv blocks with top-down fusion.

    block0..block3 each stack ``layer_nums[i]`` 3x3 convs at constant
    width ``num_filters[i]``.  block3 is upsampled and fused with
    block2, the result upsampled again and fused with block1 and the
    downsampled block0, and the fused map feeds the cls/box/(dir) heads.
    ``num_upsample_filters`` holds the concat/fuse widths; its last
    entry is the final output width.
    """
    super(det_net, self).__init__()
    self._num_anchor_per_loc = num_anchor_per_loc
    self._use_direction_classifier = use_direction_classifier
    self._use_bev = use_bev
    assert len(layer_nums) == 4
    assert len(layer_strides) == len(layer_nums)
    assert len(num_filters) == len(layer_nums)
    assert len(upsample_strides) == len(layer_nums)
    if use_norm:
        if use_groupnorm:
            BatchNorm2d = change_default_args(
                num_groups=num_groups, eps=1e-3)(GroupNorm)
        else:
            BatchNorm2d = change_default_args(
                eps=1e-3, momentum=0.01)(nn.BatchNorm2d)
        Conv2d = change_default_args(bias=False)(nn.Conv2d)
        ConvTranspose2d = change_default_args(bias=False)(
            nn.ConvTranspose2d)
    else:
        BatchNorm2d = Empty
        Conv2d = change_default_args(bias=True)(nn.Conv2d)
        ConvTranspose2d = change_default_args(bias=True)(
            nn.ConvTranspose2d)

    dimension_feature_map = num_filters
    dimension_concate = num_upsample_filters  # last entry = output width

    def conv_bn_relu(in_ch, out_ch, **conv_kwargs):
        # One 3x3 conv / BN / ReLU triple as a list of layers.
        return [
            Conv2d(in_ch, out_ch, 3, **conv_kwargs),
            BatchNorm2d(out_ch),
            nn.ReLU(),
        ]

    def deconv_bn_relu(in_ch, out_ch):
        # One stride-2 3x3 transposed conv / BN / ReLU triple.
        return [
            ConvTranspose2d(in_ch, out_ch, 3, stride=2),
            BatchNorm2d(out_ch),
            nn.ReLU(),
        ]

    # block0..block3: constant-width stacks of padded 3x3 convs.
    for flag in range(4):
        width = dimension_feature_map[flag]
        layers = []
        for _ in range(layer_nums[flag]):
            layers.extend(conv_bn_relu(width, width, padding=1))
        setattr(self, 'block{}'.format(flag), Sequential(*layers))

    # block0 is brought down to block1's resolution before fusion.
    self.downsample0 = Sequential(
        Conv2d(dimension_feature_map[0], dimension_concate[0], 3, stride=2),
        BatchNorm2d(dimension_concate[0]),
        nn.ReLU(),
    )
    self.upsample3 = Sequential(*deconv_bn_relu(
        dimension_feature_map[3], dimension_concate[3]))

    # Convolutions after concatenating block3 with block2, then upsample.
    layers = conv_bn_relu(
        dimension_concate[3] + dimension_feature_map[2],
        dimension_concate[2], padding=1)
    layers += conv_bn_relu(
        dimension_concate[2], dimension_concate[2], padding=1)
    layers += deconv_bn_relu(dimension_concate[2], dimension_concate[2])
    self.upsample2_after_concate_fuse32 = Sequential(*layers)

    # Convolutions after concatenating block2, block1 and block0.
    layers = conv_bn_relu(
        dimension_concate[0] + dimension_feature_map[1] +
        dimension_concate[2],
        dimension_concate[4], padding=1)
    layers += conv_bn_relu(
        dimension_concate[4], dimension_concate[4], padding=1)
    self.output_after_concate_fuse210 = Sequential(*layers)

    if encode_background_as_zeros:
        num_cls = num_anchor_per_loc * num_class
    else:
        num_cls = num_anchor_per_loc * (num_class + 1)
    self.conv_cls = nn.Conv2d(dimension_concate[4], num_cls, 1)
    self.conv_box = nn.Conv2d(dimension_concate[4],
                              num_anchor_per_loc * box_code_size, 1)
    if use_direction_classifier:
        self.conv_dir_cls = nn.Conv2d(dimension_concate[4],
                                      num_anchor_per_loc * 2, 1)
def __init__(self,
             output_shape,
             num_input_features=128,
             num_filters_down1=[64],
             num_filters_down2=[64, 64],
             use_norm=True,
             num_class=2,
             layer_nums=[3, 5, 5],
             layer_strides=[2, 2, 2],
             num_filters=[128, 128, 256],
             upsample_strides=[1, 2, 4],
             num_upsample_filters=[256, 256, 256],
             num_anchor_per_loc=2,
             encode_background_as_zeros=True,
             use_direction_classifier=True,
             use_groupnorm=False,
             num_groups=32,
             use_bev=False,
             box_code_size=7,
             use_rc_net=False,
             name='sparse_rpn'):
    """Sparse-conv RPN: three spconv 3D stages, each densified back to a
    2D map, concatenated and post-processed before the 1x1 heads.

    Removed: a debug print of the sparse shape and a dead triple-quoted
    string that held commented-out head definitions.  ``use_groupnorm``,
    ``num_groups``, ``use_bev`` and ``use_rc_net`` are accepted for
    interface compatibility but unused here.
    """
    super(SparseRPN, self).__init__()
    self._num_anchor_per_loc = num_anchor_per_loc
    self._use_direction_classifier = use_direction_classifier
    self.name = name
    if use_norm:
        BatchNorm2d = change_default_args(
            eps=1e-3, momentum=0.01)(nn.BatchNorm2d)
        # spconv features are (N, C), hence BatchNorm1d for sparse stages.
        BatchNorm1d = change_default_args(
            eps=1e-3, momentum=0.01)(nn.BatchNorm1d)
        Conv2d = change_default_args(bias=False)(nn.Conv2d)
        SpConv3d = change_default_args(bias=False)(spconv.SparseConv3d)
        SubMConv3d = change_default_args(bias=False)(spconv.SubMConv3d)
        ConvTranspose2d = change_default_args(bias=False)(
            nn.ConvTranspose2d)
    else:
        BatchNorm2d = Empty
        BatchNorm1d = Empty
        Conv2d = change_default_args(bias=True)(nn.Conv2d)
        SpConv3d = change_default_args(bias=True)(spconv.SparseConv3d)
        SubMConv3d = change_default_args(bias=True)(spconv.SubMConv3d)
        ConvTranspose2d = change_default_args(bias=True)(
            nn.ConvTranspose2d)
    # +1 on the depth axis leaves room for the stride-2 depth convs.
    sparse_shape = np.array(output_shape[1:4]) + [1, 0, 0]
    self.sparse_shape = sparse_shape
    self.voxel_output_shape = output_shape
    # [11, 400, 352]
    self.block1 = spconv.SparseSequential(
        SpConv3d(num_input_features,
                 num_filters[0],
                 3,
                 stride=[2, layer_strides[0], layer_strides[0]],
                 padding=[0, 1, 1]),
        BatchNorm1d(num_filters[0]),
        nn.ReLU())  # [5, 200, 176]
    for i in range(layer_nums[0]):
        self.block1.add(
            SubMConv3d(num_filters[0],
                       num_filters[0],
                       3,
                       padding=1,
                       indice_key="subm0"))
        self.block1.add(BatchNorm1d(num_filters[0]))
        self.block1.add(nn.ReLU())
    # Compress depth to 1 and densify for the first upsample path.
    self.deconv1 = spconv.SparseSequential(
        SpConv3d(num_filters[0], num_filters[0], (3, 1, 1),
                 stride=(2, 1, 1)),
        BatchNorm1d(num_filters[0]),
        nn.ReLU(),
        SpConv3d(num_filters[0], num_upsample_filters[0], (2, 1, 1),
                 stride=1),
        BatchNorm1d(num_upsample_filters[0]),
        nn.ReLU(),
        spconv.ToDense(),
        Squeeze())  # [1, 200, 176]
    # [5, 200, 176]
    self.block2 = spconv.SparseSequential(
        SpConv3d(num_filters[0],
                 num_filters[1],
                 3,
                 stride=[2, layer_strides[1], layer_strides[1]],
                 padding=[0, 1, 1]),
        BatchNorm1d(num_filters[1]),
        nn.ReLU())
    for i in range(layer_nums[1]):
        self.block2.add(
            SubMConv3d(num_filters[1],
                       num_filters[1],
                       3,
                       padding=1,
                       indice_key="subm1"))
        self.block2.add(BatchNorm1d(num_filters[1]))
        self.block2.add(nn.ReLU())
    # [2, 100, 88] -> densify, then transposed conv back up.
    self.deconv2 = spconv.SparseSequential(
        SpConv3d(num_filters[1], num_filters[1], (2, 1, 1), stride=1),
        BatchNorm1d(num_filters[1]),
        nn.ReLU(),
        spconv.ToDense(),
        Squeeze(),
        ConvTranspose2d(num_filters[1],
                        num_upsample_filters[1],
                        upsample_strides[1],
                        stride=upsample_strides[1]),
        BatchNorm2d(num_upsample_filters[1]),
        nn.ReLU())  # [1, 200, 176]
    self.block3 = spconv.SparseSequential(
        SpConv3d(num_filters[1],
                 num_filters[2], [2, 3, 3],
                 stride=[1, layer_strides[2], layer_strides[2]],
                 padding=[0, 1, 1]),
        BatchNorm1d(num_filters[2]),
        nn.ReLU())
    for i in range(layer_nums[2]):
        self.block3.add(
            SubMConv3d(num_filters[2],
                       num_filters[2],
                       3,
                       padding=1,
                       indice_key="subm2"))
        self.block3.add(BatchNorm1d(num_filters[2]))
        self.block3.add(nn.ReLU())
    self.deconv3 = Sequential(
        spconv.ToDense(),
        Squeeze(),
        ConvTranspose2d(num_filters[2],
                        num_upsample_filters[2],
                        upsample_strides[2],
                        stride=upsample_strides[2]),
        BatchNorm2d(num_upsample_filters[2]),
        nn.ReLU(),
    )  # [1, 200, 176]
    # Post-fusion 2D trunk feeding the 1x1 heads.
    self.post = Sequential(
        Conv2d(sum(num_upsample_filters), 128, 3, stride=1, padding=1),
        BatchNorm2d(128),
        nn.ReLU(),
        Conv2d(128, 64, 3, stride=1, padding=1),
        BatchNorm2d(64),
        nn.ReLU(),
    )  # [1, 200, 176]
    if encode_background_as_zeros:
        num_cls = num_anchor_per_loc * num_class
    else:
        num_cls = num_anchor_per_loc * (num_class + 1)
    # Heads consume the 64-channel output of self.post.
    self.conv_cls = nn.Conv2d(64, num_cls, 1)
    self.conv_box = nn.Conv2d(64, num_anchor_per_loc * box_code_size, 1)
    if use_direction_classifier:
        self.conv_dir_cls = nn.Conv2d(64, num_anchor_per_loc * 2, 1)
def __init__(self,
             use_norm=True,
             num_class=2,
             layer_nums=(3, 5, 5),
             layer_strides=(2, 2, 2),
             num_filters=(128, 128, 256),
             upsample_strides=(1, 2, 4),
             num_upsample_filters=(256, 256, 256),
             num_input_features=128,
             num_anchor_per_loc=2,
             encode_background_as_zeros=True,
             use_direction_classifier=True,
             use_groupnorm=False,
             num_groups=32,
             box_code_size=7,
             num_direction_bins=2,
             name='rpn'):
    """deprecated. exists for checkpoint backward compilability (SECOND v1.0)
    """
    super(RPN, self).__init__()
    self._num_anchor_per_loc = num_anchor_per_loc
    self._use_direction_classifier = use_direction_classifier
    # BUGFIX: `self._use_rc_net` was read at the end of __init__ but never
    # assigned (this deprecated signature has no `use_rc_net` parameter),
    # so constructing the class raised AttributeError.  Default to False,
    # which preserves the SECOND v1.0 checkpoint layout (no rc head).
    self._use_rc_net = False
    assert len(layer_nums) == 3
    assert len(layer_strides) == len(layer_nums)
    assert len(num_filters) == len(layer_nums)
    assert len(upsample_strides) == len(layer_nums)
    assert len(num_upsample_filters) == len(layer_nums)
    upsample_strides = [
        np.round(u).astype(np.int64) for u in upsample_strides
    ]
    # Every stage must map back to the same output resolution.
    factors = []
    for i in range(len(layer_nums)):
        assert int(np.prod(
            layer_strides[:i + 1])) % upsample_strides[i] == 0
        factors.append(
            np.prod(layer_strides[:i + 1]) // upsample_strides[i])
    assert all([x == factors[0] for x in factors])
    if use_norm:
        if use_groupnorm:
            BatchNorm2d = change_default_args(
                num_groups=num_groups, eps=1e-3)(GroupNorm)
        else:
            BatchNorm2d = change_default_args(
                eps=1e-3, momentum=0.01)(nn.BatchNorm2d)
        Conv2d = change_default_args(bias=False)(nn.Conv2d)
        ConvTranspose2d = change_default_args(bias=False)(
            nn.ConvTranspose2d)
    else:
        BatchNorm2d = Empty
        Conv2d = change_default_args(bias=True)(nn.Conv2d)
        ConvTranspose2d = change_default_args(bias=True)(
            nn.ConvTranspose2d)
    # note that when stride > 1, conv2d with same padding isn't
    # equal to pad-conv2d. we should use pad-conv2d.
    block2_input_filters = num_filters[0]
    self.block1 = Sequential(
        nn.ZeroPad2d(1),
        Conv2d(num_input_features,
               num_filters[0],
               3,
               stride=layer_strides[0]),
        BatchNorm2d(num_filters[0]),
        nn.ReLU(),
    )
    for i in range(layer_nums[0]):
        self.block1.add(
            Conv2d(num_filters[0], num_filters[0], 3, padding=1))
        self.block1.add(BatchNorm2d(num_filters[0]))
        self.block1.add(nn.ReLU())
    self.deconv1 = Sequential(
        ConvTranspose2d(num_filters[0],
                        num_upsample_filters[0],
                        upsample_strides[0],
                        stride=upsample_strides[0]),
        BatchNorm2d(num_upsample_filters[0]),
        nn.ReLU(),
    )
    self.block2 = Sequential(
        nn.ZeroPad2d(1),
        Conv2d(block2_input_filters,
               num_filters[1],
               3,
               stride=layer_strides[1]),
        BatchNorm2d(num_filters[1]),
        nn.ReLU(),
    )
    for i in range(layer_nums[1]):
        self.block2.add(
            Conv2d(num_filters[1], num_filters[1], 3, padding=1))
        self.block2.add(BatchNorm2d(num_filters[1]))
        self.block2.add(nn.ReLU())
    self.deconv2 = Sequential(
        ConvTranspose2d(num_filters[1],
                        num_upsample_filters[1],
                        upsample_strides[1],
                        stride=upsample_strides[1]),
        BatchNorm2d(num_upsample_filters[1]),
        nn.ReLU(),
    )
    self.block3 = Sequential(
        nn.ZeroPad2d(1),
        Conv2d(num_filters[1], num_filters[2], 3,
               stride=layer_strides[2]),
        BatchNorm2d(num_filters[2]),
        nn.ReLU(),
    )
    for i in range(layer_nums[2]):
        self.block3.add(
            Conv2d(num_filters[2], num_filters[2], 3, padding=1))
        self.block3.add(BatchNorm2d(num_filters[2]))
        self.block3.add(nn.ReLU())
    self.deconv3 = Sequential(
        ConvTranspose2d(num_filters[2],
                        num_upsample_filters[2],
                        upsample_strides[2],
                        stride=upsample_strides[2]),
        BatchNorm2d(num_upsample_filters[2]),
        nn.ReLU(),
    )
    if encode_background_as_zeros:
        num_cls = num_anchor_per_loc * num_class
    else:
        num_cls = num_anchor_per_loc * (num_class + 1)
    self.conv_cls = nn.Conv2d(sum(num_upsample_filters), num_cls, 1)
    self.conv_box = nn.Conv2d(
        sum(num_upsample_filters), num_anchor_per_loc * box_code_size, 1)
    if use_direction_classifier:
        self.conv_dir_cls = nn.Conv2d(
            sum(num_upsample_filters),
            num_anchor_per_loc * num_direction_bins, 1)
    if self._use_rc_net:
        self.conv_rc = nn.Conv2d(
            sum(num_upsample_filters),
            num_anchor_per_loc * box_code_size, 1)
def __init__(self,
             use_norm=True,
             num_class=2,
             layer_nums=[3, 5, 5],
             layer_strides=[2, 2, 2],
             num_filters=[128, 128, 256],
             upsample_strides=[1, 2, 4],
             num_upsample_filters=[256, 256, 256],
             num_input_filters=128,
             num_anchor_per_loc=2,
             encode_background_as_zeros=True,
             use_direction_classifier=True,
             use_groupnorm=False,
             num_groups=32,
             use_bev=False,
             box_code_size=7,
             name='rpn'):
    """Three-stage RPN with optional BEV branch.

    Each stage downsamples with a padded strided 3x3 conv and is mapped
    back to a common resolution by a transposed conv; the concatenated
    upsampled maps feed the 1x1 cls/box/(dir) heads.  When ``use_bev``
    is set, a small BEV encoder's output is concatenated into block2's
    input.
    """
    super(RPN, self).__init__()
    self._num_anchor_per_loc = num_anchor_per_loc
    self._use_direction_classifier = use_direction_classifier
    self._use_bev = use_bev
    assert len(layer_nums) == 3
    assert len(layer_strides) == len(layer_nums)
    assert len(num_filters) == len(layer_nums)
    assert len(upsample_strides) == len(layer_nums)
    assert len(num_upsample_filters) == len(layer_nums)
    # Every stage must land on the same output resolution.
    factors = []
    for i in range(len(layer_nums)):
        assert int(np.prod(
            layer_strides[:i + 1])) % upsample_strides[i] == 0
        factors.append(
            np.prod(layer_strides[:i + 1]) // upsample_strides[i])
    assert all([x == factors[0] for x in factors])
    if use_norm:
        if use_groupnorm:
            BatchNorm2d = change_default_args(
                num_groups=num_groups, eps=1e-3)(GroupNorm)
        else:
            BatchNorm2d = change_default_args(
                eps=1e-3, momentum=0.01)(nn.BatchNorm2d)
        Conv2d = change_default_args(bias=False)(nn.Conv2d)
        ConvTranspose2d = change_default_args(bias=False)(
            nn.ConvTranspose2d)
    else:
        BatchNorm2d = Empty
        Conv2d = change_default_args(bias=True)(nn.Conv2d)
        ConvTranspose2d = change_default_args(bias=True)(
            nn.ConvTranspose2d)

    # conv2d with "same" padding differs from pad-then-conv when
    # stride > 1, so each stage pads explicitly before its strided conv.
    def make_block(in_ch, out_ch, stride, repeats):
        stage = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(in_ch, out_ch, 3, stride=stride),
            BatchNorm2d(out_ch),
            nn.ReLU(),
        )
        for _ in range(repeats):
            stage.add(Conv2d(out_ch, out_ch, 3, padding=1))
            stage.add(BatchNorm2d(out_ch))
            stage.add(nn.ReLU())
        return stage

    def make_deconv(in_ch, out_ch, stride):
        return Sequential(
            ConvTranspose2d(in_ch, out_ch, stride, stride=stride),
            BatchNorm2d(out_ch),
            nn.ReLU(),
        )

    block2_input_filters = num_filters[0]
    if use_bev:
        # Small encoder for the 6-channel BEV map, pooled to half size.
        self.bev_extractor = Sequential(
            Conv2d(6, 32, 3, padding=1),
            BatchNorm2d(32),
            nn.ReLU(),
            Conv2d(32, 64, 3, padding=1),
            BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )
        block2_input_filters += 64
    self.block1 = make_block(num_input_filters, num_filters[0],
                             layer_strides[0], layer_nums[0])
    self.deconv1 = make_deconv(num_filters[0], num_upsample_filters[0],
                               upsample_strides[0])
    self.block2 = make_block(block2_input_filters, num_filters[1],
                             layer_strides[1], layer_nums[1])
    self.deconv2 = make_deconv(num_filters[1], num_upsample_filters[1],
                               upsample_strides[1])
    self.block3 = make_block(num_filters[1], num_filters[2],
                             layer_strides[2], layer_nums[2])
    self.deconv3 = make_deconv(num_filters[2], num_upsample_filters[2],
                               upsample_strides[2])
    if encode_background_as_zeros:
        num_cls = num_anchor_per_loc * num_class
    else:
        num_cls = num_anchor_per_loc * (num_class + 1)
    self.conv_cls = nn.Conv2d(sum(num_upsample_filters), num_cls, 1)
    self.conv_box = nn.Conv2d(sum(num_upsample_filters),
                              num_anchor_per_loc * box_code_size, 1)
    if use_direction_classifier:
        self.conv_dir_cls = nn.Conv2d(sum(num_upsample_filters),
                                      num_anchor_per_loc * 2, 1)
def __init__(self,
             output_shape,
             use_norm=True,
             num_filters_down1=[64, 128, 256, 512],
             num_filters_down2=[64, 128, 256, 512],
             name='tDBN_1'):
    """Build the tDBN_1 sparse middle extractor.

    Converts a sparse voxel tensor (via SparseConvNet) into four dense 2D
    BEV feature maps (``feature_map0`` .. ``feature_map3``), produced by
    four sparse conv stages (``block0`` .. ``block3``) of decreasing
    spatial resolution.

    Args:
        output_shape: voxel grid shape; indices [1:4] are the (D, H, W)
            spatial extent fed to ``scn.InputLayer``.
        use_norm: if True use BatchNorm1d defaults (eps=1e-3, momentum=0.01)
            for the (currently unused here) Linear/BatchNorm1d factories.
        num_filters_down1: channel widths of the four sparse conv stages.
        num_filters_down2: channel widths of the four dense output maps.
        name: module name tag.
    """
    super(tDBN_1, self).__init__()
    self.name = name
    if use_norm:
        BatchNorm1d = change_default_args(
            eps=1e-3, momentum=0.01)(nn.BatchNorm1d)
        Linear = change_default_args(bias=False)(nn.Linear)
    else:
        BatchNorm1d = Empty
        Linear = change_default_args(bias=True)(nn.Linear)
    sparse_shape = np.array(output_shape[1:4])  # + [1, 0, 0]
    # print(sparse_shape)
    self.scn_input = scn.InputLayer(3, sparse_shape.tolist())
    self.voxel_output_shape = output_shape
    middle_layers = []
    num_filter_fpn = num_filters_down1  # [ 64, 128, 256, 512]
    # dimension of feature maps, num_filter_fpn[3] == dimension_feature_map[3]
    dimension_feature_map = num_filters_down2  # [ 64, 128, 256, 512]
    # dimensions of output into 2D feature map
    # Kernel depths that collapse the height axis per stage:
    # (input.spatial_size - self.filter_size) / self.filter_stride + 1
    dimension_kernel_size = [15, 7, 3, 1]
    # -----------------------------------------------------------------
    ## block1 and feature map 0, convert from voxel into 3D tensor
    # -----------------------------------------------------------------
    for i, o in [[128, num_filter_fpn[0]]
                 ]:  #, [num_filter_fpn[0], num_filter_fpn[0]]]:
        middle_layers.append(scn.SubmanifoldConvolution(3, i, o, 3, False))
        middle_layers.append(scn.BatchNormReLU(o, eps=1e-3, momentum=0.99))
    self.block0 = Sequential(*middle_layers)
    middle_layers = []
    # downsampling with one 15*15 kernel, to compress 15 dimension into 1
    # dimension, 64*15*399*351 ---> 256*1*399*351
    middle_layers.append(
        scn.Convolution(
            3,
            num_filter_fpn[0],
            dimension_feature_map[0],
            (dimension_kernel_size[0], 1, 1),
            (2, 1, 1),  # in:7, out:5
            bias=False))
    middle_layers.append(
        scn.BatchNormReLU(dimension_feature_map[0], eps=1e-3, momentum=0.99))
    ## compressed into a birdview, height dimension into 1
    middle_layers.append(scn.SparseToDense(3, dimension_feature_map[0]))
    self.feature_map0 = Sequential(*middle_layers)
    ## feature map1
    middle_layers = []
    # --------------------------------------------------------------
    ## block1-3 and feature map1-3
    # --------------------------------------------------------------
    for k in range(1, 4):
        ## reduce the resolution
        middle_layers.append(
            scn.Convolution(3,
                            num_filter_fpn[k - 1],
                            num_filter_fpn[k], (3, 3, 3), (2, 2, 2),
                            bias=False))
        middle_layers.append(
            scn.BatchNormReLU(num_filter_fpn[k], eps=1e-3, momentum=0.99))
        # 128*7*199*175 recurrent: two submanifold conv + BN-ReLU pairs
        for i, o in [[num_filter_fpn[k], num_filter_fpn[k]],
                     [num_filter_fpn[k], num_filter_fpn[k]]]:
            middle_layers.append(
                scn.SubmanifoldConvolution(3, i, o, 3, False))
            middle_layers.append(
                scn.BatchNormReLU(o, eps=1e-3, momentum=0.99))
        if k == 1:
            self.block1 = Sequential(*middle_layers)
        elif k == 2:
            self.block2 = Sequential(*middle_layers)
        elif k == 3:
            self.block3 = Sequential(*middle_layers)
        middle_layers = []
        ## compressed into a birdview, height dimension into 1
        middle_layers.append(
            scn.Convolution(3,
                            num_filter_fpn[k],
                            dimension_feature_map[k],
                            (dimension_kernel_size[k], 1, 1), (1, 1, 1),
                            bias=False))
        middle_layers.append(
            scn.BatchNormReLU(dimension_feature_map[k],
                              eps=1e-3,
                              momentum=0.99))
        middle_layers.append(
            scn.SparseToDense(3, dimension_feature_map[k])
        )  ## convert a SparseConvNet hidden layer to a dense convolutional layer
        if k == 1:
            self.feature_map1 = Sequential(*middle_layers)  # in:7, out:5
        elif k == 2:
            self.feature_map2 = Sequential(*middle_layers)  # in:3, out:1
            # self.feature_map2 = Sequential(scn.SparseToDense(3, dimension_feature_map[k]))
        elif k == 3:
            #self.feature_map3 = Sequential(*middle_layers) # XXX
            # NOTE(review): for k == 3 the conv/BN layers appended above are
            # discarded — only SparseToDense is kept, because stage 3 is
            # already 2D (depth 1); confirm this is intentional.
            self.feature_map3 = Sequential(
                scn.SparseToDense(3, dimension_feature_map[k])
            )  ## last one is the 2D instead of 3D
        middle_layers = []
def __init__(self,
             use_norm=True,
             num_class=2,
             layer_nums=[3, 5, 5],
             layer_strides=[2, 2, 2],
             num_filters=[128, 128, 256],
             upsample_strides=[1, 2, 4],
             num_upsample_filters=[256, 256, 256],
             num_input_features=128,
             num_anchor_per_loc=2,
             encode_background_as_zeros=True,
             use_direction_classifier=True,
             use_groupnorm=False,
             num_groups=32,
             use_bev=False,
             box_code_size=7,
             use_rc_net=False,
             name='rpn'):
    """Build RPN_FUSION: a BEV RPN fused with an RGB feature branch.

    Constructs (a) the standard conv/deconv BEV pyramid (``blocks`` /
    ``deblocks``) with cls/box/dir heads, (b) a ResNet-style RGB backbone
    (``conv1`` .. ``layer4``) feeding ``PyramidFeatures``, and (c) fusion
    layers (``rgb_refine``, ``fusion_refine``) with two ``BasicGate``
    modules. The fusion path semantics live in the (not shown) forward.

    Args mirror the plain RPN; extra ones:
        use_rc_net: if True also creates ``conv_rc`` (same channel count
            as the box head).
    """
    super(RPN_FUSION, self).__init__()
    self._num_anchor_per_loc = num_anchor_per_loc
    self._use_direction_classifier = use_direction_classifier
    self._use_bev = use_bev
    self._use_rc_net = use_rc_net
    # assert len(layer_nums) == 3
    assert len(layer_strides) == len(layer_nums)
    assert len(num_filters) == len(layer_nums)
    assert len(upsample_strides) == len(layer_nums)
    assert len(num_upsample_filters) == len(layer_nums)
    # Disabled stride-consistency check (kept as an inert string on purpose).
    """
    factors = []
    for i in range(len(layer_nums)):
        assert int(np.prod(layer_strides[:i + 1])) % upsample_strides[i] == 0
        factors.append(np.prod(layer_strides[:i + 1]) // upsample_strides[i])
    assert all([x == factors[0] for x in factors])
    """
    if use_norm:
        if use_groupnorm:
            BatchNorm2d = change_default_args(num_groups=num_groups,
                                              eps=1e-3)(GroupNorm)
        else:
            BatchNorm2d = change_default_args(eps=1e-3, momentum=0.01)(
                nn.BatchNorm2d)
        Conv2d = change_default_args(bias=False)(nn.Conv2d)
        ConvTranspose2d = change_default_args(bias=False)(
            nn.ConvTranspose2d)
    else:
        # No norm layer -> convs keep their bias terms.
        BatchNorm2d = Empty
        Conv2d = change_default_args(bias=True)(nn.Conv2d)
        ConvTranspose2d = change_default_args(bias=True)(
            nn.ConvTranspose2d)
    in_filters = [num_input_features, *num_filters[:-1]]
    # note that when stride > 1, conv2d with same padding isn't
    # equal to pad-conv2d. we should use pad-conv2d.
    blocks = []
    deblocks = []
    for i, layer_num in enumerate(layer_nums):
        # in_f = 256 if i == 0 else in_filters[i]
        in_f = in_filters[i]
        block = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(in_f, num_filters[i], 3, stride=layer_strides[i]),
            BatchNorm2d(num_filters[i]),
            nn.ReLU(),
        )
        for j in range(layer_num):
            block.add(Conv2d(num_filters[i], num_filters[i], 3, padding=1))
            block.add(BatchNorm2d(num_filters[i]))
            block.add(nn.ReLU())
        blocks.append(block)
        deblock = Sequential(
            ConvTranspose2d(num_filters[i],
                            num_upsample_filters[i],
                            upsample_strides[i],
                            stride=upsample_strides[i]),
            BatchNorm2d(num_upsample_filters[i]),
            nn.ReLU(),
        )
        deblocks.append(deblock)
    self.blocks = nn.ModuleList(blocks)
    self.deblocks = nn.ModuleList(deblocks)
    if encode_background_as_zeros:
        num_cls = num_anchor_per_loc * num_class
    else:
        # Background gets its own explicit class slot.
        num_cls = num_anchor_per_loc * (num_class + 1)
    #########################
    det_num = sum(num_upsample_filters)
    #########################
    self.conv_cls = nn.Conv2d(det_num, num_cls, 1)
    self.conv_box = nn.Conv2d(det_num, num_anchor_per_loc * box_code_size,
                              1)
    if use_direction_classifier:
        self.conv_dir_cls = nn.Conv2d(det_num, num_anchor_per_loc * 2, 1)
    if self._use_rc_net:
        self.conv_rc = nn.Conv2d(det_num,
                                 num_anchor_per_loc * box_code_size, 1)
    ##########################################################
    self.f_in_planes_det = 64
    # RGB backbone selection is hard-coded here, not a constructor arg.
    net_type = 'FPN18'
    if net_type == 'FPN50':
        num_blocks = [3, 4, 6, 3]
        bb_block = Bottleneck
    elif net_type == 'FPN18':
        num_blocks = [2, 2, 2, 2]
        bb_block = BasicBlock
    # For RGB Feature Network
    self.conv1 = nn.Conv2d(3,
                           64,
                           kernel_size=7,
                           stride=2,
                           padding=3,
                           bias=False)
    self.bn1 = nn.BatchNorm2d(64)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    self.layer1 = self._make_layer_det(bb_block, 64, num_blocks[0], stride=1)
    self.layer2 = self._make_layer_det(bb_block, 128, num_blocks[1], stride=2)
    self.layer3 = self._make_layer_det(bb_block, 256, num_blocks[2], stride=2)
    self.layer4 = self._make_layer_det(bb_block, 512, num_blocks[3], stride=2)
    # BasicBlock exposes conv2, Bottleneck exposes conv3 — hence the split.
    if net_type == 'FPN18':
        fpn_sizes = [
            self.layer2[1].conv2.out_channels,
            self.layer3[1].conv2.out_channels,
            self.layer4[1].conv2.out_channels
        ]
    else:
        fpn_sizes = [
            self.layer2[num_blocks[1] - 1].conv3.out_channels,
            self.layer3[num_blocks[2] - 1].conv3.out_channels,
            self.layer4[num_blocks[3] - 1].conv3.out_channels
        ]
    self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2])
    ####################################################################
    # Fusion Layer
    # num_z_feat presumably = number of height slices of RGB features
    # concatenated before refinement — TODO confirm against forward().
    num_z_feat = 3
    n_feats = 128
    self.rgb_refine = Sequential(
        nn.Conv2d(256 * num_z_feat, 256, kernel_size=3, stride=1,
                  padding=1),
        nn.BatchNorm2d(256),
        nn.ReLU(),
        nn.Conv2d(256, n_feats, kernel_size=1, stride=1, padding=0),
        nn.BatchNorm2d(n_feats),
        nn.ReLU(),
    )
    self.fusion_refine = Sequential(
        nn.Conv2d(n_feats * 2,
                  n_feats * 2,
                  kernel_size=3,
                  stride=1,
                  padding=1),
        nn.BatchNorm2d(n_feats * 2),
        nn.ReLU(),
        nn.Conv2d(n_feats * 2, n_feats, kernel_size=1, stride=1,
                  padding=0),
        nn.BatchNorm2d(n_feats),
        nn.ReLU(),
    )
    self.bev_gate = BasicGate(n_feats)
    self.crop_gate = BasicGate(n_feats)
def __init__(self,
             use_norm=True,
             num_class=2,
             layer_nums=(1, 1),
             layer_strides=(1, 2),
             num_filters=(256, 256),
             upsample_strides=(1, 2),
             num_upsample_filters=(128, 128),
             num_input_features=128,
             num_anchor_per_loc=2,
             encode_background_as_zeros=True,
             use_direction_classifier=True,
             use_groupnorm=False,
             num_groups=32,
             use_bev=False,
             box_code_size=7,
             num_direction_bins=2):
    """Build a two-branch RPN head.

    Two stride-1 conv stacks (``block1``/``block2``) refine two input
    feature maps; ``deconv1``/``deconv2`` upsample them to a common
    resolution, and 1x1 heads (``conv_cls``/``conv_box``/``conv_dir_cls``)
    predict over the channel-concatenated result.

    Args:
        use_norm: use BatchNorm2d (or GroupNorm) after every conv; convs
            then drop their bias.
        num_class: number of foreground classes.
        layer_nums: conv-BN-ReLU repeats per branch.
        num_filters: channel width of each branch (also its input width).
        upsample_strides / num_upsample_filters: deconv stride and output
            channels per branch.
        num_anchor_per_loc: anchors per BEV cell.
        encode_background_as_zeros: if False, one extra class slot is
            reserved for background.
        num_direction_bins: direction-classifier bins per anchor.
    """
    super().__init__()
    self.name = 'Sp2RPN2'
    self._num_class = num_class
    self._num_anchor_per_loc = num_anchor_per_loc
    self._use_direction_classifier = use_direction_classifier
    self._num_direction_bins = num_direction_bins
    self._box_code_size = box_code_size
    self._use_bev = use_bev
    assert len(layer_strides) == len(layer_nums)
    assert len(num_filters) == len(layer_nums)
    assert len(upsample_strides) == len(layer_nums)
    assert len(num_upsample_filters) == len(layer_nums)
    # upsample_strides is defined as double...
    upsample_strides = [int(s) for s in upsample_strides]
    if use_norm:
        if use_groupnorm:
            BatchNorm2d = change_default_args(num_groups=num_groups,
                                              eps=1e-3)(GroupNorm)
        else:
            BatchNorm2d = change_default_args(eps=1e-3, momentum=0.01)(
                nn.BatchNorm2d)
        Conv2d = change_default_args(bias=False)(nn.Conv2d)
        ConvTranspose2d = change_default_args(bias=False)(
            nn.ConvTranspose2d)
    else:
        BatchNorm2d = Empty
        Conv2d = change_default_args(bias=True)(nn.Conv2d)
        ConvTranspose2d = change_default_args(bias=True)(
            nn.ConvTranspose2d)
    self.block1 = Sequential()
    for i in range(layer_nums[0]):
        self.block1.add(
            Conv2d(num_filters[0], num_filters[0], 3, padding=1))
        self.block1.add(BatchNorm2d(num_filters[0]))
        self.block1.add(nn.ReLU())
    self.deconv1 = Sequential(
        ConvTranspose2d(num_filters[0],
                        num_upsample_filters[0],
                        upsample_strides[0],
                        stride=upsample_strides[0]),
        BatchNorm2d(num_upsample_filters[0]),
        nn.ReLU(),
    )
    self.block2 = Sequential()
    for i in range(layer_nums[1]):
        self.block2.add(
            Conv2d(num_filters[1], num_filters[1], 3, padding=1))
        self.block2.add(BatchNorm2d(num_filters[1]))
        self.block2.add(nn.ReLU())
    self.deconv2 = Sequential(
        ConvTranspose2d(num_filters[1],
                        num_upsample_filters[1],
                        upsample_strides[1],
                        stride=upsample_strides[1]),
        BatchNorm2d(num_upsample_filters[1]),
        nn.ReLU(),
    )
    if encode_background_as_zeros:
        num_cls = num_anchor_per_loc * num_class
    else:
        num_cls = num_anchor_per_loc * (num_class + 1)
    self.conv_cls = nn.Conv2d(sum(num_upsample_filters), num_cls, 1)
    self.conv_box = nn.Conv2d(sum(num_upsample_filters),
                              num_anchor_per_loc * box_code_size, 1)
    if use_direction_classifier:
        # BUGFIX: was hard-coded `num_anchor_per_loc * 2`, ignoring the
        # num_direction_bins parameter this class stores as
        # self._num_direction_bins (used to reshape dir predictions).
        # Backward compatible: the default num_direction_bins is 2.
        self.conv_dir_cls = nn.Conv2d(
            sum(num_upsample_filters),
            num_anchor_per_loc * num_direction_bins, 1)
class Sp2RPN(nn.Module):
    """Two-branch RPN head over a pair of BEV feature maps.

    ``forward`` takes a tuple ``(x1, x2)``; each tensor runs through its own
    conv stack and deconv, the results are concatenated on the channel axis,
    and 1x1 heads predict box regression, classification, and (optionally)
    direction per anchor.
    """

    def __init__(self,
                 use_norm=True,
                 num_class=2,
                 layer_nums=(1, 1),
                 layer_strides=(1, 2),
                 num_filters=(256, 256),
                 upsample_strides=(1, 2),
                 num_upsample_filters=(128, 128),
                 num_input_features=128,
                 num_anchor_per_loc=2,
                 encode_background_as_zeros=True,
                 use_direction_classifier=True,
                 use_groupnorm=False,
                 num_groups=32,
                 use_bev=False,
                 box_code_size=7,
                 num_direction_bins=2):
        """See class docstring; arguments mirror the other RPN variants.

        Args:
            num_direction_bins: direction bins per anchor; forward()
                reshapes dir predictions with this value, so the dir head's
                channel count is derived from it.
        """
        super().__init__()
        # NOTE(review): name says 'Sp2RPN2' although the class is Sp2RPN —
        # kept as-is since configs may match on this string.
        self.name = 'Sp2RPN2'
        self._num_class = num_class
        self._num_anchor_per_loc = num_anchor_per_loc
        self._use_direction_classifier = use_direction_classifier
        self._num_direction_bins = num_direction_bins
        self._box_code_size = box_code_size
        self._use_bev = use_bev
        assert len(layer_strides) == len(layer_nums)
        assert len(num_filters) == len(layer_nums)
        assert len(upsample_strides) == len(layer_nums)
        assert len(num_upsample_filters) == len(layer_nums)
        # upsample_strides is defined as double...
        upsample_strides = [int(s) for s in upsample_strides]
        if use_norm:
            if use_groupnorm:
                BatchNorm2d = change_default_args(num_groups=num_groups,
                                                  eps=1e-3)(GroupNorm)
            else:
                BatchNorm2d = change_default_args(eps=1e-3, momentum=0.01)(
                    nn.BatchNorm2d)
            Conv2d = change_default_args(bias=False)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=False)(
                nn.ConvTranspose2d)
        else:
            BatchNorm2d = Empty
            Conv2d = change_default_args(bias=True)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=True)(
                nn.ConvTranspose2d)
        self.block1 = Sequential()
        for i in range(layer_nums[0]):
            self.block1.add(
                Conv2d(num_filters[0], num_filters[0], 3, padding=1))
            self.block1.add(BatchNorm2d(num_filters[0]))
            self.block1.add(nn.ReLU())
        self.deconv1 = Sequential(
            ConvTranspose2d(num_filters[0],
                            num_upsample_filters[0],
                            upsample_strides[0],
                            stride=upsample_strides[0]),
            BatchNorm2d(num_upsample_filters[0]),
            nn.ReLU(),
        )
        self.block2 = Sequential()
        for i in range(layer_nums[1]):
            self.block2.add(
                Conv2d(num_filters[1], num_filters[1], 3, padding=1))
            self.block2.add(BatchNorm2d(num_filters[1]))
            self.block2.add(nn.ReLU())
        self.deconv2 = Sequential(
            ConvTranspose2d(num_filters[1],
                            num_upsample_filters[1],
                            upsample_strides[1],
                            stride=upsample_strides[1]),
            BatchNorm2d(num_upsample_filters[1]),
            nn.ReLU(),
        )
        if encode_background_as_zeros:
            num_cls = num_anchor_per_loc * num_class
        else:
            num_cls = num_anchor_per_loc * (num_class + 1)
        self.conv_cls = nn.Conv2d(sum(num_upsample_filters), num_cls, 1)
        self.conv_box = nn.Conv2d(sum(num_upsample_filters),
                                  num_anchor_per_loc * box_code_size, 1)
        if use_direction_classifier:
            # BUGFIX: was hard-coded `num_anchor_per_loc * 2`. forward()
            # views the dir output as (-1, anchors, num_direction_bins, H, W),
            # so the conv must emit anchors * num_direction_bins channels or
            # the view fails whenever num_direction_bins != 2. Backward
            # compatible: the default num_direction_bins is 2.
            self.conv_dir_cls = nn.Conv2d(
                sum(num_upsample_filters),
                num_anchor_per_loc * num_direction_bins, 1)

    def forward(self, x):
        """Run both branches and the prediction heads.

        Args:
            x: tuple/list of two BEV tensors ``(x1, x2)``.

        Returns:
            dict with ``box_preds`` / ``cls_preds`` (and ``dir_cls_preds``
            when enabled), each shaped
            (N, num_anchor_per_loc, H, W, per-anchor-dim).
        """
        x1, x2 = x
        x1 = self.block1(x1)
        up1 = self.deconv1(x1)
        x2 = self.block2(x2)
        up2 = self.deconv2(x2)
        x_cat = torch.cat([up1, up2], dim=1)
        box_preds = self.conv_box(x_cat)
        cls_preds = self.conv_cls(x_cat)
        # [N, C, y(H), x(W)]
        C, H, W = box_preds.shape[1:]
        box_preds = box_preds.view(-1, self._num_anchor_per_loc,
                                   self._box_code_size, H,
                                   W).permute(0, 1, 3, 4, 2).contiguous()
        cls_preds = cls_preds.view(-1, self._num_anchor_per_loc,
                                   self._num_class, H,
                                   W).permute(0, 1, 3, 4, 2).contiguous()
        # box_preds = box_preds.permute(0, 2, 3, 1).contiguous()
        # cls_preds = cls_preds.permute(0, 2, 3, 1).contiguous()
        ret_dict = {
            "box_preds": box_preds,
            "cls_preds": cls_preds,
        }
        if self._use_direction_classifier:
            dir_cls_preds = self.conv_dir_cls(x_cat)
            dir_cls_preds = dir_cls_preds.view(
                -1, self._num_anchor_per_loc, self._num_direction_bins, H,
                W).permute(0, 1, 3, 4, 2).contiguous()
            # dir_cls_preds = dir_cls_preds.permute(0, 2, 3, 1).contiguous()
            ret_dict["dir_cls_preds"] = dir_cls_preds
        return ret_dict
def __init__(self,
             use_norm=True,
             num_class=2,
             layer_nums=(3, 5, 5),
             layer_strides=(2, 2, 2),
             num_filters=(128, 128, 256),
             upsample_strides=(1, 2, 4),
             num_upsample_filters=(256, 256, 256),
             num_input_features=128,
             num_anchor_per_loc=2,
             encode_background_as_zeros=True,
             use_direction_classifier=True,
             use_groupnorm=False,
             num_groups=32,
             box_code_size=7,
             num_direction_bins=2,
             name='rpn'):
    """Build RPN_refine: three conv stages + deconvs, with coarse and
    refined prediction heads.

    Same three-stage pyramid as the plain RPN, but with two head sets:
    ``conv_cls_coarse``/``conv_box_coarse`` and ``conv_cls``/``conv_box``.
    NOTE(review): all heads take ``num_upsample_filters[0]`` channels
    (not the sum) — presumably each head reads a single upsampled map in
    this refine design; confirm against this class's forward().
    """
    super(RPN_refine, self).__init__()
    self._num_anchor_per_loc = num_anchor_per_loc
    self._use_direction_classifier = use_direction_classifier
    assert len(layer_nums) == 3
    assert len(layer_strides) == len(layer_nums)
    assert len(num_filters) == len(layer_nums)
    assert len(upsample_strides) == len(layer_nums)
    assert len(num_upsample_filters) == len(layer_nums)
    self._box_code_size = box_code_size
    self._num_class = num_class
    self._num_direction_bins = num_direction_bins
    # Strides may arrive as floats; round to integer for ConvTranspose2d.
    upsample_strides = [
        np.round(u).astype(np.int64) for u in upsample_strides
    ]
    if use_norm:
        if use_groupnorm:
            BatchNorm2d = change_default_args(num_groups=num_groups,
                                              eps=1e-3)(GroupNorm)
        else:
            BatchNorm2d = change_default_args(eps=1e-3, momentum=0.01)(
                nn.BatchNorm2d)
        Conv2d = change_default_args(bias=False)(nn.Conv2d)
        ConvTranspose2d = change_default_args(bias=False)(
            nn.ConvTranspose2d)
    else:
        # No norm layer -> convs keep their bias terms.
        BatchNorm2d = Empty
        Conv2d = change_default_args(bias=True)(nn.Conv2d)
        ConvTranspose2d = change_default_args(bias=True)(
            nn.ConvTranspose2d)
    # note that when stride > 1, conv2d with same padding isn't
    # equal to pad-conv2d. we should use pad-conv2d.
    block2_input_filters = num_filters[0]
    self.block1 = Sequential(
        nn.ZeroPad2d(1),
        Conv2d(num_input_features,
               num_filters[0],
               3,
               stride=layer_strides[0]),
        BatchNorm2d(num_filters[0]),
        nn.ReLU(),
    )
    for i in range(layer_nums[0]):
        self.block1.add(
            Conv2d(num_filters[0], num_filters[0], 3, padding=1))
        self.block1.add(BatchNorm2d(num_filters[0]))
        self.block1.add(nn.ReLU())
    self.deconv1 = Sequential(
        ConvTranspose2d(num_filters[0],
                        num_upsample_filters[0],
                        upsample_strides[0],
                        stride=upsample_strides[0]),
        BatchNorm2d(num_upsample_filters[0]),
        nn.ReLU(),
    )
    self.block2 = Sequential(
        nn.ZeroPad2d(1),
        Conv2d(block2_input_filters,
               num_filters[1],
               3,
               stride=layer_strides[1]),
        BatchNorm2d(num_filters[1]),
        nn.ReLU(),
    )
    for i in range(layer_nums[1]):
        self.block2.add(
            Conv2d(num_filters[1], num_filters[1], 3, padding=1))
        self.block2.add(BatchNorm2d(num_filters[1]))
        self.block2.add(nn.ReLU())
    self.deconv2 = Sequential(
        ConvTranspose2d(num_filters[1],
                        num_upsample_filters[1],
                        upsample_strides[1],
                        stride=upsample_strides[1]),
        BatchNorm2d(num_upsample_filters[1]),
        nn.ReLU(),
    )
    self.block3 = Sequential(
        nn.ZeroPad2d(1),
        Conv2d(num_filters[1], num_filters[2], 3,
               stride=layer_strides[2]),
        BatchNorm2d(num_filters[2]),
        nn.ReLU(),
    )
    for i in range(layer_nums[2]):
        self.block3.add(
            Conv2d(num_filters[2], num_filters[2], 3, padding=1))
        self.block3.add(BatchNorm2d(num_filters[2]))
        self.block3.add(nn.ReLU())
    self.deconv3 = Sequential(
        ConvTranspose2d(num_filters[2],
                        num_upsample_filters[2],
                        upsample_strides[2],
                        stride=upsample_strides[2]),
        BatchNorm2d(num_upsample_filters[2]),
        nn.ReLU(),
    )
    if encode_background_as_zeros:
        num_cls = num_anchor_per_loc * num_class
    else:
        # Background gets its own explicit class slot.
        num_cls = num_anchor_per_loc * (num_class + 1)
    # Coarse heads (first pass) and refined heads (second pass).
    self.conv_cls_coarse = nn.Conv2d(num_upsample_filters[0], num_cls, 1)
    self.conv_box_coarse = nn.Conv2d(num_upsample_filters[0],
                                     num_anchor_per_loc * box_code_size,
                                     1)
    self.conv_cls = nn.Conv2d(num_upsample_filters[0], num_cls, 1)
    self.conv_box = nn.Conv2d(num_upsample_filters[0],
                              num_anchor_per_loc * box_code_size, 1)
    if use_direction_classifier:
        self.conv_dir_cls = nn.Conv2d(
            num_upsample_filters[0],
            num_anchor_per_loc * num_direction_bins, 1)
def __init__(self,
             use_norm=True,
             num_class=2,
             layer_nums=(3, 5, 5),
             layer_strides=(2, 2, 2),
             num_filters=(128, 128, 256),
             upsample_strides=(1, 2, 4),
             num_upsample_filters=(256, 256, 256),
             num_input_features=128,
             num_anchor_per_loc=2,
             encode_background_as_zeros=True,
             use_direction_classifier=True,
             use_groupnorm=False,
             num_groups=32,
             box_code_size=7,
             num_direction_bins=2,
             name='rpn'):
    """upsample_strides support float: [0.25, 0.5, 1]
    if upsample_strides < 1, conv2d will be used instead of convtranspose2d.

    Builds the PSA (pyramid spatial attention) variant of the headless RPN:
    the standard blocks/deblocks pyramid plus cross-scale refine layers
    (max-pool downsamplers, transposed-conv upsamplers, fusion 1x1 convs)
    and three receptive-field modules RF1-RF3 sized from cfg.PSA.
    """
    super(Two_RPNNoHeadBase_PSA, self).__init__()
    self._layer_strides = layer_strides
    self._num_filters = num_filters
    self._layer_nums = layer_nums
    self._upsample_strides = upsample_strides
    self._num_upsample_filters = num_upsample_filters
    self._num_input_features = num_input_features
    self._use_norm = use_norm
    self._use_groupnorm = use_groupnorm
    self._num_groups = num_groups
    assert len(layer_strides) == len(layer_nums)
    assert len(num_filters) == len(layer_nums)
    assert len(num_upsample_filters) == len(upsample_strides)
    self._upsample_start_idx = len(layer_nums) - len(upsample_strides)
    # All upsampled maps must land on the same resolution: the ratio of
    # upsample stride to cumulative downsample stride must be constant.
    must_equal_list = []
    for i in range(len(upsample_strides)):
        must_equal_list.append(
            upsample_strides[i] /
            np.prod(layer_strides[:i + self._upsample_start_idx + 1]))
    for val in must_equal_list:
        assert val == must_equal_list[0]
    if use_norm:
        if use_groupnorm:
            BatchNorm2d = change_default_args(num_groups=num_groups,
                                              eps=1e-3)(GroupNorm)
        else:
            BatchNorm2d = change_default_args(eps=1e-3, momentum=0.01)(
                nn.BatchNorm2d)
        Conv2d = change_default_args(bias=False)(nn.Conv2d)
        ConvTranspose2d = change_default_args(bias=False)(
            nn.ConvTranspose2d)
    else:
        # No norm layer -> convs keep their bias terms.
        BatchNorm2d = Empty
        Conv2d = change_default_args(bias=True)(nn.Conv2d)
        ConvTranspose2d = change_default_args(bias=True)(
            nn.ConvTranspose2d)
    in_filters = [num_input_features, *num_filters[:-1]]
    blocks = []
    deblocks = []
    for i, layer_num in enumerate(layer_nums):
        block, num_out_filters = self._make_layer(in_filters[i],
                                                  num_filters[i],
                                                  layer_num,
                                                  stride=layer_strides[i])
        blocks.append(block)
        if i - self._upsample_start_idx >= 0:
            stride = upsample_strides[i - self._upsample_start_idx]
            if stride >= 1:
                # Integer upsample: transposed conv.
                stride = np.round(stride).astype(np.int64)
                deblock = nn.Sequential(
                    ConvTranspose2d(
                        num_out_filters,
                        num_upsample_filters[i - self._upsample_start_idx],
                        stride,
                        stride=stride),
                    BatchNorm2d(
                        num_upsample_filters[i -
                                             self._upsample_start_idx]),
                    nn.ReLU(),
                )
            else:
                # Fractional "upsample" < 1 is a downsample: strided conv.
                stride = np.round(1 / stride).astype(np.int64)
                deblock = nn.Sequential(
                    Conv2d(num_out_filters,
                           num_upsample_filters[i -
                                                self._upsample_start_idx],
                           stride,
                           stride=stride),
                    BatchNorm2d(
                        num_upsample_filters[i -
                                             self._upsample_start_idx]),
                    nn.ReLU(),
                )
            deblocks.append(deblock)
    self._num_out_filters = num_out_filters
    self.blocks = nn.ModuleList(blocks)
    self.deblocks = nn.ModuleList(deblocks)
    ### Refine: cross-scale feature exchange before the refined heads.
    self.bottle_conv = nn.Conv2d(sum(num_upsample_filters),
                                 sum(num_upsample_filters) // 3, 1)
    self.block1_dec2x = nn.MaxPool2d(kernel_size=2)  ### C=64
    self.block1_dec4x = nn.MaxPool2d(kernel_size=4)  ### C=64
    self.block2_dec2x = nn.MaxPool2d(kernel_size=2)  ### C=128
    self.block2_inc2x = ConvTranspose2d(num_filters[1],
                                        num_filters[0] // 2,
                                        2,
                                        stride=2)
    self.block3_inc2x = ConvTranspose2d(num_filters[2],
                                        num_filters[1] // 2,
                                        2,
                                        stride=2)
    self.block3_inc4x = ConvTranspose2d(num_filters[2],
                                        num_filters[0] // 2,
                                        4,
                                        stride=4)  #### C=32
    if upsample_strides[0] < 1:
        stride0 = np.round(1.0 / upsample_strides[0]).astype(np.int64)
        self.refine_up1 = Sequential(
            Conv2d(num_filters[0],
                   num_upsample_filters[0],
                   stride0,
                   stride=stride0),
            BatchNorm2d(num_upsample_filters[0]),
            nn.ReLU(),
        )
    else:
        stride0 = np.round(upsample_strides[0]).astype(np.int64)
        self.refine_up1 = Sequential(
            ConvTranspose2d(num_filters[0],
                            num_upsample_filters[0],
                            stride0,
                            stride=stride0),
            BatchNorm2d(num_upsample_filters[0]),
            nn.ReLU(),
        )
    if upsample_strides[1] < 1:
        stride1 = np.round(1.0 / upsample_strides[1]).astype(np.int64)
        self.refine_up2 = Sequential(
            Conv2d(num_filters[1],
                   num_upsample_filters[1],
                   stride1,
                   stride=stride1),
            BatchNorm2d(num_upsample_filters[1]),
            nn.ReLU(),
        )
    else:
        stride1 = np.round(upsample_strides[1]).astype(np.int64)
        self.refine_up2 = Sequential(
            ConvTranspose2d(num_filters[1],
                            num_upsample_filters[1],
                            stride1,
                            stride=stride1),
            BatchNorm2d(num_upsample_filters[1]),
            nn.ReLU(),
        )
    if upsample_strides[2] < 1:
        stride2 = np.round(1.0 / upsample_strides[2]).astype(np.int64)
        # BUGFIX: was ConvTranspose2d, which would UPSAMPLE in the branch
        # meant to downsample (stride < 1) — inconsistent with refine_up1,
        # refine_up2 and the deblock loop above, all of which use a strided
        # Conv2d here. Dead under the default (1, 2, 4) strides, so the
        # change cannot affect existing configs.
        self.refine_up3 = Sequential(
            Conv2d(num_filters[2],
                   num_upsample_filters[2],
                   stride2,
                   stride=stride2),
            BatchNorm2d(num_upsample_filters[2]),
            nn.ReLU(),
        )
    else:
        stride2 = np.round(upsample_strides[2]).astype(np.int64)
        self.refine_up3 = Sequential(
            ConvTranspose2d(num_filters[2],
                            num_upsample_filters[2],
                            stride2,
                            stride=stride2),
            BatchNorm2d(num_upsample_filters[2]),
            nn.ReLU(),
        )
    # 1x1 fusions of the concatenated cross-scale features per stage.
    self.fusion_block1 = nn.Conv2d(
        num_filters[0] + num_filters[0] // 2 + num_filters[0] // 2,
        num_filters[0], 1)
    self.fusion_block2 = nn.Conv2d(
        num_filters[0] + num_filters[1] + num_filters[1] // 2,
        num_filters[1], 1)
    self.fusion_block3 = nn.Conv2d(
        num_filters[0] + num_filters[1] + num_filters[2], num_filters[2],
        1)
    #######
    C_Bottle = cfg.PSA.C_Bottle
    C = cfg.PSA.C_Reudce  # NOTE(review): attribute name typo lives in cfg
    self.RF1 = Sequential(  # 3*3
        Conv2d(C_Bottle * 2, C, kernel_size=1, stride=1),
        BatchNorm2d(C),
        nn.ReLU(inplace=True),
        Conv2d(C,
               C_Bottle * 2,
               kernel_size=3,
               stride=1,
               padding=1,
               dilation=1),
        BatchNorm2d(C_Bottle * 2),
        nn.ReLU(inplace=True),
    )
    self.RF2 = Sequential(  # 5*5
        Conv2d(C_Bottle, C, kernel_size=3, stride=1, padding=1),
        BatchNorm2d(C),
        nn.ReLU(inplace=True),
        Conv2d(C,
               C_Bottle,
               kernel_size=3,
               stride=1,
               padding=1,
               dilation=1),
        BatchNorm2d(C_Bottle),
        nn.ReLU(inplace=True),
    )
    self.RF3 = Sequential(  # 7*7
        Conv2d(C_Bottle // 2, C, kernel_size=3, stride=1, padding=1),
        BatchNorm2d(C),
        nn.ReLU(inplace=True),
        Conv2d(C, C, kernel_size=3, stride=1, padding=1),
        BatchNorm2d(C),
        nn.ReLU(inplace=True),
        Conv2d(C, C_Bottle // 2, kernel_size=3, stride=1, padding=1),
        BatchNorm2d(C_Bottle // 2),
        nn.ReLU(inplace=True),
    )
    self.concat_conv1 = nn.Conv2d(num_filters[1],
                                  num_filters[1],
                                  kernel_size=3,
                                  padding=1)  ## kernel_size=3
    self.concat_conv2 = nn.Conv2d(num_filters[1],
                                  num_filters[1],
                                  kernel_size=3,
                                  padding=1)
    self.concat_conv3 = nn.Conv2d(num_filters[1],
                                  num_filters[1],
                                  kernel_size=3,
                                  padding=1)
def __init__(self,
             output_shape,
             use_norm=True,
             num_input_features=128,
             num_filters_down1=[64],
             num_filters_down2=[64, 64],
             name='SpMiddleFHD'):
    """Build SpMiddleFHD_full3d: a spconv sparse 3D backbone followed by a
    dense 3D conv stack that collapses the height axis.

    ``middle_conv`` downsamples the sparse voxel grid
    ([1600, 1200, 41] -> [200, 150, 11] per the inline comments), then
    ``full_conv`` runs dense Conv3d layers to squeeze the vertical
    dimension ([.., 11] -> [.., 2]).

    Args:
        output_shape: voxel grid shape; [1:4] (+ [1, 0, 0]) gives the
            spconv spatial shape.
        use_norm: use BatchNorm(1d/3d) after convs; convs then drop bias.
        num_input_features: channels of the input voxel features.
        num_filters_down1 / num_filters_down2: kept for interface
            compatibility; the channel widths below are hard-coded.
        name: module name tag.
    """
    super(SpMiddleFHD_full3d, self).__init__()
    self.name = name
    if use_norm:
        BatchNorm2d = change_default_args(eps=1e-3,
                                          momentum=0.01)(nn.BatchNorm2d)
        BatchNorm1d = change_default_args(eps=1e-3,
                                          momentum=0.01)(nn.BatchNorm1d)
        BatchNorm3d = change_default_args(eps=1e-3,
                                          momentum=0.01)(nn.BatchNorm3d)
        Conv2d = change_default_args(bias=False)(nn.Conv2d)
        SpConv3d = change_default_args(bias=False)(spconv.SparseConv3d)
        SubMConv3d = change_default_args(bias=False)(spconv.SubMConv3d)
        ConvTranspose2d = change_default_args(bias=False)(
            nn.ConvTranspose2d)
    else:
        BatchNorm2d = Empty
        BatchNorm1d = Empty
        # BUGFIX: BatchNorm3d was not assigned in this branch, so building
        # self.full_conv below raised NameError whenever use_norm=False.
        BatchNorm3d = Empty
        Conv2d = change_default_args(bias=True)(nn.Conv2d)
        SpConv3d = change_default_args(bias=True)(spconv.SparseConv3d)
        SubMConv3d = change_default_args(bias=True)(spconv.SubMConv3d)
        ConvTranspose2d = change_default_args(bias=True)(
            nn.ConvTranspose2d)
    sparse_shape = np.array(output_shape[1:4]) + [1, 0, 0]
    # sparse_shape[0] = 11
    print(sparse_shape)
    self.sparse_shape = sparse_shape
    self.voxel_output_shape = output_shape
    # input: [1600, 1200, 41]
    self.middle_conv = spconv.SparseSequential(
        SubMConv3d(num_input_features, 16, 3, indice_key="subm0"),
        BatchNorm1d(16),
        nn.ReLU(),
        SubMConv3d(16, 16, 3, indice_key="subm0"),
        BatchNorm1d(16),
        nn.ReLU(),
        SpConv3d(16, 32, 3, 2,
                 padding=1),  # [1600, 1200, 41] -> [800, 600, 21]
        BatchNorm1d(32),
        nn.ReLU(),
        SubMConv3d(32, 32, 3, indice_key="subm1"),
        BatchNorm1d(32),
        nn.ReLU(),
        SpConv3d(32, 64, 3, 2,
                 padding=1),  # [800, 600, 21] -> [400, 300, 11]
        BatchNorm1d(64),
        nn.ReLU(),
        SubMConv3d(64, 64, 3, indice_key="subm2"),
        BatchNorm1d(64),
        nn.ReLU(),
        SubMConv3d(64, 64, 3, indice_key="subm2"),
        BatchNorm1d(64),
        nn.ReLU(),
        ####################
        SpConv3d(64, 64, 3, [1, 2, 2],
                 padding=[1, 1, 1]),  # [400, 300, 11] -> [200, 150, 11]
        BatchNorm1d(64),
        nn.ReLU(),
        SubMConv3d(64, 64, 3, indice_key="subm3"),
        BatchNorm1d(64),
        nn.ReLU(),
        SubMConv3d(64, 64, 3, indice_key="subm3"),
        BatchNorm1d(64),
        nn.ReLU(),
    )
    # Dense 3D convs: collapse the height axis in two stride-[2,1,1] steps.
    self.full_conv = Sequential(
        torch.nn.Conv3d(64, 64, 3, [2, 1, 1], padding=[0, 1, 1],
                        bias=False),  # [400, 300, 11] -> [200, 150, 5]
        BatchNorm3d(64),
        nn.ReLU(),
        torch.nn.Conv3d(64, 64, 3, [1, 1, 1], padding=1,
                        bias=False),  # [400, 300, 5] -> [200, 150, 5]
        BatchNorm3d(64),
        nn.ReLU(),
        torch.nn.Conv3d(64, 64, 3, [2, 1, 1], padding=[0, 1, 1],
                        bias=False),  # [400, 300, 11] -> [200, 150, 2]
        BatchNorm3d(64),
        nn.ReLU(),
    )  # [200, 150, 11] -> [200, 150, 2]
    self.max_batch_size = 6
class RPN(nn.Module):
    """SECOND-style region proposal network over a BEV feature map.

    Three strided conv stages (block1-3) each feed a transposed-conv
    upsampler (deconv1-3); the upsampled maps are channel-concatenated and
    1x1 heads predict class, box, and (optionally) direction per anchor.
    Optionally a small ``bev_extractor`` CNN processes an auxiliary BEV
    input that is concatenated before block2.
    """

    def __init__(self,
                 use_norm=True,
                 num_class=2,
                 layer_nums=[3, 5, 5],
                 layer_strides=[2, 2, 2],
                 num_filters=[128, 128, 256],
                 upsample_strides=[1, 2, 4],
                 num_upsample_filters=[256, 256, 256],
                 num_input_filters=128,
                 num_anchor_per_loc=2,
                 encode_background_as_zeros=True,
                 use_direction_classifier=True,
                 use_groupnorm=False,
                 num_groups=32,
                 use_bev=False,
                 box_code_size=7,
                 name='rpn'):
        """Construct the three conv/deconv stages and prediction heads.

        Args:
            use_norm: use BatchNorm2d (or GroupNorm) after convs; convs
                then drop their bias.
            num_class: number of foreground classes.
            layer_nums: extra conv-BN-ReLU repeats per stage.
            layer_strides: stride of each stage's first conv.
            num_filters: channel width per stage.
            upsample_strides / num_upsample_filters: deconv stride and
                output channels per stage.
            num_input_filters: channels of the input BEV map.
            num_anchor_per_loc: anchors per BEV cell.
            encode_background_as_zeros: if False, one extra class slot is
                reserved for background.
            use_direction_classifier: adds a 2-bin direction head.
            use_bev: adds the auxiliary BEV extractor branch.
            box_code_size: regression targets per anchor.
        """
        super(RPN, self).__init__()
        self._num_anchor_per_loc = num_anchor_per_loc
        self._use_direction_classifier = use_direction_classifier
        self._use_bev = use_bev
        assert len(layer_nums) == 3
        assert len(layer_strides) == len(layer_nums)
        assert len(num_filters) == len(layer_nums)
        assert len(upsample_strides) == len(layer_nums)
        assert len(num_upsample_filters) == len(layer_nums)
        # All three upsampled maps must reach the same resolution.
        factors = []
        for i in range(len(layer_nums)):
            assert int(np.prod(
                layer_strides[:i + 1])) % upsample_strides[i] == 0
            factors.append(
                np.prod(layer_strides[:i + 1]) // upsample_strides[i])
        assert all([x == factors[0] for x in factors])
        if use_norm:
            if use_groupnorm:
                BatchNorm2d = change_default_args(num_groups=num_groups,
                                                  eps=1e-3)(GroupNorm)
            else:
                BatchNorm2d = change_default_args(eps=1e-3, momentum=0.01)(
                    nn.BatchNorm2d)
            Conv2d = change_default_args(bias=False)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=False)(
                nn.ConvTranspose2d)
        else:
            # No norm layer -> convs keep their bias terms.
            BatchNorm2d = Empty
            Conv2d = change_default_args(bias=True)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=True)(
                nn.ConvTranspose2d)
        # note that when stride > 1, conv2d with same padding isn't
        # equal to pad-conv2d. we should use pad-conv2d.
        block2_input_filters = num_filters[0]
        if use_bev:
            self.bev_extractor = Sequential(
                Conv2d(6, 32, 3, padding=1),
                BatchNorm2d(32),
                nn.ReLU(),
                # nn.MaxPool2d(2, 2),
                Conv2d(32, 64, 3, padding=1),
                BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            # BEV features are concatenated ahead of block2.
            block2_input_filters += 64
        self.block1 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(num_input_filters,
                   num_filters[0],
                   3,
                   stride=layer_strides[0]),
            BatchNorm2d(num_filters[0]),
            nn.ReLU(),
        )
        for i in range(layer_nums[0]):
            self.block1.add(
                Conv2d(num_filters[0], num_filters[0], 3, padding=1))
            self.block1.add(BatchNorm2d(num_filters[0]))
            self.block1.add(nn.ReLU())
        self.deconv1 = Sequential(
            ConvTranspose2d(num_filters[0],
                            num_upsample_filters[0],
                            upsample_strides[0],
                            stride=upsample_strides[0]),
            BatchNorm2d(num_upsample_filters[0]),
            nn.ReLU(),
        )
        self.block2 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(block2_input_filters,
                   num_filters[1],
                   3,
                   stride=layer_strides[1]),
            BatchNorm2d(num_filters[1]),
            nn.ReLU(),
        )
        for i in range(layer_nums[1]):
            self.block2.add(
                Conv2d(num_filters[1], num_filters[1], 3, padding=1))
            self.block2.add(BatchNorm2d(num_filters[1]))
            self.block2.add(nn.ReLU())
        self.deconv2 = Sequential(
            ConvTranspose2d(num_filters[1],
                            num_upsample_filters[1],
                            upsample_strides[1],
                            stride=upsample_strides[1]),
            BatchNorm2d(num_upsample_filters[1]),
            nn.ReLU(),
        )
        self.block3 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(num_filters[1], num_filters[2], 3,
                   stride=layer_strides[2]),
            BatchNorm2d(num_filters[2]),
            nn.ReLU(),
        )
        for i in range(layer_nums[2]):
            self.block3.add(
                Conv2d(num_filters[2], num_filters[2], 3, padding=1))
            self.block3.add(BatchNorm2d(num_filters[2]))
            self.block3.add(nn.ReLU())
        self.deconv3 = Sequential(
            ConvTranspose2d(num_filters[2],
                            num_upsample_filters[2],
                            upsample_strides[2],
                            stride=upsample_strides[2]),
            BatchNorm2d(num_upsample_filters[2]),
            nn.ReLU(),
        )
        if encode_background_as_zeros:
            num_cls = num_anchor_per_loc * num_class
        else:
            # Background gets its own explicit class slot.
            num_cls = num_anchor_per_loc * (num_class + 1)
        self.conv_cls = nn.Conv2d(sum(num_upsample_filters), num_cls, 1)
        self.conv_box = nn.Conv2d(sum(num_upsample_filters),
                                  num_anchor_per_loc * box_code_size, 1)
        if use_direction_classifier:
            self.conv_dir_cls = nn.Conv2d(sum(num_upsample_filters),
                                          num_anchor_per_loc * 2, 1)

    def forward(self, x, bev=None):
        """Run the pyramid and heads.

        Args:
            x: BEV feature map, (N, num_input_filters, H, W).
            bev: optional auxiliary BEV tensor; its last channel is
                log-compressed in place before feature extraction.

        Returns:
            dict with ``box_preds`` / ``cls_preds`` (and ``dir_cls_preds``
            when enabled), each permuted to channels-last
            (N, H, W, per-anchor-dims).
        """
        x = self.block1(x)
        up1 = self.deconv1(x)
        if self._use_bev:
            # Log-scale the density channel (in place) before extraction.
            bev[:, -1] = torch.clamp(torch.log(1 + bev[:, -1]) /
                                     np.log(16.0),
                                     max=1.0)
            x = torch.cat([x, self.bev_extractor(bev)], dim=1)
        x = self.block2(x)
        up2 = self.deconv2(x)
        x = self.block3(x)
        up3 = self.deconv3(x)
        x = torch.cat([up1, up2, up3], dim=1)
        box_preds = self.conv_box(x)
        cls_preds = self.conv_cls(x)
        # [N, C, y(H), x(W)]
        box_preds = box_preds.permute(0, 2, 3, 1).contiguous()
        cls_preds = cls_preds.permute(0, 2, 3, 1).contiguous()
        ret_dict = {
            "box_preds": box_preds,
            "cls_preds": cls_preds,
        }
        if self._use_direction_classifier:
            dir_cls_preds = self.conv_dir_cls(x)
            dir_cls_preds = dir_cls_preds.permute(0, 2, 3, 1).contiguous()
            ret_dict["dir_cls_preds"] = dir_cls_preds
        return ret_dict
class PSA(nn.Module):
    """Three-scale RPN backbone with a coarse head plus a PSA refine stage.

    Three stride-down conv blocks produce features at three scales; each is
    brought back to a common resolution by a transposed-conv branch and the
    three maps are concatenated for the coarse cls/box(/dir) heads.  A second
    stage then cross-fuses the three block outputs (max-pooling the finer
    maps down, transposed-convs upsampling the coarser ones), passes each
    fused map through an RF block, adds a bottleneck of the coarse features,
    and runs a second set of cls/loc(/dir) heads on the concatenation.
    """

    def __init__(self,
                 use_norm=True,
                 num_class=2,
                 layer_nums=[3, 5, 5],
                 layer_strides=[2, 2, 2],
                 num_filters=[128, 128, 256],
                 upsample_strides=[1, 2, 4],
                 num_upsample_filters=[256, 256, 256],
                 num_input_filters=128,
                 num_anchor_per_loc=2,
                 encode_background_as_zeros=True,
                 use_direction_classifier=True,
                 use_groupnorm=False,
                 num_groups=32,
                 use_bev=False,
                 box_code_size=7,
                 name='psa'):
        """
        :param use_norm: insert norm layers (convs are then built bias=False)
        :param num_class: number of object classes predicted by the cls heads
        :param layer_nums: count of extra 3x3 convs appended to each block
        :param layer_strides: stride of the first conv of each block
        :param num_filters: output channels of each block
        :param upsample_strides: stride of each upsample ("deconv") branch
        :param num_upsample_filters: output channels of each deconv branch
        :param num_input_filters: channels of the input feature map
        :param num_anchor_per_loc: anchors per feature-map location
        :param encode_background_as_zeros: if True the cls head emits
            num_class scores per anchor, else num_class + 1 (explicit
            background bin)
        :param use_direction_classifier: add 2-way direction heads
        :param use_groupnorm: use GroupNorm instead of BatchNorm
        :param num_groups: group count for GroupNorm
        :param use_bev: also consume a 6-channel BEV map via a small extractor
        :param box_code_size: regression targets per anchor
        :param name: module name tag
        """
        super(PSA, self).__init__()
        self._num_anchor_per_loc = num_anchor_per_loc  ## 2
        self._use_direction_classifier = use_direction_classifier  # True
        self._use_bev = use_bev  # False
        assert len(layer_nums) == 3
        assert len(layer_strides) == len(layer_nums)
        assert len(num_filters) == len(layer_nums)
        assert len(upsample_strides) == len(layer_nums)
        assert len(num_upsample_filters) == len(layer_nums)
        # each block's downsample factor divided by its deconv upsample
        # stride must agree across branches so the three upsampled maps
        # share one spatial resolution and can be concatenated
        factors = []
        for i in range(len(layer_nums)):
            assert int(np.prod(
                layer_strides[:i + 1])) % upsample_strides[i] == 0
            factors.append(
                np.prod(layer_strides[:i + 1]) // upsample_strides[i])
        assert all([x == factors[0] for x in factors])
        if use_norm:  # True
            if use_groupnorm:
                BatchNorm2d = change_default_args(
                    num_groups=num_groups, eps=1e-3)(GroupNorm)
            else:
                BatchNorm2d = change_default_args(
                    eps=1e-3, momentum=0.01)(nn.BatchNorm2d)
            Conv2d = change_default_args(bias=False)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=False)(
                nn.ConvTranspose2d)
        else:
            BatchNorm2d = Empty
            Conv2d = change_default_args(bias=True)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=True)(
                nn.ConvTranspose2d)
        # note that when stride > 1, conv2d with same padding isn't
        # equal to pad-conv2d. we should use pad-conv2d.
        block2_input_filters = num_filters[0]
        if use_bev:
            self.bev_extractor = Sequential(
                Conv2d(6, 32, 3, padding=1),
                BatchNorm2d(32),
                nn.ReLU(),
                # nn.MaxPool2d(2, 2),
                Conv2d(32, 64, 3, padding=1),
                BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            block2_input_filters += 64
        self.block1 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(num_input_filters, num_filters[0], 3,
                   stride=layer_strides[0]),
            BatchNorm2d(num_filters[0]),
            nn.ReLU(),
        )
        for i in range(layer_nums[0]):
            self.block1.add(
                Conv2d(num_filters[0], num_filters[0], 3, padding=1))
            self.block1.add(BatchNorm2d(num_filters[0]))
            self.block1.add(nn.ReLU())
        self.deconv1 = Sequential(
            ConvTranspose2d(num_filters[0], num_upsample_filters[0],
                            upsample_strides[0],
                            stride=upsample_strides[0]),
            BatchNorm2d(num_upsample_filters[0]),
            nn.ReLU(),
        )
        self.block2 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(block2_input_filters, num_filters[1], 3,
                   stride=layer_strides[1]),
            BatchNorm2d(num_filters[1]),
            nn.ReLU(),
        )
        for i in range(layer_nums[1]):
            self.block2.add(
                Conv2d(num_filters[1], num_filters[1], 3, padding=1))
            self.block2.add(BatchNorm2d(num_filters[1]))
            self.block2.add(nn.ReLU())
        self.deconv2 = Sequential(
            ConvTranspose2d(num_filters[1], num_upsample_filters[1],
                            upsample_strides[1],
                            stride=upsample_strides[1]),
            BatchNorm2d(num_upsample_filters[1]),
            nn.ReLU(),
        )
        self.block3 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(num_filters[1], num_filters[2], 3,
                   stride=layer_strides[2]),
            BatchNorm2d(num_filters[2]),
            nn.ReLU(),
        )
        for i in range(layer_nums[2]):
            self.block3.add(
                Conv2d(num_filters[2], num_filters[2], 3, padding=1))
            self.block3.add(BatchNorm2d(num_filters[2]))
            self.block3.add(nn.ReLU())
        self.deconv3 = Sequential(
            ConvTranspose2d(num_filters[2], num_upsample_filters[2],
                            upsample_strides[2],
                            stride=upsample_strides[2]),
            BatchNorm2d(num_upsample_filters[2]),
            nn.ReLU(),
        )
        if encode_background_as_zeros:
            num_cls = num_anchor_per_loc * num_class
        else:
            num_cls = num_anchor_per_loc * (num_class + 1)
        # coarse 1x1 heads over the concatenated upsampled features
        self.conv_cls = nn.Conv2d(sum(num_upsample_filters), num_cls, 1)
        self.conv_box = nn.Conv2d(sum(num_upsample_filters),
                                  num_anchor_per_loc * box_code_size, 1)
        if use_direction_classifier:
            self.conv_dir_cls = nn.Conv2d(sum(num_upsample_filters),
                                          num_anchor_per_loc * 2, 1)
        ################### refine
        # bottleneck over the coarse concat; added to every refined branch
        self.bottle_conv = nn.Conv2d(sum(num_upsample_filters),
                                     sum(num_upsample_filters) // 3, 1)
        # cross-scale resamplers: max-pool finer maps down, transposed-conv
        # coarser maps up, so each fusion_block sees all three scales
        self.block1_dec2x = nn.MaxPool2d(kernel_size=2)  ### C=64
        self.block1_dec4x = nn.MaxPool2d(kernel_size=4)  ### C=64
        self.block2_dec2x = nn.MaxPool2d(kernel_size=2)  ### C=128
        self.block2_inc2x = ConvTranspose2d(
            num_filters[1], num_filters[0] // 2, upsample_strides[1],
            stride=upsample_strides[1])  ### C=32
        self.block3_inc2x = ConvTranspose2d(
            num_filters[2], num_filters[1] // 2, upsample_strides[1],
            stride=upsample_strides[1])  #### C=64
        self.block3_inc4x = ConvTranspose2d(
            num_filters[2], num_filters[0] // 2, upsample_strides[2],
            stride=upsample_strides[2])  #### C=32
        # 1x1 fusions; input channels follow the concatenations in forward()
        self.fusion_block1 = nn.Conv2d(
            num_filters[0] + num_filters[0] // 2 + num_filters[0] // 2,
            num_filters[0], 1)
        self.fusion_block2 = nn.Conv2d(
            num_filters[0] + num_filters[1] + num_filters[1] // 2,
            num_filters[1], 1)
        self.fusion_block3 = nn.Conv2d(
            num_filters[0] + num_filters[1] + num_filters[2],
            num_filters[2], 1)
        self.refine_up1 = Sequential(
            ConvTranspose2d(num_filters[0], num_upsample_filters[0],
                            upsample_strides[0],
                            stride=upsample_strides[0]),
            BatchNorm2d(num_upsample_filters[0]),
            nn.ReLU(),
        )
        self.refine_up2 = Sequential(
            ConvTranspose2d(num_filters[1], num_upsample_filters[1],
                            upsample_strides[1],
                            stride=upsample_strides[1]),
            BatchNorm2d(num_upsample_filters[1]),
            nn.ReLU(),
        )
        self.refine_up3 = Sequential(
            ConvTranspose2d(num_filters[2], num_upsample_filters[2],
                            upsample_strides[2],
                            stride=upsample_strides[2]),
            BatchNorm2d(num_upsample_filters[2]),
            nn.ReLU(),
        )
        #######
        # RF ("receptive field") blocks sized by the project config.
        # NOTE(review): "C_Reudce" looks like a typo for "C_Reduce", but it
        # must match the key actually declared in cfg — verify there.
        C_Bottle = cfg.PSA.C_Bottle
        C = cfg.PSA.C_Reudce
        self.RF1 = Sequential(  # 3*3
            Conv2d(C_Bottle * 2, C, kernel_size=1, stride=1),
            BatchNorm2d(C),
            nn.ReLU(inplace=True),
            Conv2d(C, C_Bottle * 2,
                   kernel_size=3, stride=1, padding=1, dilation=1),
            BatchNorm2d(C_Bottle * 2),
            nn.ReLU(inplace=True),
        )
        self.RF2 = Sequential(  # 5*5
            Conv2d(C_Bottle, C, kernel_size=3, stride=1, padding=1),
            BatchNorm2d(C),
            nn.ReLU(inplace=True),
            Conv2d(C, C_Bottle,
                   kernel_size=3, stride=1, padding=1, dilation=1),
            BatchNorm2d(C_Bottle),
            nn.ReLU(inplace=True),
        )
        self.RF3 = Sequential(  # 7*7
            Conv2d(C_Bottle // 2, C, kernel_size=3, stride=1, padding=1),
            BatchNorm2d(C),
            nn.ReLU(inplace=True),
            Conv2d(C, C, kernel_size=3, stride=1, padding=1),
            BatchNorm2d(C),
            nn.ReLU(inplace=True),
            Conv2d(C, C_Bottle // 2, kernel_size=3, stride=1, padding=1),
            BatchNorm2d(C_Bottle // 2),
            nn.ReLU(inplace=True),
        )
        # NOTE(review): these expect num_filters[1]-channel inputs; the
        # branch sums they receive have num_upsample_filters-channel maps,
        # so cfg must be chosen so the widths line up — confirm against the
        # training config.
        self.concat_conv1 = nn.Conv2d(num_filters[1], num_filters[1],
                                      kernel_size=3,
                                      padding=1)  ## kernel_size=3
        self.concat_conv2 = nn.Conv2d(num_filters[1], num_filters[1],
                                      kernel_size=3, padding=1)
        self.concat_conv3 = nn.Conv2d(num_filters[1], num_filters[1],
                                      kernel_size=3, padding=1)
        self.refine_cls = nn.Conv2d(sum(num_upsample_filters), num_cls, 1)
        self.refine_loc = nn.Conv2d(sum(num_upsample_filters),
                                    num_anchor_per_loc * box_code_size, 1)
        if use_direction_classifier:
            self.refine_dir = nn.Conv2d(sum(num_upsample_filters),
                                        num_anchor_per_loc * 2, 1)

    def forward(self, x, bev=None):
        """Run the coarse and refine stages.

        Returns a dict with coarse "box_preds"/"cls_preds" (plus
        "dir_cls_preds" when enabled) and refined "Refine_loc_preds"/
        "Refine_cls_preds" (plus "Refine_dir_preds"), each permuted to
        [N, H, W, C].
        """
        x1 = self.block1(x)
        up1 = self.deconv1(x1)
        x2 = self.block2(x1)
        up2 = self.deconv2(x2)
        x3 = self.block3(x2)
        up3 = self.deconv3(x3)
        coarse_feat = torch.cat([up1, up2, up3], dim=1)
        box_preds = self.conv_box(coarse_feat)
        cls_preds = self.conv_cls(coarse_feat)
        # [N, C, y(H), x(W)]
        box_preds = box_preds.permute(0, 2, 3, 1).contiguous()
        cls_preds = cls_preds.permute(0, 2, 3, 1).contiguous()
        ret_dict = {
            "box_preds": box_preds,
            "cls_preds": cls_preds,
        }
        if self._use_direction_classifier:
            dir_cls_preds = self.conv_dir_cls(coarse_feat)
            dir_cls_preds = dir_cls_preds.permute(0, 2, 3, 1).contiguous()
            ret_dict["dir_cls_preds"] = dir_cls_preds
        ###############Refine:
        blottle_conv = self.bottle_conv(coarse_feat)
        # resample every block output to every scale
        x1_dec2x = self.block1_dec2x(x1)
        x1_dec4x = self.block1_dec4x(x1)
        x2_dec2x = self.block2_dec2x(x2)
        x2_inc2x = self.block2_inc2x(x2)
        x3_inc2x = self.block3_inc2x(x3)
        x3_inc4x = self.block3_inc4x(x3)
        # per-scale fusion of all three resampled feature maps
        concat_block1 = torch.cat([x1, x2_inc2x, x3_inc4x], dim=1)
        fusion_block1 = self.fusion_block1(concat_block1)
        concat_block2 = torch.cat([x1_dec2x, x2, x3_inc2x], dim=1)
        fusion_block2 = self.fusion_block2(concat_block2)
        concat_block3 = torch.cat([x1_dec4x, x2_dec2x, x3], dim=1)
        fusion_block3 = self.fusion_block3(concat_block3)
        # RF block + upsample per branch (finest scale uses RF3, coarsest RF1)
        refine_up1 = self.RF3(fusion_block1)
        refine_up1 = self.refine_up1(refine_up1)
        refine_up2 = self.RF2(fusion_block2)
        refine_up2 = self.refine_up2(refine_up2)
        refine_up3 = self.RF1(fusion_block3)
        refine_up3 = self.refine_up3(refine_up3)
        # inject the coarse bottleneck into every branch
        branch1_sum_wise = refine_up1 + blottle_conv
        branch2_sum_wise = refine_up2 + blottle_conv
        branch3_sum_wise = refine_up3 + blottle_conv
        concat_conv1 = self.concat_conv1(branch1_sum_wise)
        concat_conv2 = self.concat_conv2(branch2_sum_wise)
        concat_conv3 = self.concat_conv3(branch3_sum_wise)
        PSA_output = torch.cat([concat_conv1, concat_conv2, concat_conv3],
                               dim=1)
        refine_cls_preds = self.refine_cls(PSA_output)
        refine_loc_preds = self.refine_loc(PSA_output)
        refine_loc_preds = refine_loc_preds.permute(0, 2, 3, 1).contiguous()
        refine_cls_preds = refine_cls_preds.permute(0, 2, 3, 1).contiguous()
        ret_dict["Refine_loc_preds"] = refine_loc_preds
        ret_dict["Refine_cls_preds"] = refine_cls_preds
        if self._use_direction_classifier:
            refine_dir_preds = self.refine_dir(PSA_output)
            refine_dir_preds = refine_dir_preds.permute(0, 2, 3,
                                                        1).contiguous()
            ret_dict["Refine_dir_preds"] = refine_dir_preds
        return ret_dict
class RPN(nn.Module):
    """Deprecated three-block RPN, kept for checkpoint backward
    compatibility with SECOND v1.0.

    Three stride-down conv blocks, each paired with a transposed-conv
    upsample branch; the three upsampled maps are concatenated and fed to
    1x1 cls/box heads, plus optional direction and rc heads.
    """

    def __init__(self,
                 use_norm=True,
                 num_class=2,
                 layer_nums=(3, 5, 5),
                 layer_strides=(2, 2, 2),
                 num_filters=(128, 128, 256),
                 upsample_strides=(1, 2, 4),
                 num_upsample_filters=(256, 256, 256),
                 num_input_features=128,
                 num_anchor_per_loc=2,
                 encode_background_as_zeros=True,
                 use_direction_classifier=True,
                 use_groupnorm=False,
                 num_groups=32,
                 box_code_size=7,
                 num_direction_bins=2,
                 name='rpn',
                 use_rc_net=False):
        """deprecated. exists for checkpoint backward compilability (SECOND v1.0)

        `use_rc_net` (appended with a backward-compatible default of False)
        controls the optional rc regression head.
        """
        super(RPN, self).__init__()
        self._num_anchor_per_loc = num_anchor_per_loc
        self._use_direction_classifier = use_direction_classifier
        # BUGFIX: self._use_rc_net was read below and in forward() but was
        # never assigned, so constructing RPN() raised AttributeError.
        self._use_rc_net = use_rc_net
        assert len(layer_nums) == 3
        assert len(layer_strides) == len(layer_nums)
        assert len(num_filters) == len(layer_nums)
        assert len(upsample_strides) == len(layer_nums)
        assert len(num_upsample_filters) == len(layer_nums)
        upsample_strides = [
            np.round(u).astype(np.int64) for u in upsample_strides
        ]
        # downsample/upsample factors must agree so the three upsampled maps
        # share one spatial resolution and can be concatenated
        factors = []
        for i in range(len(layer_nums)):
            assert int(np.prod(
                layer_strides[:i + 1])) % upsample_strides[i] == 0
            factors.append(
                np.prod(layer_strides[:i + 1]) // upsample_strides[i])
        assert all([x == factors[0] for x in factors])
        if use_norm:
            if use_groupnorm:
                BatchNorm2d = change_default_args(
                    num_groups=num_groups, eps=1e-3)(GroupNorm)
            else:
                BatchNorm2d = change_default_args(
                    eps=1e-3, momentum=0.01)(nn.BatchNorm2d)
            Conv2d = change_default_args(bias=False)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=False)(
                nn.ConvTranspose2d)
        else:
            BatchNorm2d = Empty
            Conv2d = change_default_args(bias=True)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=True)(
                nn.ConvTranspose2d)
        # note that when stride > 1, conv2d with same padding isn't
        # equal to pad-conv2d. we should use pad-conv2d.
        block2_input_filters = num_filters[0]
        self.block1 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(
                num_input_features, num_filters[0], 3,
                stride=layer_strides[0]),
            BatchNorm2d(num_filters[0]),
            nn.ReLU(),
        )
        for i in range(layer_nums[0]):
            self.block1.add(
                Conv2d(num_filters[0], num_filters[0], 3, padding=1))
            self.block1.add(BatchNorm2d(num_filters[0]))
            self.block1.add(nn.ReLU())
        self.deconv1 = Sequential(
            ConvTranspose2d(
                num_filters[0],
                num_upsample_filters[0],
                upsample_strides[0],
                stride=upsample_strides[0]),
            BatchNorm2d(num_upsample_filters[0]),
            nn.ReLU(),
        )
        self.block2 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(
                block2_input_filters,
                num_filters[1],
                3,
                stride=layer_strides[1]),
            BatchNorm2d(num_filters[1]),
            nn.ReLU(),
        )
        for i in range(layer_nums[1]):
            self.block2.add(
                Conv2d(num_filters[1], num_filters[1], 3, padding=1))
            self.block2.add(BatchNorm2d(num_filters[1]))
            self.block2.add(nn.ReLU())
        self.deconv2 = Sequential(
            ConvTranspose2d(
                num_filters[1],
                num_upsample_filters[1],
                upsample_strides[1],
                stride=upsample_strides[1]),
            BatchNorm2d(num_upsample_filters[1]),
            nn.ReLU(),
        )
        self.block3 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(num_filters[1], num_filters[2], 3,
                   stride=layer_strides[2]),
            BatchNorm2d(num_filters[2]),
            nn.ReLU(),
        )
        for i in range(layer_nums[2]):
            self.block3.add(
                Conv2d(num_filters[2], num_filters[2], 3, padding=1))
            self.block3.add(BatchNorm2d(num_filters[2]))
            self.block3.add(nn.ReLU())
        self.deconv3 = Sequential(
            ConvTranspose2d(
                num_filters[2],
                num_upsample_filters[2],
                upsample_strides[2],
                stride=upsample_strides[2]),
            BatchNorm2d(num_upsample_filters[2]),
            nn.ReLU(),
        )
        if encode_background_as_zeros:
            num_cls = num_anchor_per_loc * num_class
        else:
            num_cls = num_anchor_per_loc * (num_class + 1)
        self.conv_cls = nn.Conv2d(sum(num_upsample_filters), num_cls, 1)
        self.conv_box = nn.Conv2d(
            sum(num_upsample_filters), num_anchor_per_loc * box_code_size,
            1)
        if use_direction_classifier:
            self.conv_dir_cls = nn.Conv2d(
                sum(num_upsample_filters),
                num_anchor_per_loc * num_direction_bins, 1)
        if self._use_rc_net:
            self.conv_rc = nn.Conv2d(
                sum(num_upsample_filters),
                num_anchor_per_loc * box_code_size, 1)

    def forward(self, x):
        """Return a dict of predictions, each permuted to [N, H, W, C]:
        "box_preds", "cls_preds", plus "dir_cls_preds" / "rc_preds" when the
        corresponding heads are enabled.
        """
        # t = time.time()
        # torch.cuda.synchronize()
        x = self.block1(x)
        up1 = self.deconv1(x)
        x = self.block2(x)
        up2 = self.deconv2(x)
        x = self.block3(x)
        up3 = self.deconv3(x)
        x = torch.cat([up1, up2, up3], dim=1)
        box_preds = self.conv_box(x)
        cls_preds = self.conv_cls(x)
        # [N, C, y(H), x(W)]
        box_preds = box_preds.permute(0, 2, 3, 1).contiguous()
        cls_preds = cls_preds.permute(0, 2, 3, 1).contiguous()
        ret_dict = {
            "box_preds": box_preds,
            "cls_preds": cls_preds,
        }
        if self._use_direction_classifier:
            dir_cls_preds = self.conv_dir_cls(x)
            dir_cls_preds = dir_cls_preds.permute(0, 2, 3, 1).contiguous()
            ret_dict["dir_cls_preds"] = dir_cls_preds
        if self._use_rc_net:
            rc_preds = self.conv_rc(x)
            rc_preds = rc_preds.permute(0, 2, 3, 1).contiguous()
            ret_dict["rc_preds"] = rc_preds
        # torch.cuda.synchronize()
        # print("rpn forward time", time.time() - t)
        return ret_dict
def __init__(self,
             use_norm=True,
             num_class=2,
             img_input_channel=3,
             img_extractor_layer_nums=[2, 3],
             layer_strides=[2, 2],
             num_filters=[32, 64],
             upsample_strides=[1, 2],
             num_upsample_filters=[128, 128],
             num_anchor_per_loc=2,
             encode_background_as_zeros=True,
             use_direction_classifier=True,
             use_groupnorm=False,
             num_groups=32,
             box_code_size=7,
             name='img_extractor_SSD_like'):
    """VGG16-like image feature extractor: stacked 3x3 conv(+norm)+ReLU
    stages, each closed by a 2x2 max-pool.

    Stages 1 and 2 are always built from img_extractor_layer_nums[0:2];
    a third stage is built only when img_extractor_layer_nums (and the
    matching num_filters) actually provide a third entry.
    """
    super(img_extractor_VGG16, self).__init__()
    self._num_anchor_per_loc = num_anchor_per_loc
    self._use_direction_classifier = use_direction_classifier
    assert len(layer_strides) == len(img_extractor_layer_nums)
    assert len(num_filters) == len(img_extractor_layer_nums)
    assert len(upsample_strides) == len(img_extractor_layer_nums)
    assert len(num_upsample_filters) == len(img_extractor_layer_nums)
    if use_norm:
        if use_groupnorm:
            BatchNorm2d = change_default_args(num_groups=num_groups,
                                              eps=1e-3)(GroupNorm)
        else:
            BatchNorm2d = change_default_args(eps=1e-3, momentum=0.01)(
                nn.BatchNorm2d)
        Conv2d = change_default_args(bias=False)(nn.Conv2d)
    else:
        BatchNorm2d = Empty
        Conv2d = change_default_args(bias=True)(nn.Conv2d)
    self.block1 = Sequential()
    for i in range(img_extractor_layer_nums[0]):
        # BUGFIX: the first conv previously hard-coded 3 input channels,
        # silently ignoring the img_input_channel parameter (default 3).
        if i == 0:
            block1_in = img_input_channel
        else:
            block1_in = num_filters[0]
        self.block1.add(Conv2d(block1_in, num_filters[0], 3, padding=1))
        self.block1.add(BatchNorm2d(num_filters[0]))
        self.block1.add(nn.ReLU(inplace=False))
    self.block1.add(torch.nn.MaxPool2d(kernel_size=2, stride=2))
    self.block2 = Sequential()
    for i in range(img_extractor_layer_nums[1]):
        if i == 0:
            block2_in = num_filters[0]
        else:
            block2_in = num_filters[1]
        self.block2.add(Conv2d(block2_in, num_filters[1], 3, padding=1))
        self.block2.add(BatchNorm2d(num_filters[1]))
        self.block2.add(nn.ReLU(inplace=False))
    self.block2.add(torch.nn.MaxPool2d(kernel_size=2, stride=2))
    self.block3 = Sequential()
    # BUGFIX: the third stage previously (a) indexed
    # img_extractor_layer_nums[2] / num_filters[2], which raises IndexError
    # with the two-stage defaults, and (b) appended all of its layers to
    # self.block2 instead of self.block3 (copy-paste error).  Build it only
    # when a third stage is configured, and into block3.
    if len(img_extractor_layer_nums) > 2:
        for i in range(img_extractor_layer_nums[2]):
            if i == 0:
                block3_in = num_filters[1]
            else:
                block3_in = num_filters[2]
            self.block3.add(Conv2d(block3_in, num_filters[2], 3, padding=1))
            self.block3.add(BatchNorm2d(num_filters[2]))
            self.block3.add(nn.ReLU(inplace=False))
        self.block3.add(torch.nn.MaxPool2d(kernel_size=2, stride=2))
def __init__(self,
             use_norm=True,
             num_class=2,
             layer_nums=[3, 5, 5],
             layer_strides=[2, 2, 2],
             num_filters=[128, 128, 256],
             upsample_strides=[1, 2, 4],
             num_upsample_filters=[256, 256, 256],
             num_input_features=128,
             num_anchor_per_loc=2,
             encode_background_as_zeros=True,
             use_direction_classifier=True,
             use_groupnorm=False,
             num_groups=32,
             use_bev=False,
             box_code_size=7,
             use_rc_net=False,
             name='rpn'):
    """Build RPNV2: a variable-depth stack of stride-down conv blocks with
    matching transposed-conv upsample branches (stored in nn.ModuleList
    `blocks` / `deblocks`), followed by 1x1 cls/box heads and optional
    direction / rc heads over the concatenated upsampled features.
    """
    super(RPNV2, self).__init__()
    self._num_anchor_per_loc = num_anchor_per_loc
    self._use_direction_classifier = use_direction_classifier
    self._use_bev = use_bev
    self._use_rc_net = use_rc_net
    # unlike the fixed 3-block variants, layer_nums may be any length here
    # assert len(layer_nums) == 3
    assert len(layer_strides) == len(layer_nums)
    assert len(num_filters) == len(layer_nums)
    assert len(upsample_strides) == len(layer_nums)
    assert len(num_upsample_filters) == len(layer_nums)
    # disabled resolution-consistency check kept for reference:
    """
    factors = []
    for i in range(len(layer_nums)):
        assert int(np.prod(layer_strides[:i + 1])) % upsample_strides[i] == 0
        factors.append(np.prod(layer_strides[:i + 1]) // upsample_strides[i])
    assert all([x == factors[0] for x in factors])
    """
    if use_norm:
        if use_groupnorm:
            BatchNorm2d = change_default_args(num_groups=num_groups,
                                              eps=1e-3)(GroupNorm)
        else:
            BatchNorm2d = change_default_args(eps=1e-3, momentum=0.01)(
                nn.BatchNorm2d)
        Conv2d = change_default_args(bias=False)(nn.Conv2d)
        ConvTranspose2d = change_default_args(bias=False)(
            nn.ConvTranspose2d)
    else:
        BatchNorm2d = Empty
        Conv2d = change_default_args(bias=True)(nn.Conv2d)
        ConvTranspose2d = change_default_args(bias=True)(
            nn.ConvTranspose2d)
    # block i consumes the previous block's output (the first consumes the
    # raw input features)
    in_filters = [num_input_features, *num_filters[:-1]]
    # note that when stride > 1, conv2d with same padding isn't
    # equal to pad-conv2d. we should use pad-conv2d.
    blocks = []
    deblocks = []
    for i, layer_num in enumerate(layer_nums):
        block = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(in_filters[i], num_filters[i], 3,
                   stride=layer_strides[i]),
            BatchNorm2d(num_filters[i]),
            nn.ReLU(),
        )
        for j in range(layer_num):
            block.add(Conv2d(num_filters[i], num_filters[i], 3, padding=1))
            block.add(BatchNorm2d(num_filters[i]))
            block.add(nn.ReLU())
        blocks.append(block)
        deblock = Sequential(
            ConvTranspose2d(num_filters[i], num_upsample_filters[i],
                            upsample_strides[i],
                            stride=upsample_strides[i]),
            BatchNorm2d(num_upsample_filters[i]),
            nn.ReLU(),
        )
        deblocks.append(deblock)
    self.blocks = nn.ModuleList(blocks)
    self.deblocks = nn.ModuleList(deblocks)
    if encode_background_as_zeros:
        num_cls = num_anchor_per_loc * num_class
    else:
        # one extra bin per anchor for the explicit background class
        num_cls = num_anchor_per_loc * (num_class + 1)
    self.conv_cls = nn.Conv2d(sum(num_upsample_filters), num_cls, 1)
    self.conv_box = nn.Conv2d(sum(num_upsample_filters),
                              num_anchor_per_loc * box_code_size, 1)
    if use_direction_classifier:
        self.conv_dir_cls = nn.Conv2d(sum(num_upsample_filters),
                                      num_anchor_per_loc * 2, 1)
    if self._use_rc_net:
        self.conv_rc = nn.Conv2d(sum(num_upsample_filters),
                                 num_anchor_per_loc * box_code_size, 1)
def __init__(self,
             in_channels=128,
             num_of_convs=4,
             prior_prob=0.01,
             use_norm=True,
             num_class=2,
             num_convs=12,
             layer_nums=(3, 5, 5),
             layer_strides=(1, 2, 2),
             num_filters=(128, 128, 256),
             upsample_strides=(1, 2, 4),
             num_upsample_filters=(256, 256, 256),
             num_input_features=128,
             num_anchor_per_loc=2,
             encode_background_as_zeros=True,
             use_direction_classifier=True,
             use_groupnorm=False,
             num_groups=32,
             box_code_size=7,
             name='rpn'):
    """Build the res_fpn backbone: three stride-down conv blocks with
    matching transposed-conv upsample branches.

    Arguments:
        input = (batch, channel, x, y)
        output = ret_dict
        in_channels (int): number of channels of the input feature
    """
    super(res_fpn, self).__init__()
    # convs_fpn head
    # downsample/upsample factors must agree so the upsampled maps share
    # one spatial resolution
    factors = []
    for i in range(len(layer_nums)):
        assert int(np.prod(
            layer_strides[:i + 1])) % upsample_strides[i] == 0
        factors.append(
            np.prod(layer_strides[:i + 1]) // upsample_strides[i])
    assert all([x == factors[0] for x in factors])
    if use_norm:
        if use_groupnorm:
            BatchNorm2d = change_default_args(num_groups=num_groups,
                                              eps=1e-3)(GroupNorm)
        else:
            BatchNorm2d = change_default_args(eps=1e-3, momentum=0.01)(
                nn.BatchNorm2d)
        Conv2d = change_default_args(bias=False)(nn.Conv2d)
        ConvTranspose2d = change_default_args(bias=False)(
            nn.ConvTranspose2d)
    else:
        BatchNorm2d = Empty
        Conv2d = change_default_args(bias=True)(nn.Conv2d)
        ConvTranspose2d = change_default_args(bias=True)(
            nn.ConvTranspose2d)
    # note that when stride > 1, conv2d with same padding isn't
    # equal to pad-conv2d. we should use pad-conv2d.
    block2_input_filters = num_filters[0]
    self.block1 = Sequential(
        nn.ZeroPad2d(1),
        Conv2d(num_input_features, num_filters[0], 3,
               stride=layer_strides[0]),
        BatchNorm2d(num_filters[0]),
        nn.ReLU(),
    )
    for i in range(layer_nums[0]):
        self.block1.add(
            Conv2d(num_filters[0], num_filters[0], 3, padding=1))
        self.block1.add(BatchNorm2d(num_filters[0]))
        self.block1.add(nn.ReLU())
    self.deconv1 = Sequential(
        ConvTranspose2d(num_filters[0], num_upsample_filters[0],
                        upsample_strides[0],
                        stride=upsample_strides[0]),
        BatchNorm2d(num_upsample_filters[0]),
        nn.ReLU(),
    )
    self.block2 = Sequential(
        nn.ZeroPad2d(1),
        Conv2d(block2_input_filters, num_filters[1], 3,
               stride=layer_strides[1]),
        BatchNorm2d(num_filters[1]),
        nn.ReLU(),
    )
    for i in range(layer_nums[1]):
        self.block2.add(
            Conv2d(num_filters[1], num_filters[1], 3, padding=1))
        self.block2.add(BatchNorm2d(num_filters[1]))
        self.block2.add(nn.ReLU())
    self.deconv2 = Sequential(
        ConvTranspose2d(num_filters[1], num_upsample_filters[1],
                        upsample_strides[1],
                        stride=upsample_strides[1]),
        BatchNorm2d(num_upsample_filters[1]),
        nn.ReLU(),
    )
    self.block3 = Sequential(
        nn.ZeroPad2d(1),
        Conv2d(num_filters[1], num_filters[2], 3,
               stride=layer_strides[2]),
        BatchNorm2d(num_filters[2]),
        nn.ReLU(),
    )
    for i in range(layer_nums[2]):
        self.block3.add(
            Conv2d(num_filters[2], num_filters[2], 3, padding=1))
        self.block3.add(BatchNorm2d(num_filters[2]))
        self.block3.add(nn.ReLU())
    self.deconv3 = Sequential(
        ConvTranspose2d(num_filters[2], num_upsample_filters[2],
                        upsample_strides[2],
                        stride=upsample_strides[2]),
        BatchNorm2d(num_upsample_filters[2]),
        nn.ReLU(),
    )
def __init__(self,
             use_norm=True,
             num_class=2,
             layer_nums=[3, 5, 5],
             layer_strides=[2, 2, 2],
             num_filters=[128, 128, 256],
             upsample_strides=[1, 2, 4],
             num_upsample_filters=[256, 256, 256],
             num_input_filters=128,
             num_anchor_per_loc=2,
             encode_background_as_zeros=True,
             use_direction_classifier=True,
             use_groupnorm=False,
             num_groups=32,
             use_bev=False,
             box_code_size=7,
             name='psa'):
    """Build the PSA network: three stride-down blocks + upsample branches,
    coarse cls/box(/dir) heads, and the refine stage (cross-scale fusion,
    RF blocks, bottleneck injection, refine heads).

    :param use_norm: insert norm layers (convs are then built bias=False)
    :param num_class: number of object classes predicted by the cls heads
    :param layer_nums: count of extra 3x3 convs appended to each block
    :param layer_strides: stride of the first conv of each block
    :param num_filters: output channels of each block
    :param upsample_strides: stride of each upsample ("deconv") branch
    :param num_upsample_filters: output channels of each deconv branch
    :param num_input_filters: channels of the input feature map
    :param num_anchor_per_loc: anchors per feature-map location
    :param encode_background_as_zeros: if True the cls head emits num_class
        scores per anchor, else num_class + 1 (explicit background bin)
    :param use_direction_classifier: add 2-way direction heads
    :param use_groupnorm: use GroupNorm instead of BatchNorm
    :param num_groups: group count for GroupNorm
    :param use_bev: also consume a 6-channel BEV map via a small extractor
    :param box_code_size: regression targets per anchor
    :param name: module name tag
    """
    super(PSA, self).__init__()
    self._num_anchor_per_loc = num_anchor_per_loc  ## 2
    self._use_direction_classifier = use_direction_classifier  # True
    self._use_bev = use_bev  # False
    assert len(layer_nums) == 3
    assert len(layer_strides) == len(layer_nums)
    assert len(num_filters) == len(layer_nums)
    assert len(upsample_strides) == len(layer_nums)
    assert len(num_upsample_filters) == len(layer_nums)
    # downsample/upsample factors must agree so the three upsampled maps
    # share one spatial resolution and can be concatenated
    factors = []
    for i in range(len(layer_nums)):
        assert int(np.prod(
            layer_strides[:i + 1])) % upsample_strides[i] == 0
        factors.append(
            np.prod(layer_strides[:i + 1]) // upsample_strides[i])
    assert all([x == factors[0] for x in factors])
    if use_norm:  # True
        if use_groupnorm:
            BatchNorm2d = change_default_args(num_groups=num_groups,
                                              eps=1e-3)(GroupNorm)
        else:
            BatchNorm2d = change_default_args(eps=1e-3, momentum=0.01)(
                nn.BatchNorm2d)
        Conv2d = change_default_args(bias=False)(nn.Conv2d)
        ConvTranspose2d = change_default_args(bias=False)(
            nn.ConvTranspose2d)
    else:
        BatchNorm2d = Empty
        Conv2d = change_default_args(bias=True)(nn.Conv2d)
        ConvTranspose2d = change_default_args(bias=True)(
            nn.ConvTranspose2d)
    # note that when stride > 1, conv2d with same padding isn't
    # equal to pad-conv2d. we should use pad-conv2d.
    block2_input_filters = num_filters[0]
    if use_bev:
        self.bev_extractor = Sequential(
            Conv2d(6, 32, 3, padding=1),
            BatchNorm2d(32),
            nn.ReLU(),
            # nn.MaxPool2d(2, 2),
            Conv2d(32, 64, 3, padding=1),
            BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )
        block2_input_filters += 64
    self.block1 = Sequential(
        nn.ZeroPad2d(1),
        Conv2d(num_input_filters, num_filters[0], 3,
               stride=layer_strides[0]),
        BatchNorm2d(num_filters[0]),
        nn.ReLU(),
    )
    for i in range(layer_nums[0]):
        self.block1.add(
            Conv2d(num_filters[0], num_filters[0], 3, padding=1))
        self.block1.add(BatchNorm2d(num_filters[0]))
        self.block1.add(nn.ReLU())
    self.deconv1 = Sequential(
        ConvTranspose2d(num_filters[0], num_upsample_filters[0],
                        upsample_strides[0],
                        stride=upsample_strides[0]),
        BatchNorm2d(num_upsample_filters[0]),
        nn.ReLU(),
    )
    self.block2 = Sequential(
        nn.ZeroPad2d(1),
        Conv2d(block2_input_filters, num_filters[1], 3,
               stride=layer_strides[1]),
        BatchNorm2d(num_filters[1]),
        nn.ReLU(),
    )
    for i in range(layer_nums[1]):
        self.block2.add(
            Conv2d(num_filters[1], num_filters[1], 3, padding=1))
        self.block2.add(BatchNorm2d(num_filters[1]))
        self.block2.add(nn.ReLU())
    self.deconv2 = Sequential(
        ConvTranspose2d(num_filters[1], num_upsample_filters[1],
                        upsample_strides[1],
                        stride=upsample_strides[1]),
        BatchNorm2d(num_upsample_filters[1]),
        nn.ReLU(),
    )
    self.block3 = Sequential(
        nn.ZeroPad2d(1),
        Conv2d(num_filters[1], num_filters[2], 3,
               stride=layer_strides[2]),
        BatchNorm2d(num_filters[2]),
        nn.ReLU(),
    )
    for i in range(layer_nums[2]):
        self.block3.add(
            Conv2d(num_filters[2], num_filters[2], 3, padding=1))
        self.block3.add(BatchNorm2d(num_filters[2]))
        self.block3.add(nn.ReLU())
    self.deconv3 = Sequential(
        ConvTranspose2d(num_filters[2], num_upsample_filters[2],
                        upsample_strides[2],
                        stride=upsample_strides[2]),
        BatchNorm2d(num_upsample_filters[2]),
        nn.ReLU(),
    )
    if encode_background_as_zeros:
        num_cls = num_anchor_per_loc * num_class
    else:
        num_cls = num_anchor_per_loc * (num_class + 1)
    # coarse 1x1 heads over the concatenated upsampled features
    self.conv_cls = nn.Conv2d(sum(num_upsample_filters), num_cls, 1)
    self.conv_box = nn.Conv2d(sum(num_upsample_filters),
                              num_anchor_per_loc * box_code_size, 1)
    if use_direction_classifier:
        self.conv_dir_cls = nn.Conv2d(sum(num_upsample_filters),
                                      num_anchor_per_loc * 2, 1)
    ################### refine
    # bottleneck over the coarse concat; added to every refined branch
    self.bottle_conv = nn.Conv2d(sum(num_upsample_filters),
                                 sum(num_upsample_filters) // 3, 1)
    # cross-scale resamplers: max-pool finer maps down, transposed-conv
    # coarser maps up, so each fusion conv sees all three scales
    self.block1_dec2x = nn.MaxPool2d(kernel_size=2)  ### C=64
    self.block1_dec4x = nn.MaxPool2d(kernel_size=4)  ### C=64
    self.block2_dec2x = nn.MaxPool2d(kernel_size=2)  ### C=128
    self.block2_inc2x = ConvTranspose2d(
        num_filters[1], num_filters[0] // 2, upsample_strides[1],
        stride=upsample_strides[1])  ### C=32
    self.block3_inc2x = ConvTranspose2d(
        num_filters[2], num_filters[1] // 2, upsample_strides[1],
        stride=upsample_strides[1])  #### C=64
    self.block3_inc4x = ConvTranspose2d(
        num_filters[2], num_filters[0] // 2, upsample_strides[2],
        stride=upsample_strides[2])  #### C=32
    # 1x1 fusions; input channels follow the concatenations in forward()
    self.fusion_block1 = nn.Conv2d(
        num_filters[0] + num_filters[0] // 2 + num_filters[0] // 2,
        num_filters[0], 1)
    self.fusion_block2 = nn.Conv2d(
        num_filters[0] + num_filters[1] + num_filters[1] // 2,
        num_filters[1], 1)
    self.fusion_block3 = nn.Conv2d(
        num_filters[0] + num_filters[1] + num_filters[2],
        num_filters[2], 1)
    self.refine_up1 = Sequential(
        ConvTranspose2d(num_filters[0], num_upsample_filters[0],
                        upsample_strides[0],
                        stride=upsample_strides[0]),
        BatchNorm2d(num_upsample_filters[0]),
        nn.ReLU(),
    )
    self.refine_up2 = Sequential(
        ConvTranspose2d(num_filters[1], num_upsample_filters[1],
                        upsample_strides[1],
                        stride=upsample_strides[1]),
        BatchNorm2d(num_upsample_filters[1]),
        nn.ReLU(),
    )
    self.refine_up3 = Sequential(
        ConvTranspose2d(num_filters[2], num_upsample_filters[2],
                        upsample_strides[2],
                        stride=upsample_strides[2]),
        BatchNorm2d(num_upsample_filters[2]),
        nn.ReLU(),
    )
    #######
    # RF ("receptive field") blocks sized by the project config.
    # NOTE(review): "C_Reudce" looks like a typo for "C_Reduce", but it
    # must match the key actually declared in cfg — verify there.
    C_Bottle = cfg.PSA.C_Bottle
    C = cfg.PSA.C_Reudce
    self.RF1 = Sequential(  # 3*3
        Conv2d(C_Bottle * 2, C, kernel_size=1, stride=1),
        BatchNorm2d(C),
        nn.ReLU(inplace=True),
        Conv2d(C, C_Bottle * 2,
               kernel_size=3, stride=1, padding=1, dilation=1),
        BatchNorm2d(C_Bottle * 2),
        nn.ReLU(inplace=True),
    )
    self.RF2 = Sequential(  # 5*5
        Conv2d(C_Bottle, C, kernel_size=3, stride=1, padding=1),
        BatchNorm2d(C),
        nn.ReLU(inplace=True),
        Conv2d(C, C_Bottle,
               kernel_size=3, stride=1, padding=1, dilation=1),
        BatchNorm2d(C_Bottle),
        nn.ReLU(inplace=True),
    )
    self.RF3 = Sequential(  # 7*7
        Conv2d(C_Bottle // 2, C, kernel_size=3, stride=1, padding=1),
        BatchNorm2d(C),
        nn.ReLU(inplace=True),
        Conv2d(C, C, kernel_size=3, stride=1, padding=1),
        BatchNorm2d(C),
        nn.ReLU(inplace=True),
        Conv2d(C, C_Bottle // 2, kernel_size=3, stride=1, padding=1),
        BatchNorm2d(C_Bottle // 2),
        nn.ReLU(inplace=True),
    )
    # NOTE(review): these expect num_filters[1]-channel inputs; the branch
    # sums they receive have num_upsample_filters-channel maps, so cfg must
    # be chosen so the widths line up — confirm against the training config.
    self.concat_conv1 = nn.Conv2d(num_filters[1], num_filters[1],
                                  kernel_size=3,
                                  padding=1)  ## kernel_size=3
    self.concat_conv2 = nn.Conv2d(num_filters[1], num_filters[1],
                                  kernel_size=3, padding=1)
    self.concat_conv3 = nn.Conv2d(num_filters[1], num_filters[1],
                                  kernel_size=3, padding=1)
    self.refine_cls = nn.Conv2d(sum(num_upsample_filters), num_cls, 1)
    self.refine_loc = nn.Conv2d(sum(num_upsample_filters),
                                num_anchor_per_loc * box_code_size, 1)
    if use_direction_classifier:
        self.refine_dir = nn.Conv2d(sum(num_upsample_filters),
                                    num_anchor_per_loc * 2, 1)
def __init__(self,
             output_shape,
             use_norm=True,
             num_class=2,
             layer_nums=[3, 5, 5],
             layer_strides=[2, 2, 2],
             num_filters=[128, 128, 256],
             upsample_strides=[1, 2, 4],
             num_upsample_filters=[256, 256, 256],
             num_input_features=64,
             num_anchor_per_loc=2,
             encode_background_as_zeros=True,
             use_direction_classifier=True,
             use_groupnorm=False,
             num_groups=32,
             use_bev=False,
             box_code_size=7,
             name='rpn'):
    """Build a three-stage sparse-convolution RPN with dense heads.

    Three `scn` blocks progressively downsample the sparse BEV input;
    each block's output is densified (`scn.SparseToDense`) and brought
    back to a common resolution by a dense ``ConvTranspose2d`` branch
    (``norm_dec*``).  A small dense ``post`` trunk (-> 64 channels)
    feeds 1x1 classification / box / direction heads.

    NOTE(review): the list defaults are mutable but are only read in
    this method; they are kept as lists to preserve the public
    signature.

    Args:
        output_shape: shape whose indices [2:4] give the sparse spatial
            grid (H, W) for ``scn.InputLayer``.
        use_norm: if True, convs are bias-free and followed by
            ``BatchNormReLU`` (or GroupNorm); if False, convs carry a
            bias and normalization is a no-op (``Empty``).
        num_class: number of object classes.
        layer_nums: submanifold convs per block; must have length 3.
        layer_strides: stride of the first (downsampling) conv of each
            block.
        num_filters: output channels of each block.
        upsample_strides: deconv stride restoring each block's output to
            the common resolution.
        num_upsample_filters: channels of each upsampled branch.
        num_input_features: channels of the input sparse tensor.
        num_anchor_per_loc: anchors predicted per spatial location.
        encode_background_as_zeros: if True the cls head emits
            ``num_anchor_per_loc * num_class`` maps, else one extra
            background class per anchor.
        use_direction_classifier: add a per-anchor 2-way direction head.
        use_groupnorm / num_groups: substitute GroupNorm for BatchNorm.
        use_bev: stored only; the BEV extractor is currently disabled.
        box_code_size: regression targets per anchor.
        name: module name (unused in construction).
    """
    super(SparseRPN, self).__init__()
    self._num_anchor_per_loc = num_anchor_per_loc
    self._use_direction_classifier = use_direction_classifier
    self._use_bev = use_bev
    assert len(layer_nums) == 3
    assert len(layer_strides) == len(layer_nums)
    assert len(num_filters) == len(layer_nums)
    assert len(upsample_strides) == len(layer_nums)
    assert len(num_upsample_filters) == len(layer_nums)
    # The (cumulative downsample) / (upsample) ratio must be integral
    # and identical across stages so the upsampled maps align for
    # channel-wise concatenation.
    factors = []
    for i in range(len(layer_nums)):
        assert int(np.prod(
            layer_strides[:i + 1])) % upsample_strides[i] == 0
        factors.append(
            np.prod(layer_strides[:i + 1]) // upsample_strides[i])
    assert all(x == factors[0] for x in factors)

    if use_norm:
        if use_groupnorm:
            BatchNormReLU = change_default_args(
                num_groups=num_groups, eps=1e-3)(GroupNorm)
        else:
            BatchNormReLU = change_default_args(
                eps=1e-3, momentum=0.01)(scn.BatchNormReLU)
        Convolution = change_default_args(bias=False)(scn.Convolution)
        Deconvolution = change_default_args(bias=False)(
            scn.Deconvolution)
        SubmanifoldConvolution = change_default_args(bias=False)(
            scn.SubmanifoldConvolution)
    else:
        # Bug fix: this branch was commented out, so use_norm=False
        # raised NameError on the factory names above.  Restored with
        # the same convention as the no-norm path of _make_layer:
        # biased convolutions and a no-op normalization layer.
        BatchNormReLU = Empty
        Convolution = change_default_args(bias=True)(scn.Convolution)
        Deconvolution = change_default_args(bias=True)(
            scn.Deconvolution)
        SubmanifoldConvolution = change_default_args(bias=True)(
            scn.SubmanifoldConvolution)

    # note that when stride > 1, conv2d with same padding isn't
    # equal to pad-conv2d. we should use pad-conv2d.
    sparse_shape = np.array(output_shape)[2:4]
    self.scn_input = scn.InputLayer(2, sparse_shape.tolist())

    ########################### Block 1 ###############################
    # scn.Convolution arguments: dimension, nIn, nOut, filter_size,
    # filter_stride, bias.
    self.block1 = scn.Sequential(
        Convolution(2, num_input_features, num_filters[0], 2,
                    layer_strides[0], False),
        BatchNormReLU(num_filters[0]))
    for i in range(layer_nums[0]):
        # scn.SubmanifoldConvolution arguments: dimension, nIn, nOut,
        # filter_size, bias.
        self.block1.add(
            SubmanifoldConvolution(2, num_filters[0], num_filters[0],
                                   3, False))
        self.block1.add(BatchNormReLU(num_filters[0]))
    self.deconv1 = scn.SparseToDense(2, num_filters[0])
    self.norm_dec1 = Sequential(
        nn.ConvTranspose2d(num_filters[0], num_upsample_filters[0],
                           upsample_strides[0],
                           stride=upsample_strides[0], bias=False),
        nn.BatchNorm2d(num_upsample_filters[0]),
        nn.ReLU())

    ########################### Block 2 ###############################
    self.block2 = scn.Sequential(
        Convolution(2, num_filters[0], num_filters[1], 2,
                    layer_strides[1], False),
        BatchNormReLU(num_filters[1]))
    for i in range(layer_nums[1]):
        self.block2.add(
            SubmanifoldConvolution(2, num_filters[1], num_filters[1],
                                   3, False))
        self.block2.add(BatchNormReLU(num_filters[1]))
    # Consistency fix: deconv2 was commented out while deconv1/deconv3
    # and all three norm_dec* branches exist.  SparseToDense is
    # parameter-free, so registering it is safe even if a caller's
    # forward path bypasses it.
    self.deconv2 = scn.SparseToDense(2, num_filters[1])
    self.norm_dec2 = Sequential(
        nn.ConvTranspose2d(num_filters[1], num_upsample_filters[1],
                           upsample_strides[1],
                           stride=upsample_strides[1], bias=False),
        nn.BatchNorm2d(num_upsample_filters[1]),
        nn.ReLU())

    ########################### Block 3 ###############################
    self.block3 = scn.Sequential(
        Convolution(2, num_filters[1], num_filters[2], 2,
                    layer_strides[2], False),
        BatchNormReLU(num_filters[2]))
    for i in range(layer_nums[2]):
        self.block3.add(
            SubmanifoldConvolution(2, num_filters[2], num_filters[2],
                                   3, False))
        self.block3.add(BatchNormReLU(num_filters[2]))
    self.deconv3 = scn.SparseToDense(2, num_filters[2])
    self.norm_dec3 = Sequential(
        nn.ConvTranspose2d(num_filters[2], num_upsample_filters[2],
                           upsample_strides[2],
                           stride=upsample_strides[2], bias=False),
        nn.BatchNorm2d(num_upsample_filters[2]),
        nn.ReLU())

    # Dense trunk over the concatenated upsampled branches; reduces to
    # the 64 channels the 1x1 heads below expect.
    self.post = Sequential(
        nn.Conv2d(sum(num_upsample_filters), 128, 3, stride=1,
                  padding=1),
        nn.BatchNorm2d(128),
        nn.ReLU(),
        nn.Conv2d(128, 64, 3, stride=1, padding=1),
        nn.BatchNorm2d(64),
        nn.ReLU(),
    )  # [1, 200, 176]

    if encode_background_as_zeros:
        num_cls = num_anchor_per_loc * num_class
    else:
        num_cls = num_anchor_per_loc * (num_class + 1)
    # 1x1 prediction heads on the 64-channel post-trunk output.
    self.conv_cls = nn.Conv2d(64, num_cls, 1)
    self.conv_box = nn.Conv2d(64, num_anchor_per_loc * box_code_size, 1)
    if use_direction_classifier:
        self.conv_dir_cls = nn.Conv2d(64, num_anchor_per_loc * 2, 1)