def create_bnn_deploy_net(num_input_points, height, width):
    """Assemble the deploy network built around a single Permutohedral layer.

    Four ``Input`` blobs are declared (``input_color``, ``in_features``,
    ``out_features`` and ``scales``).  ``scales`` is flattened and used to
    multiply both feature blobs along axis 1; the scaled features, together
    with ``input_color``, feed the Permutohedral layer that produces
    ``out_color_result``.

    Args:
        num_input_points: Last dimension of ``input_color`` and
            ``in_features``.
        height: Third dimension of ``out_features``.
        width: Fourth dimension of ``out_features``.

    Returns:
        The result of ``caffe.NetSpec.to_proto()`` for the assembled net.
    """
    def input_blob(*dims):
        # Every input of this net is one blob with a fixed 4-d shape.
        return L.Input(shape=[dict(dim=list(dims))])

    def scale_by(features, factors):
        # Multiply `features` by `factors`, aligned on axis 1.
        return L.Scale(features, factors, scale_param=dict(axis=1))

    net = caffe.NetSpec()

    # Inputs
    net.input_color = input_blob(1, 2, 1, num_input_points)
    net.in_features = input_blob(1, 4, 1, num_input_points)
    net.out_features = input_blob(1, 4, height, width)
    net.scales = input_blob(1, 4, 1, 1)

    # Feature scaling
    net.flatten_scales = L.Flatten(net.scales, flatten_param=dict(axis=0))
    net.in_scaled_features = scale_by(net.in_features, net.flatten_scales)
    net.out_scaled_features = scale_by(net.out_features, net.flatten_scales)

    # Bilateral filtering from the input points to the output features
    filter_settings = dict(num_output=2,
                           group=1,
                           neighborhood_size=0,
                           norm_type=P.Permutohedral.AFTER,
                           offset_type=P.Permutohedral.DIAG)
    net.out_color_result = L.Permutohedral(net.input_color,
                                           net.in_scaled_features,
                                           net.out_scaled_features,
                                           permutohedral_param=filter_settings)

    return net.to_proto()
def bcl_bn_relu(n, name, top_prev, top_lat_feats, nout, lattic_scale=None, loop=1):
    """Append ``loop`` Permutohedral -> BatchNorm -> Scale -> ReLU stages to ``n``.

    For every stage index ``idx`` the following tops are registered on ``n``:
    ``<name>_<idx>`` (Permutohedral), ``<name>_bn_<idx>`` (BatchNorm),
    ``<name>_sc_<idx>`` (Scale with bias) and ``<name>_relu_<idx>`` (in-place
    ReLU).  When ``lattic_scale`` is given, an extra ``<name>_scale_<idx>``
    Python layer (``bcl_layers.PickAndScale``) is created from
    ``top_lat_feats`` and its output is used as the lattice features of that
    stage.

    Args:
        n: Mapping-like net spec; layers are stored with ``n[key] = layer``.
        name: Prefix for all layer names (converted with ``str``).
        top_prev: Top fed into the first Permutohedral layer.
        top_lat_feats: Top holding the lattice features.
        nout: Sequence indexed by stage, giving ``num_output`` per stage.
        lattic_scale: Optional sequence indexed by stage with the
            ``param_str`` of the PickAndScale layer, e.g.
            ``["0*16_1*16_2*16", "0*8_1*8_2*8", "0*2_1*2_2*2"]``.  When falsy,
            ``top_lat_feats`` is used unscaled.
        loop: Number of stages to append.

    Returns:
        The top of the last ReLU, or ``top_prev`` unchanged when ``loop`` is 0.
    """
    prefix = str(name)
    # Loop-invariant settings; the layer constructors only read them.
    bltr_weight_filler = dict(type='gaussian', std=float(0.001))

    for idx in range(loop):
        stage = str(idx)

        if lattic_scale:
            # python mode, e.g. ["0*16_1*16_2*16", "0*8_1*8_2*8", "0*2_1*2_2*2"]
            scale_key = prefix + "_scale_" + stage
            n[scale_key] = L.Python(top_lat_feats,
                                    python_param=dict(module='bcl_layers',
                                                      layer='PickAndScale',
                                                      param_str=lattic_scale[idx]))
            lat_feats = n[scale_key]
        else:
            # Previously this variable was left unbound on this path, which
            # raised NameError below; fall back to the unscaled features.
            lat_feats = top_lat_feats

        bcl_key = prefix + "_" + stage
        n[bcl_key] = L.Permutohedral(top_prev, lat_feats, lat_feats,
                                     ntop=1,
                                     permutohedral_param=dict(
                                         num_output=nout[idx],
                                         group=1,
                                         neighborhood_size=1,
                                         bias_term=True,
                                         norm_type=P.Permutohedral.AFTER,
                                         offset_type=P.Permutohedral.NONE,
                                         filter_filler=bltr_weight_filler,
                                         bias_filler=dict(type='constant', value=0)),
                                     param=[{'lr_mult': 1, 'decay_mult': 1},
                                            {'lr_mult': 2, 'decay_mult': 0}])
        top_prev = n[bcl_key]

        bn_key = prefix + '_bn_' + stage
        n[bn_key] = L.BatchNorm(top_prev,
                                batch_norm_param=dict(eps=1e-3,
                                                      moving_average_fraction=0.99))
        top_prev = n[bn_key]

        sc_key = prefix + '_sc_' + stage
        n[sc_key] = L.Scale(top_prev, scale_param=dict(bias_term=True))
        top_prev = n[sc_key]

        relu_key = prefix + '_relu_' + stage
        n[relu_key] = L.ReLU(top_prev, in_place=True)
        top_prev = n[relu_key]

    return top_prev
def extrapolate(arch_str='64_128_256_256', batchnorm=True,
                skip_str=(),  # tuple of strings like '4_1_ga' - relu4 <- relu1 w/ options 'ga'
                bilateral_nbr=1,
                conv_weight_filler='xavier', bltr_weight_filler='gauss_0.001',
                dataset='shapenet', dataset_params=None,
                sample_size=3000, batch_size=32,
                feat_dims_str='x_y_z', lattice_dims_str=None,
                deploy=False, create_prototxt=True, save_path=None):
    """Build a sequential network of 1x1 conv and bilateral (Permutohedral) blocks.

    Args:
        arch_str: '_'-separated layer specs.  ``'c<N>'`` or a bare ``'<N>'``
            is a 1x1 Convolution with N outputs, ``'b<N>'`` a Permutohedral
            layer with N outputs.  Every layer is followed by an optional
            BatchNorm and an in-place ReLU.
        batchnorm: Insert a BatchNorm layer after every conv/bilateral layer.
        skip_str: Iterable of ``'<to>_<from>[_<opts>]'`` strings (or None).
            After block ``<to>`` the stored output of block ``<from>`` is
            merged in; ``'g'`` in ``<opts>`` applies ``GlobalPooling`` to the
            current block first, ``'a'`` merges with an Eltwise SUM instead of
            a Concat.  All entries with the same ``<to>`` must share ``<opts>``.
        bilateral_nbr: ``neighborhood_size`` of every Permutohedral layer.
        conv_weight_filler: ``'xavier'``, ``'msra'``, ``'gauss_<std>'``, or a
            Python expression string that is passed to ``eval``.
        bltr_weight_filler: ``'gauss_<std>'`` filler for Permutohedral layers.
        dataset: Dataset name.  Only ``'airsim'`` is handled here.
            NOTE(review): the default ``'shapenet'`` therefore always raises
            ValueError - confirm whether the default should change.
        dataset_params: Optional overrides for the data layer parameters.
            The caller's dict is not modified.
        sample_size: Stored in the data layer params; also the last dimension
            of the deploy ``Input`` blob.
        batch_size: Stored in the data layer params.
        feat_dims_str: Channel/scale string selecting the network input
            features.
        lattice_dims_str: Channel/scale string(s) for the lattices: a single
            string, a one-element sequence (repeated for every bilateral
            layer), or one entry per bilateral layer.
        deploy: Use an ``Input`` layer and a Softmax output instead of the
            train/test data layers and the loss.
        create_prototxt: Pass the net through ``get_prototxt(net, save_path)``.
        save_path: Forwarded to ``get_prototxt``.

    Returns:
        ``n.to_proto()``, or the value returned by ``get_prototxt`` when
        ``create_prototxt`` is true.

    Raises:
        ValueError: If ``dataset`` is not ``'airsim'``.
        AssertionError: If the number of lattices, the bilateral filler format
            or the skip options are inconsistent.
    """
    n = caffe.NetSpec()

    arch_str = [(v[0], int(v[1:])) if v[0] in {'b', 'c'} else ('c', int(v)) for v in arch_str.split('_')]
    num_bltr_layers = sum(v[0] == 'b' for v in arch_str)

    if num_bltr_layers > 0:
        # Normalise lattice_dims_str to one entry per bilateral layer.
        if isinstance(lattice_dims_str, str):
            lattice_dims_str = (lattice_dims_str,) * num_bltr_layers
        elif len(lattice_dims_str) == 1:
            lattice_dims_str = lattice_dims_str * num_bltr_layers
        else:
            assert len(lattice_dims_str) == num_bltr_layers, '{} lattices should be provided'.format(num_bltr_layers)

        feat_dims = parse_channel_scale(feat_dims_str, channel_str=True)[0]
        lattice_dims = [parse_channel_scale(s, channel_str=True)[0] for s in lattice_dims_str]

        # Union of all referenced input channels, keeping first-seen order.
        # Written as a plain loop so it does not depend on `reduce` being in
        # scope (it is not a builtin on Python 3).
        input_dims = []
        for dims in [feat_dims] + lattice_dims:
            for dim in dims:
                if dim not in input_dims:
                    input_dims.append(dim)

        feat_dims_str = map_channel_scale(feat_dims_str, input_dims)
        lattice_dims_str = [map_channel_scale(s, input_dims) for s in lattice_dims_str]
        input_dims_str = '_'.join(input_dims)
    else:
        feat_dims = parse_channel_scale(feat_dims_str, channel_str=True)[0]
        input_dims = feat_dims
        feat_dims_str = map_channel_scale(feat_dims_str, input_dims)
        input_dims_str = '_'.join(input_dims)

    # dataset specific settings: nclass, datalayer_train, datalayer_test
    if dataset == 'airsim':
        # One output channel per entry of the (mapped) feature string.
        nclass = len(feat_dims_str.split('_'))

        # default dataset params, overridden by whatever the caller supplied
        dataset_params_new = {} if not dataset_params else dataset_params
        dataset_params = dict(subset_train='train', subset_test='val')
        dataset_params.update(dataset_params_new)

        dataset_params['feat_dims'] = input_dims_str
        dataset_params['sample_size'] = sample_size
        dataset_params['batch_size'] = batch_size

        # dataset params type casting
        for v in {'jitter_xyz', 'jitter_rotation', 'jitter_stretch'}:
            if v in dataset_params:
                dataset_params[v] = float(dataset_params[v])
        for v in {'sample_size', 'batch_size'}:
            if v in dataset_params:
                dataset_params[v] = int(dataset_params[v])

        # training time dataset params
        dataset_params_train = dataset_params.copy()
        dataset_params_train['subset'] = dataset_params['subset_train']
        del dataset_params_train['subset_train'], dataset_params_train['subset_test']

        # testing time dataset params: turn off all data augmentations
        dataset_params_test = dataset_params.copy()
        dataset_params_test['subset'] = dataset_params['subset_test']
        dataset_params_test['jitter_xyz'] = 0.0
        dataset_params_test['jitter_stretch'] = 0.0
        dataset_params_test['jitter_rotation'] = 0.0
        del dataset_params_test['subset_train'], dataset_params_test['subset_test']

        # data layers
        datalayer_train = L.Python(name='data', include=dict(phase=caffe.TRAIN), ntop=2,
                                   python_param=dict(module='dataset_airsim', layer='InputAirsim',
                                                     param_str=repr(dataset_params_train)))
        datalayer_test = L.Python(name='data', include=dict(phase=caffe.TEST), ntop=0, top=['data', 'label'],
                                  python_param=dict(module='dataset_airsim', layer='InputAirsim',
                                                    param_str=repr(dataset_params_test)))
    else:
        raise ValueError('Dataset {} unknown'.format(dataset))

    # Input/Data layer
    if deploy:
        n.data = L.Input(shape=dict(dim=[1, len(input_dims), 1, sample_size]))
    else:
        n.data, n.label = datalayer_train
        n.test_data = datalayer_test
    n.data_feat = L.Python(n.data, python_param=dict(module='custom_layers', layer='PickAndScale',
                                                     param_str=feat_dims_str))
    top_prev = n.data_feat

    if conv_weight_filler in {'xavier', 'msra'}:
        conv_weight_filler = dict(type=conv_weight_filler)
    elif conv_weight_filler.startswith('gauss_'):
        conv_weight_filler = dict(type='gaussian', std=float(conv_weight_filler.split('_')[1]))
    else:
        # SECURITY NOTE(review): eval() executes arbitrary code. Only pass
        # trusted strings here; kept as-is because callers may rely on full
        # expressions such as "dict(type='constant', value=0)".
        conv_weight_filler = eval(conv_weight_filler)
    assert bltr_weight_filler.startswith('gauss_'), \
        "bltr_weight_filler must look like 'gauss_<std>'"
    bltr_weight_filler = dict(type='gaussian', std=float(bltr_weight_filler.split('_')[1]))

    # Skip specs do not change while layers are built: normalise them once.
    if skip_str is None:
        skip_str = ()
    skip_tos = [v.split('_')[0] for v in skip_str]

    # multiple 1x1 conv-(bn)-relu blocks, optionally with a single global pooling somewhere among them
    idx = 1
    bltr_idx = 0
    lattices = dict()       # lattice string -> (PickAndScale top, reusable lattice top or None)
    last_in_block = dict()  # block index -> final top of that block (skip-connection sources)
    for (layer_type, n_out) in arch_str:
        layer_id = str(idx)

        if layer_type == 'c':
            n['conv' + layer_id] = L.Convolution(top_prev,
                                                 convolution_param=dict(num_output=n_out,
                                                                        kernel_size=1, stride=1, pad=0,
                                                                        weight_filler=conv_weight_filler,
                                                                        bias_filler=dict(type='constant', value=0)),
                                                 param=[dict(lr_mult=1), dict(lr_mult=0.1)])
        elif layer_type == 'b':
            lattice_dims_str_curr = lattice_dims_str[bltr_idx]

            # Settings shared by every way this bilateral layer can be wired.
            bltr_kwargs = dict(
                permutohedral_param=dict(num_output=n_out,
                                         group=1,
                                         neighborhood_size=bilateral_nbr,
                                         bias_term=True,
                                         norm_type=P.Permutohedral.AFTER,
                                         offset_type=P.Permutohedral.NONE,
                                         filter_filler=bltr_weight_filler,
                                         bias_filler=dict(type='constant', value=0)),
                param=[{'lr_mult': 1, 'decay_mult': 1},
                       {'lr_mult': 2, 'decay_mult': 0}])

            if lattice_dims_str_curr in lattices:
                # Same lattice string seen before: pass the stored lattice
                # top as an extra bottom.
                top_data_lattice, top_lattice = lattices[lattice_dims_str_curr]
                n['conv' + layer_id] = L.Permutohedral(top_prev, top_data_lattice, top_data_lattice,
                                                       top_lattice, **bltr_kwargs)
            else:
                top_data_lattice = L.Python(n.data, python_param=dict(module='custom_layers',
                                                                      layer='PickAndScale',
                                                                      param_str=lattice_dims_str_curr))
                n['data_lattice' + str(len(lattices))] = top_data_lattice
                if lattice_dims_str.count(lattice_dims_str_curr) > 1:
                    # The lattice string occurs again later: request a second
                    # top and keep it for those layers.
                    n['conv' + layer_id], top_lattice = L.Permutohedral(top_prev, top_data_lattice,
                                                                        top_data_lattice, ntop=2,
                                                                        **bltr_kwargs)
                    n['lattice' + str(len(lattices))] = top_lattice
                else:
                    n['conv' + layer_id] = L.Permutohedral(top_prev, top_data_lattice,
                                                           top_data_lattice, **bltr_kwargs)
                    top_lattice = None

                lattices[lattice_dims_str_curr] = (top_data_lattice, top_lattice)

            bltr_idx += 1

        top_prev = n['conv' + layer_id]
        if batchnorm:
            n['bn' + layer_id] = L.BatchNorm(top_prev)
            top_prev = n['bn' + layer_id]
        n['relu' + layer_id] = L.ReLU(top_prev, in_place=True)
        top_prev = n['relu' + layer_id]

        # skip connection & global pooling
        if layer_id in skip_tos:
            skip_params = [spec.split('_') for spec, to in zip(skip_str, skip_tos) if to == layer_id]
            if len(skip_params[0]) == 2:
                assert all(len(v) == 2 for v in skip_params)
            else:
                assert all(v[2] == skip_params[0][2] for v in skip_params)
            skip_opts = skip_params[0][2] if len(skip_params[0]) > 2 else ''

            if 'g' in skip_opts:  # global pooling on current layer
                n['gpool' + layer_id] = L.Python(top_prev, python_param=dict(module='custom_layers',
                                                                             layer='GlobalPooling'))
                top_prev = n['gpool' + layer_id]

            skip_sources = [last_in_block[int(v[1])] for v in skip_params]
            if 'a' in skip_opts:  # addition instead of concatenation
                n['add' + layer_id] = L.Eltwise(top_prev, *skip_sources,
                                                eltwise_param=dict(operation=P.Eltwise.SUM))
                top_prev = n['add' + layer_id]
            else:
                n['concat' + layer_id] = L.Concat(top_prev, *skip_sources)
                top_prev = n['concat' + layer_id]

        last_in_block[idx] = top_prev
        idx += 1

    # classification & loss
    n['conv' + str(idx)] = L.Convolution(top_prev,
                                         convolution_param=dict(num_output=nclass,
                                                                kernel_size=1, stride=1, pad=0,
                                                                weight_filler=conv_weight_filler,
                                                                bias_filler=dict(type='constant', value=0)),
                                         param=[dict(lr_mult=1), dict(lr_mult=0.1)])
    top_prev = n['conv' + str(idx)]
    if deploy:
        # NOTE(review): training optimises a EuclideanLoss on this top while
        # deploy applies a Softmax - confirm this mismatch is intended.
        n.prob = L.Softmax(top_prev)
    else:
        n.loss = L.EuclideanLoss(top_prev, n.label)

    net = n.to_proto()

    if create_prototxt:
        net = get_prototxt(net, save_path)

    return net
def create_bnn_cnn_net_fold_stage(num_input_frames, fold_id='0', stage_id='1', phase=None):
    """Build the combined BNN + DeepLab + CNN network for one fold and stage.

    Args:
        num_input_frames: Third dimension of the ``unary``, ``in_features``
            and ``unary_scales`` inputs (only used when ``phase`` is neither
            ``'TRAIN'`` nor ``'TEST'``).
        fold_id: String appended to the data layer ``param_str``.
        stage_id: String appended to the data layer ``param_str``.
        phase: ``'TRAIN'`` or ``'TEST'`` reads all blobs (including the label)
            from the ``input_data_layer.InputRead`` Python layer and attaches
            losses and accuracies.  Any other value declares plain ``Input``
            blobs and appends the superpixel-feature output instead.

    Returns:
        The result of ``caffe.NetSpec.to_proto()`` for the assembled net.
    """
    def lr_spec():
        # Learning-rate / decay multipliers for (weights, bias).
        return [{'lr_mult': 1, 'decay_mult': 1}, {'lr_mult': 2, 'decay_mult': 0}]

    def input_blob(*dims):
        return L.Input(shape=[dict(dim=list(dims))])

    def flatten(bottom):
        return L.Flatten(bottom, flatten_param=dict(axis=0))

    def scale(bottom, factors, axis):
        return L.Scale(bottom, factors, scale_param=dict(axis=axis))

    def bilateral(bottom, in_feats, out_feats):
        # 32-output Permutohedral layer used by both BNN stages.
        return L.Permutohedral(bottom, in_feats, out_feats,
                               permutohedral_param=dict(
                                   num_output=32,
                                   group=1,
                                   neighborhood_size=0,
                                   bias_term=True,
                                   norm_type=P.Permutohedral.AFTER,
                                   offset_type=P.Permutohedral.NONE),
                               filter_filler=dict(type='gaussian', std=0.01),
                               bias_filler=dict(type='constant', value=0),
                               param=lr_spec())

    def conv1x1(bottom, num_output):
        return L.Convolution(bottom,
                             convolution_param=dict(
                                 num_output=num_output,
                                 kernel_size=1,
                                 stride=1,
                                 weight_filler=dict(type='gaussian', std=0.01),
                                 bias_filler=dict(type='constant', value=0)),
                             param=lr_spec())

    def conv3x3(bottom, num_output, bias_value):
        return L.Convolution(bottom,
                             convolution_param=dict(
                                 num_output=num_output,
                                 kernel_size=3,
                                 stride=1,
                                 pad_h=1,
                                 pad_w=1,
                                 weight_filler=dict(type='gaussian', std=0.01),
                                 bias_filler=dict(type='constant', value=bias_value)),
                             param=lr_spec())

    def relu(bottom):
        return L.ReLU(bottom, in_place=True)

    net = caffe.NetSpec()
    supervised = phase in ('TRAIN', 'TEST')

    if supervised:
        # A single Python data layer provides all ten blobs, in this order.
        data_tops = ('img', 'padimg', 'unary', 'in_features', 'out_features',
                     'spixel_indices', 'scales1', 'scales2', 'unary_scales',
                     'label')
        if phase == 'TRAIN':
            reader_arg = "TRAIN_1000000_" + fold_id + '_' + stage_id
            phase_code = 0
        else:
            reader_arg = "VAL_50_" + fold_id + '_' + stage_id
            phase_code = 1
        tops = L.Python(python_param=dict(module="input_data_layer",
                                          layer="InputRead",
                                          param_str=reader_arg),
                        include=dict(phase=phase_code),
                        ntop=10)
        for top_name, top in zip(data_tops, tops):
            setattr(net, top_name, top)
    else:
        net.img = input_blob(1, 3, 480, 854)
        net.padimg = input_blob(1, 3, 481, 857)
        net.unary = input_blob(1, 2, num_input_frames, max_spixels)
        net.in_features = input_blob(1, 6, num_input_frames, max_spixels)
        net.out_features = input_blob(1, 6, 1, max_spixels)
        net.spixel_indices = input_blob(1, 1, 480, 854)
        net.scales1 = input_blob(1, 6, 1, 1)
        net.scales2 = input_blob(1, 6, 1, 1)
        net.unary_scales = input_blob(1, 1, num_input_frames, 1)

    # Scale the features (axis 1) and the unaries (axis 2).
    net.flatten_scales1 = flatten(net.scales1)
    net.flatten_scales2 = flatten(net.scales2)
    net.flatten_unary_scales = flatten(net.unary_scales)

    net.in_scaled_features1 = scale(net.in_features, net.flatten_scales1, 1)
    net.out_scaled_features1 = scale(net.out_features, net.flatten_scales1, 1)
    net.in_scaled_features2 = scale(net.in_features, net.flatten_scales2, 1)
    net.out_scaled_features2 = scale(net.out_features, net.flatten_scales2, 1)
    net.scaled_unary = scale(net.unary, net.flatten_unary_scales, 2)

    ### Start of BNN

    # BNN - stage - 1
    net.out_seg1 = bilateral(net.scaled_unary, net.in_scaled_features1, net.out_scaled_features1)
    net.out_seg2 = bilateral(net.scaled_unary, net.in_scaled_features2, net.out_scaled_features2)
    net.concat_out_seg_1 = L.Concat(net.out_seg1, net.out_seg2, concat_param=dict(axis=1))
    net.concat_out_relu_1 = relu(net.concat_out_seg_1)

    # BNN - stage - 2
    net.out_seg3 = bilateral(net.concat_out_relu_1, net.out_scaled_features1, net.out_scaled_features1)
    net.out_seg4 = bilateral(net.concat_out_relu_1, net.out_scaled_features2, net.out_scaled_features2)
    net.concat_out_seg_2 = L.Concat(net.out_seg3, net.out_seg4, concat_param=dict(axis=1))
    net.concat_out_relu_2 = relu(net.concat_out_seg_2)

    # BNN - combination
    net.connection_out = L.Concat(net.concat_out_relu_1, net.concat_out_relu_2)
    net.spixel_out_seg = conv1x1(net.connection_out, 2)
    net.spixel_out_seg_relu = relu(net.spixel_out_seg)

    # Going from superpixels to pixels
    net.out_seg_bilateral = L.Smear(net.spixel_out_seg_relu, net.spixel_indices)

    ### BNN - DeepLab Combination
    net.deeplab_seg_presoftmax = deeplab(net.padimg, net.img, net.spixel_indices)
    net.deeplab_seg = L.Softmax(net.deeplab_seg_presoftmax)
    net.bnn_deeplab_connection = L.Concat(net.out_seg_bilateral, net.deeplab_seg)
    net.bnn_deeplab_seg = conv1x1(net.bnn_deeplab_connection, 2)
    net.bnn_deeplab_seg_relu = relu(net.bnn_deeplab_seg)

    ### Start of CNN

    # CNN - Stage 1
    net.out_seg_spatial1 = conv3x3(net.bnn_deeplab_seg_relu, 32, 0)
    net.out_seg_spatial_relu1 = relu(net.out_seg_spatial1)

    # CNN - Stage 2
    net.out_seg_spatial2 = conv3x3(net.out_seg_spatial_relu1, 32, 0)
    net.out_seg_spatial_relu2 = relu(net.out_seg_spatial2)

    # CNN - Stage 3 (the only conv whose bias starts at 0.5)
    net.out_seg_spatial = conv3x3(net.out_seg_spatial_relu2, 2, 0.5)

    # Normalization
    net.out_seg = normalize(net.out_seg_spatial, 2)

    if supervised:
        # Loss/accuracy on the final output and on the DeepLab branch.
        net.loss = L.LossWithoutSoftmax(net.out_seg, net.label,
                                        loss_param=dict(ignore_label=1000),
                                        loss_weight=1)
        net.accuracy = L.Accuracy(net.out_seg, net.label,
                                  accuracy_param=dict(ignore_label=1000))
        net.loss2 = L.SoftmaxWithLoss(net.deeplab_seg_presoftmax, net.label,
                                      loss_param=dict(ignore_label=1000),
                                      loss_weight=1)
        net.accuracy2 = L.Accuracy(net.deeplab_seg_presoftmax, net.label,
                                   accuracy_param=dict(ignore_label=1000))
    else:
        net.spixel_out_seg_2 = L.SpixelFeature(net.out_seg, net.spixel_indices,
                                               spixel_feature_param=dict(
                                                   type=P.SpixelFeature.AVGRGB,
                                                   max_spixels=12000,
                                                   rgb_scale=1.0))
        net.spixel_out_seg_final = normalize(net.spixel_out_seg_2, 2)

    return net.to_proto()
def create_bnn_deploy_net(num_input_frames):
    """Build the deploy network with one two-branch BNN stage.

    Plain ``Input`` blobs are declared for the unaries, the input/output
    features, the superpixel indices and three scale blobs.  The features are
    multiplied by ``scales1`` / ``scales2`` along axis 1 and the unaries by
    ``unary_scales`` along axis 2.  Two Permutohedral layers (one per feature
    scaling) filter the scaled unaries; their outputs are concatenated, passed
    through ReLU and a 1x1 convolution, normalised with ``normalize`` and
    finally smeared onto ``spixel_indices``.

    Args:
        num_input_frames: Third dimension of the ``unary``, ``in_features``
            and ``unary_scales`` inputs.

    Returns:
        The result of ``caffe.NetSpec.to_proto()`` for the assembled net.
    """
    n = caffe.NetSpec()

    # Inputs (max_spixels is a module-level name defined outside this function)
    n.unary = L.Input(shape=[dict(dim=[1, 2, num_input_frames, max_spixels])])
    n.in_features = L.Input(shape=[dict(dim=[1, 6, num_input_frames, max_spixels])])
    n.out_features = L.Input(shape=[dict(dim=[1, 6, 1, max_spixels])])
    n.spixel_indices = L.Input(shape=[dict(dim=[1, 1, 480, 854])])
    n.scales1 = L.Input(shape=[dict(dim=[1, 6, 1, 1])])
    n.scales2 = L.Input(shape=[dict(dim=[1, 6, 1, 1])])
    n.unary_scales = L.Input(shape=[dict(dim=[1, 1, num_input_frames, 1])])

    # Scaling of features and unaries
    n.flatten_scales1 = L.Flatten(n.scales1, flatten_param=dict(axis=0))
    n.flatten_scales2 = L.Flatten(n.scales2, flatten_param=dict(axis=0))
    n.flatten_unary_scales = L.Flatten(n.unary_scales, flatten_param=dict(axis=0))

    n.in_scaled_features1 = L.Scale(n.in_features, n.flatten_scales1,
                                    scale_param=dict(axis=1))
    n.out_scaled_features1 = L.Scale(n.out_features, n.flatten_scales1,
                                     scale_param=dict(axis=1))
    n.in_scaled_features2 = L.Scale(n.in_features, n.flatten_scales2,
                                    scale_param=dict(axis=1))
    n.out_scaled_features2 = L.Scale(n.out_features, n.flatten_scales2,
                                     scale_param=dict(axis=1))
    n.scaled_unary = L.Scale(n.unary, n.flatten_unary_scales,
                             scale_param=dict(axis=2))

    # Two bilateral branches over the same scaled unaries.
    n.out_seg1 = L.Permutohedral(n.scaled_unary,
                                 n.in_scaled_features1,
                                 n.out_scaled_features1,
                                 permutohedral_param=dict(
                                     num_output=32,
                                     group=1,
                                     neighborhood_size=0,
                                     bias_term=True,
                                     norm_type=P.Permutohedral.AFTER,
                                     offset_type=P.Permutohedral.NONE),
                                 filter_filler=dict(type='gaussian', std=0.01),
                                 bias_filler=dict(type='constant', value=0),
                                 param=[{'lr_mult': 1, 'decay_mult': 1},
                                        {'lr_mult': 2, 'decay_mult': 0}])

    # Fix: this branch used to read the raw `n.unary`, silently skipping the
    # per-frame unary scaling that the first branch receives.  Both branches
    # now consume `n.scaled_unary`.
    n.out_seg2 = L.Permutohedral(n.scaled_unary,
                                 n.in_scaled_features2,
                                 n.out_scaled_features2,
                                 permutohedral_param=dict(
                                     num_output=32,
                                     group=1,
                                     neighborhood_size=0,
                                     bias_term=True,
                                     norm_type=P.Permutohedral.AFTER,
                                     offset_type=P.Permutohedral.NONE),
                                 filter_filler=dict(type='gaussian', std=0.01),
                                 bias_filler=dict(type='constant', value=0),
                                 param=[{'lr_mult': 1, 'decay_mult': 1},
                                        {'lr_mult': 2, 'decay_mult': 0}])

    n.concat_out_seg = L.Concat(n.out_seg1, n.out_seg2, concat_param=dict(axis=1))
    n.concat_out_relu = L.ReLU(n.concat_out_seg, in_place=True)

    # 1x1 convolution reducing the concatenated branches to two channels
    n.spixel_out_seg1 = L.Convolution(n.concat_out_relu,
                                      convolution_param=dict(
                                          num_output=2,
                                          kernel_size=1,
                                          stride=1,
                                          weight_filler=dict(type='gaussian', std=0.01),
                                          bias_filler=dict(type='constant', value=0)),
                                      param=[{'lr_mult': 1, 'decay_mult': 1},
                                             {'lr_mult': 2, 'decay_mult': 0}])

    n.spixel_out_seg_final = normalize(n.spixel_out_seg1, 2)

    # Spread the per-superpixel result using the superpixel index map
    n.out_seg = L.Smear(n.spixel_out_seg_final, n.spixel_indices)

    return n.to_proto()
def create_bnn_cnn_net(num_input_points, height, width, phase=None):
    """Build the two-stage BNN followed by a three-stage CNN.

    The BNN part runs two Permutohedral layers (the second one filtering on
    the output features only), concatenates their ReLU outputs and reduces
    them to two channels with a 1x1 convolution.  The CNN part stacks three
    3x3 convolutions on that result.  The bilateral and spatial outputs are
    then concatenated and merged by a final 1x1 convolution into
    ``out_color_result``.

    Args:
        num_input_points: Last dimension of ``input_color`` and
            ``in_features``.
        height: Third dimension of ``out_features``.
        width: Fourth dimension of ``out_features``.
        phase: Accepted for interface compatibility; not read by this
            function.

    Returns:
        The result of ``caffe.NetSpec.to_proto()`` for the assembled net.
    """
    def input_blob(*dims):
        return L.Input(shape=[dict(dim=list(dims))])

    def lr_spec():
        # Learning-rate / decay multipliers for (weights, bias).
        return [{'lr_mult': 1, 'decay_mult': 1}, {'lr_mult': 2, 'decay_mult': 0}]

    def bilateral(bottom, in_feats, out_feats, bias_value):
        return L.Permutohedral(bottom, in_feats, out_feats,
                               permutohedral_param=dict(
                                   num_output=32,
                                   group=1,
                                   neighborhood_size=0,
                                   bias_term=True,
                                   norm_type=P.Permutohedral.AFTER,
                                   offset_type=P.Permutohedral.NONE),
                               filter_filler=dict(type='gaussian', std=0.01),
                               bias_filler=dict(type='constant', value=bias_value),
                               param=lr_spec())

    def conv1x1(bottom, bias_value):
        return L.Convolution(bottom,
                             convolution_param=dict(
                                 num_output=2,
                                 kernel_size=1,
                                 stride=1,
                                 weight_filler=dict(type='gaussian', std=0.01),
                                 bias_filler=dict(type='constant', value=bias_value)),
                             param=lr_spec())

    def conv3x3(bottom, num_output):
        return L.Convolution(bottom,
                             convolution_param=dict(
                                 num_output=num_output,
                                 kernel_size=3,
                                 stride=1,
                                 pad_h=1,
                                 pad_w=1,
                                 weight_filler=dict(type='gaussian', std=0.01),
                                 bias_filler=dict(type='constant', value=0)),
                             param=lr_spec())

    def relu(bottom):
        return L.ReLU(bottom, in_place=True)

    net = caffe.NetSpec()

    # Inputs
    net.input_color = input_blob(1, 2, 1, num_input_points)
    net.in_features = input_blob(1, 4, 1, num_input_points)
    net.out_features = input_blob(1, 4, height, width)
    net.scales = input_blob(1, 4, 1, 1)

    # Feature scaling along axis 1
    net.flatten_scales = L.Flatten(net.scales, flatten_param=dict(axis=0))
    net.in_scaled_features = L.Scale(net.in_features, net.flatten_scales,
                                     scale_param=dict(axis=1))
    net.out_scaled_features = L.Scale(net.out_features, net.flatten_scales,
                                      scale_param=dict(axis=1))

    ### Start of BNN

    # BNN - stage - 1 (bias initialised to 0.5, unlike the other layers)
    net.out_color1 = bilateral(net.input_color, net.in_scaled_features,
                               net.out_scaled_features, 0.5)
    net.bnn_out_relu_1 = relu(net.out_color1)

    # BNN - stage - 2
    net.out_color2 = bilateral(net.bnn_out_relu_1, net.out_scaled_features,
                               net.out_scaled_features, 0)
    net.bnn_out_relu_2 = relu(net.out_color2)

    # BNN - combination
    net.connection_out = L.Concat(net.bnn_out_relu_1, net.bnn_out_relu_2)
    net.out_color_bilateral = conv1x1(net.connection_out, 0)
    net.out_color_bilateral_relu = relu(net.out_color_bilateral)

    ### Start of CNN

    # CNN - Stage 1
    net.out_color_spatial1 = conv3x3(net.out_color_bilateral_relu, 32)
    net.out_color_spatial_relu1 = relu(net.out_color_spatial1)

    # CNN - Stage 2
    net.out_color_spatial2 = conv3x3(net.out_color_spatial_relu1, 32)
    net.out_color_spatial_relu2 = relu(net.out_color_spatial2)

    # CNN - Stage 3
    net.out_color_spatial = conv3x3(net.out_color_spatial_relu2, 2)
    net.out_color_spatial_relu = relu(net.out_color_spatial)

    # Merge the bilateral and the spatial results
    net.final_connection_out = L.Concat(net.out_color_bilateral_relu,
                                        net.out_color_spatial_relu)
    net.out_color_result = conv1x1(net.final_connection_out, 0.0)

    return net.to_proto()