Example #1
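All snippets below build Caffe networks with the NetSpec API and assume the standard pycaffe imports shown here; helper functions such as conv_relu, max_pool, fc_relu, crop, and the various custom Python/C++ layers come from each snippet's own repository and are not reproduced:

import caffe
from caffe import layers as L, params as P
import json  # used by the VQA examples to build param_str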
def fcn(obj_cls, part, split):
    n = caffe.NetSpec()
    n.data, n.label = L.Python(
        module='pascalpart_layers',
        layer='PASCALPartSegDataLayer',
        ntop=2,
        param_str=str(
            dict(voc_dir='/home/cv/hdl/caffe/data/pascal/VOC',
                 part_dir='/home/cv/hdl/caffe/data/pascal/pascal-part',
                 obj_cls=obj_cls,
                 part=part,
                 split=split,
                 seed=1337)))

    # the base net
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
    n.pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
    n.pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
    n.pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
    n.pool5 = max_pool(n.relu5_3)

    # fully conv
    n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
    n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
    n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
    n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)

    n.score_fr = L.Convolution(
        n.drop7,
        num_output=11,
        kernel_size=1,
        pad=0,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    n.upscore2 = L.Deconvolution(n.score_fr,
                                 convolution_param=dict(num_output=11,
                                                        kernel_size=4,
                                                        stride=2,
                                                        bias_term=False),
                                 param=[dict(lr_mult=0)])

    n.score_pool4 = L.Convolution(
        n.pool4,
        num_output=11,
        kernel_size=1,
        pad=0,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    n.score_pool4c = crop(n.score_pool4, n.upscore2)
    n.fuse_pool4 = L.Eltwise(n.upscore2,
                             n.score_pool4c,
                             operation=P.Eltwise.SUM)
    n.upscore16 = L.Deconvolution(n.fuse_pool4,
                                  convolution_param=dict(num_output=11,
                                                         kernel_size=32,
                                                         stride=16,
                                                         bias_term=False),
                                  param=[dict(lr_mult=0)])

    n.score = crop(n.upscore16, n.data)
    n.loss = L.SoftmaxWithLoss(n.score,
                               n.label,
                               loss_param=dict(normalize=False,
                                               ignore_label=255))

    return n.to_proto()
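A minimal usage sketch (the object class, part name, and output path are hypothetical; the PASCAL-Part directories configured above must exist):

with open('train.prototxt', 'w') as f:
    f.write(str(fcn('person', 'head', 'train')))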
Example #2
def generate_model(split, config):
    n = caffe.NetSpec()
    dataset = config.dataset
    batch_size = config.N
    mode_str = str(dict(dataset=dataset, split=split, batch_size=batch_size))
    n.image1, n.image2, n.label, n.sample_weights, n.feat_crop = L.Python(
        module=config.data_provider,
        layer=config.data_provider_layer,
        param_str=mode_str,
        ntop=5)

    ################################
    # the base net (VGG-16) branch 1
    n.conv1_1, n.relu1_1 = conv_relu(n.image1,
                                     64,
                                     param_names=('conv1_1_w', 'conv1_1_b'),
                                     fix_param=True,
                                     finetune=False)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1,
                                     64,
                                     param_names=('conv1_2_w', 'conv1_2_b'),
                                     fix_param=True,
                                     finetune=False)
    n.pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1,
                                     128,
                                     param_names=('conv2_1_w', 'conv2_1_b'),
                                     fix_param=True,
                                     finetune=False)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1,
                                     128,
                                     param_names=('conv2_2_w', 'conv2_2_b'),
                                     fix_param=True,
                                     finetune=False)
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2,
                                     256,
                                     param_names=('conv3_1_w', 'conv3_1_b'),
                                     fix_param=config.fix_vgg,
                                     finetune=config.finetune)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1,
                                     256,
                                     param_names=('conv3_2_w', 'conv3_2_b'),
                                     fix_param=config.fix_vgg,
                                     finetune=config.finetune)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2,
                                     256,
                                     param_names=('conv3_3_w', 'conv3_3_b'),
                                     fix_param=config.fix_vgg,
                                     finetune=config.finetune)
    n.pool3 = max_pool(n.relu3_3)
    # spatial L2 norm
    n.pool3_lrn = L.LRN(n.pool3, local_size=513, alpha=513, beta=0.5, k=1e-16)
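    # (With local_size spanning all channels, alpha equal to local_size,
    #  beta = 0.5 and k ~ 0, Caffe's ACROSS_CHANNELS LRN
    #  y = x / (k + (alpha/n) * sum_c x_c^2)^beta reduces to channel-wise
    #  L2 normalization: y = x / sqrt(sum_c x_c^2).)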

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3,
                                     512,
                                     param_names=('conv4_1_w', 'conv4_1_b'),
                                     fix_param=config.fix_vgg,
                                     finetune=config.finetune)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1,
                                     512,
                                     param_names=('conv4_2_w', 'conv4_2_b'),
                                     fix_param=config.fix_vgg,
                                     finetune=config.finetune)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2,
                                     512,
                                     param_names=('conv4_3_w', 'conv4_3_b'),
                                     fix_param=config.fix_vgg,
                                     finetune=config.finetune)
    # spatial L2 norm
    n.relu4_3_lrn = L.LRN(n.relu4_3,
                          local_size=1025,
                          alpha=1025,
                          beta=0.5,
                          k=1e-16)
    #n.pool4 = max_pool(n.relu4_3)

    #n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512,
    #                                 param_names=('conv5_1_w', 'conv5_1_b'),
    #                                 fix_param=config.fix_vgg,
    #                                 finetune=config.finetune)
    #n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512,
    #                                 param_names=('conv5_2_w', 'conv5_2_b'),
    #                                 fix_param=config.fix_vgg,
    #                                 finetune=config.finetune)
    #n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512,
    #                                 param_names=('conv5_3_w', 'conv5_3_b'),
    #                                 fix_param=config.fix_vgg,
    #                                 finetune=config.finetune)
    # upsampling feature map
    #n.relu5_3_upsampling = L.Deconvolution(n.relu5_3,
    #                                       convolution_param=dict(num_output=512,
    #                                                              group=512,
    #                                                              kernel_size=4,
    #                                                              stride=2,
    #                                                              pad=1,
    #                                                              bias_term=False,
    #                                                              weight_filler=dict(type='bilinear')),
    #                                       param=[dict(lr_mult=0, decay_mult=0)])
    # spatial L2 norm
    #n.relu5_3_lrn = L.LRN(n.relu5_3_upsampling, local_size=1025, alpha=1025, beta=0.5, k=1e-16)

    # concat all skip features
    #n.feat_all1 = n.relu4_3_lrn
    n.feat_all1 = L.Concat(n.pool3_lrn,
                           n.relu4_3_lrn,
                           concat_param=dict(axis=1))
    #n.feat_all1 = L.Concat(n.pool3_lrn, n.relu4_3_lrn, n.relu5_3_lrn, concat_param=dict(axis=1))
    n.feat_all1_crop = L.Crop(n.feat_all1,
                              n.feat_crop,
                              crop_param=dict(axis=2,
                                              offset=[
                                                  config.query_featmap_H // 3,
                                                  config.query_featmap_W // 3
                                              ]))

    ################################
    # the base net (VGG-16) branch 2
    n.conv1_1_p, n.relu1_1_p = conv_relu(n.image2,
                                         64,
                                         param_names=('conv1_1_w',
                                                      'conv1_1_b'),
                                         fix_param=True,
                                         finetune=False)
    n.conv1_2_p, n.relu1_2_p = conv_relu(n.relu1_1_p,
                                         64,
                                         param_names=('conv1_2_w',
                                                      'conv1_2_b'),
                                         fix_param=True,
                                         finetune=False)
    n.pool1_p = max_pool(n.relu1_2_p)

    n.conv2_1_p, n.relu2_1_p = conv_relu(n.pool1_p,
                                         128,
                                         param_names=('conv2_1_w',
                                                      'conv2_1_b'),
                                         fix_param=True,
                                         finetune=False)
    n.conv2_2_p, n.relu2_2_p = conv_relu(n.relu2_1_p,
                                         128,
                                         param_names=('conv2_2_w',
                                                      'conv2_2_b'),
                                         fix_param=True,
                                         finetune=False)
    n.pool2_p = max_pool(n.relu2_2_p)

    n.conv3_1_p, n.relu3_1_p = conv_relu(n.pool2_p,
                                         256,
                                         param_names=('conv3_1_w',
                                                      'conv3_1_b'),
                                         fix_param=config.fix_vgg,
                                         finetune=config.finetune)
    n.conv3_2_p, n.relu3_2_p = conv_relu(n.relu3_1_p,
                                         256,
                                         param_names=('conv3_2_w',
                                                      'conv3_2_b'),
                                         fix_param=config.fix_vgg,
                                         finetune=config.finetune)
    n.conv3_3_p, n.relu3_3_p = conv_relu(n.relu3_2_p,
                                         256,
                                         param_names=('conv3_3_w',
                                                      'conv3_3_b'),
                                         fix_param=config.fix_vgg,
                                         finetune=config.finetune)
    n.pool3_p = max_pool(n.relu3_3_p)
    # spatial L2 norm
    n.pool3_lrn_p = L.LRN(n.pool3_p,
                          local_size=513,
                          alpha=513,
                          beta=0.5,
                          k=1e-16)

    n.conv4_1_p, n.relu4_1_p = conv_relu(n.pool3_p,
                                         512,
                                         param_names=('conv4_1_w',
                                                      'conv4_1_b'),
                                         fix_param=config.fix_vgg,
                                         finetune=config.finetune)
    n.conv4_2_p, n.relu4_2_p = conv_relu(n.relu4_1_p,
                                         512,
                                         param_names=('conv4_2_w',
                                                      'conv4_2_b'),
                                         fix_param=config.fix_vgg,
                                         finetune=config.finetune)
    n.conv4_3_p, n.relu4_3_p = conv_relu(n.relu4_2_p,
                                         512,
                                         param_names=('conv4_3_w',
                                                      'conv4_3_b'),
                                         fix_param=config.fix_vgg,
                                         finetune=config.finetune)
    # spatial L2 norm
    n.relu4_3_lrn_p = L.LRN(n.relu4_3_p,
                            local_size=1025,
                            alpha=1025,
                            beta=0.5,
                            k=1e-16)
    #n.pool4_p = max_pool(n.relu4_3_p)

    #n.conv5_1_p, n.relu5_1_p = conv_relu(n.pool4_p, 512,
    #                                     param_names=('conv5_1_w', 'conv5_1_b'),
    #                                     fix_param=config.fix_vgg,
    #                                     finetune=config.finetune)
    #n.conv5_2_p, n.relu5_2_p = conv_relu(n.relu5_1_p, 512,
    #                                     param_names=('conv5_2_w', 'conv5_2_b'),
    #                                     fix_param=config.fix_vgg,
    #                                     finetune=config.finetune)
    #n.conv5_3_p, n.relu5_3_p = conv_relu(n.relu5_2_p, 512,
    #                                     param_names=('conv5_3_w', 'conv5_3_b'),
    #                                     fix_param=config.fix_vgg,
    #                                     finetune=config.finetune)
    # upsampling feature map
    #n.relu5_3_upsampling_p = L.Deconvolution(n.relu5_3_p,
    #                                         convolution_param=dict(num_output=512,
    #                                                                group=512,
    #                                                                kernel_size=4,
    #                                                                stride=2,
    #                                                                pad=1,
    #                                                                bias_term=False,
    #                                                                weight_filler=dict(type='bilinear')),
    #                                         param=[dict(lr_mult=0, decay_mult=0)])
    # spatial L2 norm
    #n.relu5_3_lrn_p = L.LRN(n.relu5_3_upsampling_p, local_size=1025, alpha=1025, beta=0.5, k=1e-16)

    # concat all skip features
    #n.feat_all2 = n.relu4_3_lrn_p
    n.feat_all2 = L.Concat(n.pool3_lrn_p,
                           n.relu4_3_lrn_p,
                           concat_param=dict(axis=1))
    #n.feat_all2 = L.Concat(n.pool3_lrn_p, n.relu4_3_lrn_p, n.relu5_3_lrn_p, concat_param=dict(axis=1))

    # Dyn conv layer
    n.fcn_scores = L.DynamicConvolution(n.feat_all2,
                                        n.feat_all1_crop,
                                        convolution_param=dict(
                                            num_output=1,
                                            kernel_size=11,
                                            stride=1,
                                            pad=5,
                                            bias_term=False))

    # scale scores with zero mean 0.01196 -> 0.02677
    n.fcn_scaled_scores = L.Power(n.fcn_scores,
                                  power_param=dict(scale=0.01196,
                                                   shift=-1.0,
                                                   power=1))

    # Loss Layer
    n.loss = L.WeightedSigmoidCrossEntropyLoss(n.fcn_scaled_scores, n.label,
                                               n.sample_weights)

    return n.to_proto()
Example #3
def qlstm(mode, batchsize, T, question_vocab_size):
    n = caffe.NetSpec()
    mode_str = json.dumps({'mode':mode, 'batchsize':batchsize})
    # n.data, n.cont, n.img_feature, n.label, n.glove = L.Python(\
    #     module='vqa_data_provider_layer', layer='VQADataProviderLayer', param_str=mode_str, ntop=5 )
    n.data, n.cont, n.img_feature, n.label = L.Python(\
        module='vqa_data_provider_layer', layer='VQADataProviderLayer', param_str=mode_str, ntop=4 )
    
    # word embedding (static + dynamic)
    n.embed_ba = L.Embed(n.data, input_dim=question_vocab_size, num_output=300, \
        weight_filler=dict(type='uniform',min=-0.08,max=0.08))
    # n.embed = L.TanH(n.embed_ba)
    n.embed_scale = L.Scale(n.embed_ba, n.cont, scale_param=dict(axis=0))
    n.embed_scale_resh = L.Reshape(n.embed_scale,\
                          reshape_param=dict(\
                              shape=dict(dim=[batchsize,1,T,300])))

    # convolution
    n.word_feature_2 = L.Convolution(n.embed_scale_resh, kernel_h=2, kernel_w=300, stride=1, num_output=512, pad_h=1, pad_w=0, weight_filler=dict(type='xavier')) # N x 512 x ? x 1
    n.word_feature_3 = L.Convolution(n.embed_scale_resh, kernel_h=3, kernel_w=300, stride=1, num_output=512, pad_h=2, pad_w=0, weight_filler=dict(type='xavier'))
    n.word_feature_4 = L.Convolution(n.embed_scale_resh, kernel_h=4, kernel_w=300, stride=1, num_output=512, pad_h=3, pad_w=0, weight_filler=dict(type='xavier'))
    n.word_feature_5 = L.Convolution(n.embed_scale_resh, kernel_h=5, kernel_w=300, stride=1, num_output=512, pad_h=4, pad_w=0, weight_filler=dict(type='xavier'))
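    # With pad_h = kernel_h - 1, each n-gram convolution over the length-T
    # question gives output height T + 2*pad_h - kernel_h + 1 = T + kernel_h - 1,
    # which the max-pools below (kernel_h = T+1 ... T+4) collapse to 1x1.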
    n.word_relu_2 = L.ReLU(n.word_feature_2)
    n.word_relu_3 = L.ReLU(n.word_feature_3)
    n.word_relu_4 = L.ReLU(n.word_feature_4)
    n.word_relu_5 = L.ReLU(n.word_feature_5)
    n.word_vec_2 = L.Pooling(n.word_relu_2, kernel_h=T+1, kernel_w=1, stride=T+1, pool=P.Pooling.MAX) # N x 512 x 1 x 1
    n.word_vec_3 = L.Pooling(n.word_relu_3, kernel_h=T+2, kernel_w=1, stride=T+2, pool=P.Pooling.MAX)
    n.word_vec_4 = L.Pooling(n.word_relu_4, kernel_h=T+3, kernel_w=1, stride=T+3, pool=P.Pooling.MAX)
    n.word_vec_5 = L.Pooling(n.word_relu_5, kernel_h=T+4, kernel_w=1, stride=T+4, pool=P.Pooling.MAX)
    word_vec = [n.word_vec_2, n.word_vec_3, n.word_vec_4, n.word_vec_5]
    n.concat_vec = L.Concat(*word_vec, concat_param={'axis': 1}) # N x 2048 x 1 x 1
    n.concat_vec_dropped = L.Dropout(n.concat_vec,dropout_param={'dropout_ratio':0.5})
    
    n.q_emb_tanh_droped_resh_tiled_1 = L.Tile(n.concat_vec_dropped, axis=2, tiles=14)
    n.q_emb_tanh_droped_resh_tiled = L.Tile(n.q_emb_tanh_droped_resh_tiled_1, axis=3, tiles=14)
    n.i_emb_tanh_droped_resh = L.Reshape(n.img_feature,reshape_param=dict(shape=dict(dim=[-1,2048,14,14])))
    n.blcf = L.CompactBilinear(n.q_emb_tanh_droped_resh_tiled, n.i_emb_tanh_droped_resh, compact_bilinear_param=dict(num_output=16000,sum_pool=False))
    n.blcf_sign_sqrt = L.SignedSqrt(n.blcf)
    n.blcf_sign_sqrt_l2 = L.L2Normalize(n.blcf_sign_sqrt)
    n.blcf_droped = L.Dropout(n.blcf_sign_sqrt_l2,dropout_param={'dropout_ratio':0.1})

    # multi-channel attention
    n.att_conv1 = L.Convolution(n.blcf_droped, kernel_size=1, stride=1, num_output=512, pad=0, weight_filler=dict(type='xavier'))
    n.att_conv1_relu = L.ReLU(n.att_conv1)
    n.att_conv2 = L.Convolution(n.att_conv1_relu, kernel_size=1, stride=1, num_output=2, pad=0, weight_filler=dict(type='xavier'))
    n.att_reshaped = L.Reshape(n.att_conv2,reshape_param=dict(shape=dict(dim=[-1,2,14*14])))
    n.att_softmax = L.Softmax(n.att_reshaped, axis=2)
    n.att = L.Reshape(n.att_softmax,reshape_param=dict(shape=dict(dim=[-1,2,14,14])))
    att_maps = L.Slice(n.att, ntop=2, slice_param={'axis':1})
    n.att_map0 = att_maps[0]
    n.att_map1 = att_maps[1]
    dummy = L.DummyData(shape=dict(dim=[batchsize, 1]), data_filler=dict(type='constant', value=1), ntop=1)
    n.att_feature0  = L.SoftAttention(n.i_emb_tanh_droped_resh, n.att_map0, dummy)
    n.att_feature1  = L.SoftAttention(n.i_emb_tanh_droped_resh, n.att_map1, dummy)
    n.att_feature0_resh = L.Reshape(n.att_feature0, reshape_param=dict(shape=dict(dim=[-1,2048])))
    n.att_feature1_resh = L.Reshape(n.att_feature1, reshape_param=dict(shape=dict(dim=[-1,2048])))
    n.att_feature = L.Concat(n.att_feature0_resh, n.att_feature1_resh)

    # merge attention and lstm with compact bilinear pooling
    n.att_feature_resh = L.Reshape(n.att_feature, reshape_param=dict(shape=dict(dim=[-1,4096,1,1])))
    #n.lstm_12_resh = L.Reshape(n.lstm_12, reshape_param=dict(shape=dict(dim=[-1,2048,1,1])))
    n.bc_att_lstm = L.CompactBilinear(n.att_feature_resh, n.concat_vec_dropped, 
                                      compact_bilinear_param=dict(num_output=16000,sum_pool=False))
    n.bc_sign_sqrt = L.SignedSqrt(n.bc_att_lstm)
    n.bc_sign_sqrt_l2 = L.L2Normalize(n.bc_sign_sqrt)

    n.bc_dropped = L.Dropout(n.bc_sign_sqrt_l2, dropout_param={'dropout_ratio':0.1})
    n.bc_dropped_resh = L.Reshape(n.bc_dropped, reshape_param=dict(shape=dict(dim=[-1, 16000])))

    n.prediction = L.InnerProduct(n.bc_dropped_resh, num_output=3000, weight_filler=dict(type='xavier'))
    n.loss = L.SoftmaxWithLoss(n.prediction, n.label)
    return n.to_proto()
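A usage sketch with hypothetical hyperparameters (vqa_data_provider_layer must be importable, and the custom CompactBilinear, SignedSqrt, L2Normalize, and SoftAttention layers compiled into Caffe):

with open('qlstm_train.prototxt', 'w') as f:
    f.write(str(qlstm('train', batchsize=64, T=20, question_vocab_size=15000)))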
Example #4
def generate_model(split, config):
    n = caffe.NetSpec()
    batch_size = config.N
    mode_str = str(dict(split=split, batch_size=batch_size))
    n.language, n.cont, n.image, n.spatial, n.label = L.Python(module=config.data_provider,
                                                               layer=config.data_provider_layer,
                                                               param_str=mode_str,
                                                               ntop=5)

    # the base net (VGG-16)
    n.conv1_1, n.relu1_1 = conv_relu(n.image, 64,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.pool5 = max_pool(n.relu5_3)

    n.fc6, n.relu6 = fc_relu(n.pool5, 4096,
                             fix_param=config.fix_vgg,
                             finetune=(not config.fix_vgg))
    
    if config.vgg_dropout:
        n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
        n.fc7, n.relu7 = fc_relu(n.drop6, 4096,
                                 fix_param=config.fix_vgg,
                                 finetune=(not config.fix_vgg))
        n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
        n.fc8 = fc(n.drop7, 1000,
                   fix_param=config.fix_vgg,
                   finetune=(not config.fix_vgg))
    else:
        n.fc7, n.relu7 = fc_relu(n.relu6, 4096,
                                 fix_param=config.fix_vgg,
                                 finetune=(not config.fix_vgg))
        n.fc8 = fc(n.relu7, 1000,
                   fix_param=config.fix_vgg,
                   finetune=(not config.fix_vgg))

    # embedding
    n.embed = L.Embed(n.language, input_dim=config.vocab_size,
                      num_output=config.embed_dim,
                      weight_filler=dict(type='uniform', min=-0.08, max=0.08))

    # LSTM
    n.lstm = L.LSTM(n.embed, n.cont,
                    recurrent_param=dict(num_output=config.lstm_dim,
                                         weight_filler=dict(type='uniform', min=-0.08, max=0.08),
                                         bias_filler=dict(type='constant', value=0)))
    tops = L.Slice(n.lstm, ntop=config.T, slice_param=dict(axis=0))
    for i in range(config.T - 1):
        n.__setattr__('slice'+str(i), tops[i])
        n.__setattr__('silence'+str(i), L.Silence(tops[i], ntop=0))
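    # (Slice splits the T x N x D LSTM output along the time axis; every
    #  timestep except the last is routed to a Silence layer so it is
    #  consumed, and only the final hidden state feeds the classifier.)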
    n.lstm_out = tops[-1]
    n.lstm_feat = L.Reshape(n.lstm_out, reshape_param=dict(shape=dict(dim=[-1, config.lstm_dim])))

    # L2 Normalize image and language features
    n.img_l2norm = L.L2Normalize(n.fc8)
    n.lstm_l2norm = L.L2Normalize(n.lstm_feat)
    n.img_l2norm_resh = L.Reshape(n.img_l2norm,
                                  reshape_param=dict(shape=dict(dim=[-1, 1000])))
    n.lstm_l2norm_resh = L.Reshape(n.lstm_l2norm,
                                   reshape_param=dict(shape=dict(dim=[-1, config.lstm_dim])))

    # Concatenate
    n.feat_all = L.Concat(n.lstm_l2norm_resh, n.img_l2norm_resh, n.spatial, concat_param=dict(axis=1))

    # MLP Classifier over concatenated feature
    n.mlp_l1, n.mlp_relu1 = fc_relu(n.feat_all, config.mlp_hidden_dims)
    if config.mlp_dropout:
        n.mlp_drop1 = L.Dropout(n.mlp_relu1, dropout_ratio=0.5, in_place=True)
        n.scores = fc(n.mlp_drop1, 1)
    else:
        n.scores = fc(n.mlp_relu1, 1)

    # Loss Layer
    n.loss = L.SigmoidCrossEntropyLoss(n.scores, n.label)

    return n.to_proto()
Example #5
def build_AlexNet(split,
                  num_classes,
                  batch_size,
                  resize_w,
                  resize_h,
                  crop_w=0,
                  crop_h=0,
                  crop_margin=0,
                  mirror=0,
                  rotate=0,
                  HSV_prob=0,
                  HSV_jitter=0,
                  train=True):
    weight_param = dict(lr_mult=1, decay_mult=1)
    bias_param = dict(lr_mult=2, decay_mult=0)
    learned_param = [weight_param, bias_param]

    frozen_param = [dict(lr_mult=0)] * 2

    n = caffe.NetSpec()

    pydata_params = dict(split=split, mean=(104.00699, 116.66877, 122.67892))

    pydata_params['dir'] = '../../../datasets/WebVision'
    pydata_params['train'] = True
    pydata_params['batch_size'] = batch_size
    pydata_params['resize'] = False
    pydata_params['resize_w'] = resize_w
    pydata_params['resize_h'] = resize_h
    pydata_params['crop_w'] = crop_w
    pydata_params['crop_h'] = crop_h
    pydata_params['crop_margin'] = crop_margin
    pydata_params['mirror'] = mirror
    pydata_params['rotate'] = rotate
    pydata_params['HSV_prob'] = HSV_prob
    pydata_params['HSV_jitter'] = HSV_jitter
    pydata_params['num_classes'] = num_classes

    n.data, n.label, n.label_score = L.Python(
        module='layers',
        layer='customDataLayerWithLabelScore',
        ntop=3,
        param_str=str(pydata_params))

    n.conv1, n.relu1 = conv_relu(n.data, 11, 96, stride=4, param=learned_param)
    n.pool1 = max_pool(n.relu1, 3, stride=2)
    n.norm1 = L.LRN(n.pool1, local_size=5, alpha=1e-4, beta=0.75)
    n.conv2, n.relu2 = conv_relu(n.norm1,
                                 5,
                                 256,
                                 pad=2,
                                 group=2,
                                 param=learned_param)
    n.pool2 = max_pool(n.relu2, 3, stride=2)
    n.norm2 = L.LRN(n.pool2, local_size=5, alpha=1e-4, beta=0.75)
    n.conv3, n.relu3 = conv_relu(n.norm2, 3, 384, pad=1, param=learned_param)
    n.conv4, n.relu4 = conv_relu(n.relu3,
                                 3,
                                 384,
                                 pad=1,
                                 group=2,
                                 param=learned_param)
    n.conv5, n.relu5 = conv_relu(n.relu4,
                                 3,
                                 256,
                                 pad=1,
                                 group=2,
                                 param=learned_param)
    n.pool5 = max_pool(n.relu5, 3, stride=2)
    n.fc6, n.relu6 = fc_relu(n.pool5, 4096, param=learned_param)  #4096
    if train:
        n.drop6 = fc7input = L.Dropout(n.relu6, in_place=True)
    else:
        fc7input = n.relu6
    n.fc7, n.relu7 = fc_relu(fc7input, 4096, param=learned_param)  #4096
    if train:
        n.drop7 = fc8input = L.Dropout(n.relu7, in_place=True)
    else:
        fc8input = n.relu7

    fc8 = L.InnerProduct(fc8input,
                         num_output=num_classes,
                         weight_filler=dict(type='gaussian', std=0.005),
                         bias_filler=dict(type='constant', value=0.1),
                         param=learned_param)

    n.__setattr__('classifier', fc8)
    if not train:
        n.probs = L.Softmax(fc8)

    #n.loss = L.SoftmaxWithLoss(fc8, n.label)

    n.loss = L.Python(fc8,
                      n.label,
                      n.label_score,
                      module='layers',
                      layer='SoftmaxSoftLabel',
                      ntop=1)

    n.acc = L.Accuracy(fc8, n.label)

    if train:
        with open('train.prototxt', 'w') as f:
            f.write(str(n.to_proto()))
            return f.name
    else:
        with open('val.prototxt', 'w') as f:
            f.write(str(n.to_proto()))
            return f.name
Example #6
def fcn(split, tops):
    n = caffe.NetSpec()
    n.color, n.label = L.Python(module='nyud_layers',
                                layer='NYUDSegDataLayer',
                                ntop=2,
                                param_str=str(
                                    dict(nyud_dir='../data/nyud',
                                         split=split,
                                         tops=tops,
                                         seed=1337)))
    n.data = L.Concat(n.color)

    # the base net
    n.conv1_1_bgrd, n.relu1_1 = conv_relu(n.data, 64, pad=100)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
    n.pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
    n.pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
    n.pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
    n.pool5 = max_pool(n.relu5_3)

    # fully conv
    n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
    n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
    n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
    n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)

    n.score_fr = L.Convolution(
        n.drop7,
        num_output=40,
        kernel_size=1,
        pad=0,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    n.upscore = L.Deconvolution(n.score_fr,
                                convolution_param=dict(num_output=40,
                                                       kernel_size=64,
                                                       stride=32,
                                                       bias_term=False),
                                param=[dict(lr_mult=0)])
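    # (kernel_size = 2 * stride gives a 32x bilinear-style upsampling back
    #  to roughly input scale; the crop below removes the offset introduced
    #  by pad=100 in conv1_1_bgrd.)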
    n.score = crop(n.upscore, n.data)
    n.loss = L.SoftmaxWithLoss(n.score,
                               n.label,
                               loss_param=dict(normalize=False,
                                               ignore_label=255))

    return n.to_proto()
Example #7
def add_multilabel_err_layer(net, bottom, name):
    """ Add a MultilabelErr layer """
    net[name] = L.Python(bottom[0],
                         bottom[1],
                         python_param=dict(module='layers.multilabel_err',
                                           layer='MultiLabelErr'))
Example #8
def mfb_baseline(mode, batchsize, T, question_vocab_size, folder):
    n = caffe.NetSpec()
    mode_str = json.dumps({
        'mode': mode,
        'batchsize': batchsize,
        'folder': folder
    })
    if mode == 'val':
        n.data, n.cont, n.img_feature, n.label = L.Python( \
            module='vqa_data_layer', layer='VQADataProviderLayer', \
            param_str=mode_str, ntop=4)
    else:
        n.data, n.cont, n.img_feature, n.label = L.Python( \
            module='vqa_data_layer_kld', layer='VQADataProviderLayer', \
            param_str=mode_str, ntop=4)
    n.embed = L.Embed(n.data, input_dim=question_vocab_size, num_output=300, \
                      weight_filler=dict(type='xavier'))
    n.embed_tanh = L.TanH(n.embed)

    # LSTM
    n.lstm1 = L.LSTM( \
        n.embed_tanh, n.cont, \
        recurrent_param=dict( \
            num_output=config.LSTM_UNIT_NUM, \
            weight_filler=dict(type='xavier')))
    tops1 = L.Slice(n.lstm1,
                    ntop=config.MAX_WORDS_IN_QUESTION,
                    slice_param={'axis': 0})
    for i in range(config.MAX_WORDS_IN_QUESTION - 1):
        n.__setattr__('slice_first' + str(i), tops1[int(i)])
        n.__setattr__('silence_data_first' + str(i),
                      L.Silence(tops1[int(i)], ntop=0))
    n.lstm1_out = tops1[config.MAX_WORDS_IN_QUESTION - 1]
    n.lstm1_reshaped = L.Reshape(n.lstm1_out, \
                                 reshape_param=dict( \
                                     shape=dict(dim=[-1, 1024])))
    n.q_feat = L.Dropout(
        n.lstm1_reshaped,
        dropout_param={'dropout_ratio': config.LSTM_DROPOUT_RATIO})
    '''
    Coarse Image-Question MFB fusion
    '''

    n.mfb_q_proj = L.InnerProduct(n.q_feat,
                                  num_output=config.JOINT_EMB_SIZE,
                                  weight_filler=dict(type='xavier'))
    n.mfb_i_proj = L.InnerProduct(n.img_feature,
                                  num_output=config.JOINT_EMB_SIZE,
                                  weight_filler=dict(type='xavier'))
    n.mfb_iq_eltwise = L.Eltwise(n.mfb_q_proj,
                                 n.mfb_i_proj,
                                 eltwise_param=dict(operation=P.Eltwise.PROD))
    n.mfb_iq_drop = L.Dropout(
        n.mfb_iq_eltwise,
        dropout_param={'dropout_ratio': config.MFB_DROPOUT_RATIO})
    n.mfb_iq_resh = L.Reshape(
        n.mfb_iq_drop,
        reshape_param=dict(shape=dict(
            dim=[-1, 1, config.MFB_OUT_DIM, config.MFB_FACTOR_NUM])))
    n.mfb_iq_sumpool = L.Pooling(n.mfb_iq_resh, pool=P.Pooling.SUM, \
                                 pooling_param=dict(kernel_w=config.MFB_FACTOR_NUM, kernel_h=1))
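    # (Sum-pooling across the MFB_FACTOR_NUM axis performs the low-rank MFB
    #  fusion z = sum_k (q_proj * i_proj)_k; signed sqrt and L2
    #  normalization follow below.)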
    n.mfb_out = L.Reshape(n.mfb_iq_sumpool, \
                          reshape_param=dict(shape=dict(dim=[-1, config.MFB_OUT_DIM])))
    n.mfb_sign_sqrt = L.SignedSqrt(n.mfb_out)
    n.mfb_l2 = L.L2Normalize(n.mfb_sign_sqrt)

    n.prediction = L.InnerProduct(n.mfb_l2,
                                  num_output=config.NUM_OUTPUT_UNITS,
                                  weight_filler=dict(type='xavier'))
    if mode == 'val':
        n.loss = L.SoftmaxWithLoss(n.prediction, n.label)
    else:
        n.loss = L.SoftmaxKLDLoss(n.prediction, n.label)
    return n.to_proto()
Example #9
def fcn(mode):
    net = caffe.NetSpec()
    data_params = dict(mode=mode, mean=(104.00699, 116.66877, 122.67892),
                       seed=1337)
    if mode == 'train':
        data_params['data_dir'] = '/jet/prs/workspace/VOCdevkit/VOC2012'  ##TODO
        data_layer = 'TrainingDataLayer'
    elif mode == 'val':
        data_params['data_dir'] = '/jet/prs/workspace/VOCdevkit/VOC2012'  ##TODO
        data_layer = 'ValidDataLayer'

    net.data, net.label = L.Python(module='data_layer', layer=data_layer,
                                   ntop=2, param_str=str(data_params))

    # layer1 , conv+relu -> conv+relu -> max_pooling
    net.conv1_1 = conv(net.data, 64, pad=100)
    net.relu1_1 = relu(net.conv1_1)
    net.conv1_2 = conv(net.relu1_1, 64)
    net.relu1_2 = relu(net.conv1_2)
    net.pool1 = max_pooling(net.relu1_2)

    # layer2, conv+relu -> conv+relu -> max_pooling
    net.conv2_1 = conv(net.pool1, 128)
    net.relu2_1 = relu(net.conv2_1)
    net.conv2_2 = conv(net.relu2_1, 128)
    net.relu2_2 = relu(net.conv2_2)
    net.pool2 = max_pooling(net.relu2_2)

    # layer3, conv+relu -> conv+relu -> max_pooling
    net.conv3_1 = conv(net.pool2, 256)
    net.relu3_1 = relu(net.conv3_1)
    net.conv3_2 = conv(net.relu3_1, 256)
    net.relu3_2 = relu(net.conv3_2)
    net.conv3_3 = conv(net.relu3_2, 256)
    net.relu3_3 = relu(net.conv3_3)
    net.pool3 = max_pooling(net.relu3_3)

    # layer4, conv+relu -> conv+relu -> max_pooling
    net.conv4_1 = conv(net.pool3, 512)
    net.relu4_1 = relu(net.conv4_1)
    net.conv4_2 = conv(net.relu4_1, 512)
    net.relu4_2 = relu(net.conv4_2)
    net.conv4_3 = conv(net.relu4_2, 512)
    net.relu4_3 = relu(net.conv4_3)
    net.pool4 = max_pooling(net.relu4_3)

    # layer5, conv+relu -> conv+relu -> max_pooling
    net.conv5_1 = conv(net.pool4, 512)
    net.relu5_1 = relu(net.conv5_1)
    net.conv5_2 = conv(net.relu5_1, 512)
    net.relu5_2 = relu(net.conv5_2)
    net.conv5_3 = conv(net.relu5_2, 512)
    net.relu5_3 = relu(net.conv5_3)
    net.pool5 = max_pooling(net.relu5_3)


    # layer6, conv + relu -> dropout
    net.fc6 = conv(net.pool5, 4096, ks=7, pad=0)
    net.relu6 = relu(net.fc6)
    net.drop6 = dropout(net.relu6)

    # layer7, conv + relu -> dropout
    net.fc7 = conv(net.drop6, 4096, ks=1, pad=0)
    net.relu7 = relu(net.fc7)
    net.drop7 = dropout(net.relu7)

    # layer8, forward score
    net.score_fr = conv(net.drop7, 21, ks=1, pad=0)
    net.upscore1 = deconv(net.score_fr, 21, ks=4, stride=2)

    # layer9, skip with layer4: conv -> crop -> sum up -> deconv
    net.score_pool4 = conv(net.pool4, 21, ks=1, pad=0)
    net.score_pool4_crop = crop(net.score_pool4, net.upscore1)
    net.integrate_pool4 = sumup(net.upscore1, net.score_pool4_crop)
    net.upscore2 = deconv(net.integrate_pool4, 21, ks=32, stride=16)
    

    net.score = crop(net.upscore2, net.data)
    net.loss = softmax(net.score, net.label)

    # layer9, skip with layer4: conv -> crop -> sum up -> deconv
    #net.score2_1 = conv(net.pool4, 21, ks=1, pad=0)
    #net.score2_1c = crop(net.score2_1, net.upscore1_1)
    #net.sum_score2_1 = sumup(net.upscore1_1, net.score2_1c)
    #net.upscore2_1 = deconv(net.sum_score2_1, 21)

    # layer10, skip with layer3: conv->crop->sum up->deconv
    #net.score3_1 = conv(net.pool3, 21, ks=1, pad=0)
    #net.score3_1c = crop(net.score3_1, net.upscore2_1)
    #net.sum_score3_1 = sumup(net.upscore2_1, net.score3_1c)
    #net.upscore3_1 = deconv(net.sum_score3_1, 21)

    #net.score = crop(net.upscore3_1, net.data)
    #net.loss = softmax(net.score, net.data)

    return net.to_proto()
Example #10
def generate_conv_features(split, config):
    n = caffe.NetSpec()
    dataset = config.dataset
    batch_size = config.N
    mode_str = str(dict(dataset=dataset, split=split, batch_size=batch_size))
    n.image, n.label = L.Python(module=config.data_provider,
                                layer=config.data_provider_layer_1,
                                param_str=mode_str,
                                ntop=2)

    # the base net (VGG-16)
    n.conv1_1, n.relu1_1 = conv_relu(n.image, 64,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.pool3 = max_pool(n.relu3_3)
    # spatial L2 norm
    n.pool3_lrn = L.LRN(n.pool3, local_size=513, alpha=513, beta=0.5, k=1e-16)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    # spatial L2 norm
    n.relu4_3_lrn = L.LRN(n.relu4_3, local_size=1025, alpha=1025, beta=0.5, k=1e-16)
    #n.pool4 = max_pool(n.relu4_3)

    #n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512,
    #                                 fix_param=config.fix_vgg,
    #                                 finetune=(not config.fix_vgg))
    #n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512,
    #                                 fix_param=config.fix_vgg,
    #                                 finetune=(not config.fix_vgg))
    #n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512,
    #                                 fix_param=config.fix_vgg,
    #                                 finetune=(not config.fix_vgg))
    # upsampling feature map
    #n.relu5_3_upsampling = L.Deconvolution(n.relu5_3,
    #                                       convolution_param=dict(num_output=512,
    #                                                              group=512,
    #                                                              kernel_size=4,
    #                                                              stride=2,
    #                                                              pad=1,
    #                                                              bias_term=False,
    #                                                              weight_filler=dict(type='bilinear')),
    #                                       param=[dict(lr_mult=0, decay_mult=0)])
    # spatial L2 norm
    #n.relu5_3_lrn = L.LRN(n.relu5_3_upsampling, local_size=1025, alpha=1025, beta=0.5, k=1e-16)

    # concat all skip features
    #n.feat_all = n.relu4_3_lrn
    n.feat_all = L.Concat(n.pool3_lrn, n.relu4_3_lrn, concat_param=dict(axis=1))
    #n.feat_all = L.Concat(n.pool3_lrn, n.relu4_3_lrn, n.relu5_3_lrn, concat_param=dict(axis=1))

    return n.to_proto()
Example #11
def fcn(split):
  n = caffe.NetSpec()
  if split=='train':
    data_params = dict(mean=(104.00699, 116.66877, 122.67892))
    data_params['root'] = 'data/HED-BSDS'
    data_params['source'] = "train_pair.lst"
    data_params['shuffle'] = True
    n.data, n.label = L.Python(module='pylayer', layer='ImageLabelmapDataLayer', ntop=2, \
    param_str=str(data_params))
  elif split == 'test':
    n.data = L.Input(name='data', input_param=dict(shape=dict(dim=[1,3,500,500])))
  else:
    raise Exception("Invalid phase")

  n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=1)
  n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
  n.pool1 = max_pool(n.relu1_2)

  n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
  n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
  n.pool2 = max_pool(n.relu2_2)

  n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
  n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
  n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
  n.pool3 = max_pool(n.relu3_3)

  n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
  n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
  n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
  n.pool4 = max_pool(n.relu4_3)
  
  n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512, mult=[100,1,200,0])
  n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512, mult=[100,1,200,0])
  n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512, mult=[100,1,200,0])
  
  # DSN1
  n.score_dsn1 = full_conv(n.conv1_2, 'score-dsn1', lr=1)
  n.upscore_dsn1 = crop(n.score_dsn1, n.data)
  if split=='train':
    n.loss1 = L.BalanceCrossEntropyLoss(n.upscore_dsn1, n.label, loss_param=dict(normalize=False))
  if split=='test':
    n.sigmoid_dsn1 = L.Sigmoid(n.upscore_dsn1)
  # DSN2
  n.score_dsn2 = full_conv(n.conv2_2, 'score-dsn2', lr=1)
  n.score_dsn2_up = upsample(n.score_dsn2, stride=2)
  n.upscore_dsn2 = crop(n.score_dsn2_up, n.data)
  if split=='train':
    n.loss2 = L.BalanceCrossEntropyLoss(n.upscore_dsn2, n.label, loss_param=dict(normalize=False))
  if split=='test':
    n.sigmoid_dsn2 = L.Sigmoid(n.upscore_dsn2)
  # DSN3
  n.score_dsn3 = full_conv(n.conv3_3, 'score-dsn3', lr=1)
  n.score_dsn3_up = upsample(n.score_dsn3, stride=4)
  n.upscore_dsn3 = crop(n.score_dsn3_up, n.data)
  if split=='train':
    n.loss3 = L.BalanceCrossEntropyLoss(n.upscore_dsn3, n.label, loss_param=dict(normalize=False))
  if split=='test':
    n.sigmoid_dsn3 = L.Sigmoid(n.upscore_dsn3)
  # DSN4
  n.score_dsn4 = full_conv(n.conv4_3, 'score-dsn4', lr=1)
  n.score_dsn4_up = upsample(n.score_dsn4, stride=8)
  n.upscore_dsn4 = crop(n.score_dsn4_up, n.data)
  if split=='train':
    n.loss4 = L.BalanceCrossEntropyLoss(n.upscore_dsn4, n.label, loss_param=dict(normalize=False))
  if split=='test':
    n.sigmoid_dsn4 = L.Sigmoid(n.upscore_dsn4)
  # DSN5
  n.score_dsn5 = full_conv(n.conv5_3, 'score-dsn5', lr=1)
  n.score_dsn5_up = upsample(n.score_dsn5, stride=16)
  n.upscore_dsn5 = crop(n.score_dsn5_up, n.data)
  if split=='train':
    n.loss5 = L.BalanceCrossEntropyLoss(n.upscore_dsn5, n.label, loss_param=dict(normalize=False))
  elif split=='test':
    n.sigmoid_dsn5 = L.Sigmoid(n.upscore_dsn5)
  else:
    raise Exception("Error")
  # concat and fuse
  n.concat_upscore = L.Concat(n.upscore_dsn1,
                              n.upscore_dsn2,
                              n.upscore_dsn3,
                              n.upscore_dsn4,
                              n.upscore_dsn5,
                              name='concat', concat_param=dict(concat_dim=1))
  n.upscore_fuse = L.Convolution(n.concat_upscore, name='new-score-weighting', 
                 num_output=1, kernel_size=1,
                 param=[dict(lr_mult=0.001, decay_mult=1), dict(lr_mult=0.002, decay_mult=0)],
                 weight_filler=dict(type='constant', value=0.2))
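  # (The constant 0.2 filler initializes the 1x1 fusion convolution to an
  #  average of the five side outputs, following HED.)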
  if split=='test':
    n.sigmoid_fuse = L.Sigmoid(n.upscore_fuse)
  if split=='train':
    n.loss_fuse = L.BalanceCrossEntropyLoss(n.upscore_fuse, n.label, loss_param=dict(normalize=False))
  return n.to_proto()
Example #12
def cnn(split):
    n = caffe.NetSpec()
    pydata_params = dict(dataset_dir='/home/kevin/dataset/processed_data3',
                         variable='depth_map',
                         split=split,
                         mean=(2),
                         seed=1337,
                         batch_size=256,
                         frame_num=30,
                         img_size=(227, 227))
    if split == 'deploy':
        n.img = L.Input(
            name='input',
            ntop=1,
            shape=[dict(dim=1),
                   dict(dim=1),
                   dict(dim=227),
                   dict(dim=227)])
    else:
        if split == 'train':
            pydata_params['dtype'] = 'frame'
            pylayer = 'ModelNetDataLayer'
        else:
            pydata_params['dtype'] = 'object'
            pylayer = 'ModelNetDataLayer'

        n.img, n.label = L.Python(module='data_layers.model_net_layer',
                                  layer=pylayer,
                                  ntop=2,
                                  param_str=str(pydata_params))

    # the base net
    n.conv1, n.relu1 = conv_relu("conv1", n.img, 96, ks=11, stride=4, pad=0)
    n.pool1 = max_pool(n.relu1, ks=3)
    n.norm1 = L.LRN(n.pool1,
                    lrn_param=dict(local_size=5, alpha=0.0005, beta=0.75, k=2))

    n.conv2, n.relu2 = conv_relu("conv2", n.norm1, 256, ks=5, pad=2, group=2)
    n.pool2 = max_pool(n.relu2, ks=3)
    n.norm2 = L.LRN(n.pool2,
                    lrn_param=dict(local_size=5, alpha=0.0005, beta=0.75, k=2))

    n.conv3, n.relu3 = conv_relu("conv3", n.norm2, 384, ks=3, pad=1)

    n.conv4, n.relu4 = conv_relu("conv4", n.relu3, 384, ks=3, pad=1, group=2)

    n.conv5, n.relu5 = conv_relu("conv5", n.relu4, 256, ks=3, pad=1, group=2)

    n.pool5 = max_pool(n.relu5, ks=3)

    n.fc6, n.relu6 = fc_relu(n.pool5, 4096, lr1=1, lr2=2)
    n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
    n.fc7, n.relu7 = fc_relu(n.drop6, 4096, lr1=1, lr2=2)
    n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
    n.fc8 = fc(n.drop7, 40, lr1=1, lr2=2)

    if split != 'deploy':

        n.accuracy = L.Accuracy(n.fc8, n.label)
        n.loss = L.SoftmaxWithLoss(n.fc8, n.label)

        #n.loss = L.Python(n.fc8, n.label, loss_weight=1, module='nn_layers.max_softmax_loss_layer', layer='MaxSoftmaxLossLayer')

    # n.display = L.Scale(n.corr, param=[dict(lr_mult=0)], filler=dict(type='constant',value=1.0))
    # n.fc9_bn = L.BatchNorm(n.relu9, param=[dict(lr_mult=0),dict(lr_mult=0),dict(lr_mult=0)], batch_norm_param=dict(use_global_stats=True))

    return n.to_proto()
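A hypothetical driver emitting all three phase variants of this net:

for phase in ('train', 'val', 'deploy'):
    with open(phase + '.prototxt', 'w') as f:
        f.write(str(cnn(phase)))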
Example #13
 def python_layer(self, inputs, module, layer, param_str, ntop=1):
     return L.Python(*inputs,
                     module=module,
                     layer=layer,
                     param_str=str(param_str),
                     ntop=ntop)
Example #14
 def python_input_layer(self, module, layer, param_str):
     tops = L.Python(module=module,
                     layer=layer,
                     param_str=str(param_str),
                     ntop=len(param_str['top_names']))
     self.rename_tops(tops, param_str['top_names'])
Example #15
def pj_x(mode, batchsize, T, exp_T, question_vocab_size, exp_vocab_size):
    n = caffe.NetSpec()
    mode_str = json.dumps({'mode':mode, 'batchsize':batchsize})
    n.data, n.cont, n.img_feature, n.label, n.exp, n.exp_out, n.exp_cont_1, n.exp_cont_2 = \
        L.Python(module='vqa_data_provider_layer', layer='VQADataProviderLayer', param_str=mode_str, ntop=8)

    n.embed_ba = L.Embed(n.data, input_dim=question_vocab_size, num_output=300, \
        weight_filler=dict(type='uniform',min=-0.08,max=0.08), param=fixed_weights)
    n.embed = L.TanH(n.embed_ba) 

    n.exp_embed_ba = L.Embed(n.exp, input_dim=exp_vocab_size, num_output=300, \
        weight_filler=dict(type='uniform', min=-0.08, max=0.08))
    n.exp_embed = L.TanH(n.exp_embed_ba)

    # LSTM1
    n.lstm1 = L.LSTM(\
                   n.embed, n.cont,\
                   recurrent_param=dict(\
                       num_output=1024,\
                       weight_filler=dict(type='uniform',min=-0.08,max=0.08),\
                       bias_filler=dict(type='constant',value=0)),
                   param=fixed_weights_lstm)
    tops1 = L.Slice(n.lstm1, ntop=T, slice_param={'axis':0})
    for i in range(T-1):
        n.__setattr__('slice_first'+str(i), tops1[int(i)])
        n.__setattr__('silence_data_first'+str(i), L.Silence(tops1[int(i)],ntop=0))
    n.lstm1_out = tops1[T-1]
    n.lstm1_reshaped = L.Reshape(n.lstm1_out,\
                          reshape_param=dict(\
                              shape=dict(dim=[-1,1024])))
    n.lstm1_reshaped_droped = L.Dropout(n.lstm1_reshaped,dropout_param={'dropout_ratio':0.3})
    n.lstm1_droped = L.Dropout(n.lstm1,dropout_param={'dropout_ratio':0.3})
    # LSTM2
    n.lstm2 = L.LSTM(\
                   n.lstm1_droped, n.cont,\
                   recurrent_param=dict(\
                       num_output=1024,\
                       weight_filler=dict(type='uniform',min=-0.08,max=0.08),\
                       bias_filler=dict(type='constant',value=0)),
                   param=fixed_weights_lstm)
    tops2 = L.Slice(n.lstm2, ntop=T, slice_param={'axis':0})
    for i in range(T-1):
        n.__setattr__('slice_second'+str(i), tops2[int(i)])
        n.__setattr__('silence_data_second'+str(i), L.Silence(tops2[int(i)],ntop=0))
    n.lstm2_out = tops2[T-1]
    n.lstm2_reshaped = L.Reshape(n.lstm2_out,\
                          reshape_param=dict(\
                              shape=dict(dim=[-1,1024])))
    n.lstm2_reshaped_droped = L.Dropout(n.lstm2_reshaped,dropout_param={'dropout_ratio':0.3})
    concat_botom = [n.lstm1_reshaped_droped, n.lstm2_reshaped_droped]
    n.lstm_12 = L.Concat(*concat_botom)


    # Tile question feature
    n.q_emb_resh = L.Reshape(n.lstm_12, reshape_param=dict(shape=dict(dim=[-1,2048,1,1])))
    n.q_emb_tiled_1 = L.Tile(n.q_emb_resh, axis=2, tiles=14)
    n.q_emb_resh_tiled = L.Tile(n.q_emb_tiled_1, axis=3, tiles=14)

    # Embed image feature
    n.i_emb = L.Convolution(n.img_feature, kernel_size=1, stride=1,
                            num_output=2048, pad=0, weight_filler=dict(type='xavier'),
                            param=fixed_weights)

    # Eltwise product and normalization
    n.eltwise = L.Eltwise(n.q_emb_resh_tiled, n.i_emb, eltwise_param={'operation': P.Eltwise.PROD})
    n.eltwise_sqrt = L.SignedSqrt(n.eltwise)
    n.eltwise_l2 = L.L2Normalize(n.eltwise_sqrt)
    n.eltwise_drop = L.Dropout(n.eltwise_l2, dropout_param={'dropout_ratio': 0.3})

    # Attention for VQA
    n.att_conv1 = L.Convolution(n.eltwise_drop, kernel_size=1, stride=1, num_output=512, pad=0, weight_filler=dict(type='xavier'), param=fixed_weights)
    n.att_conv1_relu = L.ReLU(n.att_conv1)
    n.att_conv2 = L.Convolution(n.att_conv1_relu, kernel_size=1, stride=1, num_output=1, pad=0, weight_filler=dict(type='xavier'), param=fixed_weights)
    n.att_reshaped = L.Reshape(n.att_conv2,reshape_param=dict(shape=dict(dim=[-1,1,14*14])))
    n.att_softmax = L.Softmax(n.att_reshaped, axis=2)
    n.att_map = L.Reshape(n.att_softmax,reshape_param=dict(shape=dict(dim=[-1,1,14,14])))
    
    dummy = L.DummyData(shape=dict(dim=[batchsize, 1]), data_filler=dict(type='constant', value=1), ntop=1)
    n.att_feature  = L.SoftAttention(n.img_feature, n.att_map, dummy)
    n.att_feature_resh = L.Reshape(n.att_feature, reshape_param=dict(shape=dict(dim=[-1,2048])))

    # eltwise product + normalization again for VQA
    n.i_emb2 = L.InnerProduct(n.att_feature_resh, num_output=2048, weight_filler=dict(type='xavier'), param=fixed_weights)
    n.eltwise2 = L.Eltwise(n.lstm_12, n.i_emb2, eltwise_param={'operation': P.Eltwise.PROD})
    n.eltwise2_sqrt = L.SignedSqrt(n.eltwise2)
    n.eltwise2_l2 = L.L2Normalize(n.eltwise2_sqrt)
    n.eltwise2_drop = L.Dropout(n.eltwise2_l2, dropout_param={'dropout_ratio': 0.3})

    n.prediction = L.InnerProduct(n.eltwise2_drop, num_output=3000, weight_filler=dict(type='xavier'), param=fixed_weights)
    n.loss = L.SoftmaxWithLoss(n.prediction, n.label)

    # Embed VQA GT answer during training
    n.exp_emb_ans = L.Embed(n.label, input_dim=3000, num_output=300, \
        weight_filler=dict(type='uniform', min=-0.08, max=0.08))
    n.exp_emb_ans_tanh = L.TanH(n.exp_emb_ans)
    n.exp_emb_ans2 = L.InnerProduct(n.exp_emb_ans_tanh, num_output=2048, weight_filler=dict(type='xavier'))

    # Merge VQA answer and visual+textual feature
    n.exp_emb_resh = L.Reshape(n.exp_emb_ans2, reshape_param=dict(shape=dict(dim=[-1,2048,1,1])))
    n.exp_emb_tiled_1 = L.Tile(n.exp_emb_resh, axis=2, tiles=14)
    n.exp_emb_tiled = L.Tile(n.exp_emb_tiled_1, axis=3, tiles=14)
    n.eltwise_emb = L.Convolution(n.eltwise, kernel_size=1, stride=1, num_output=2048, pad=0, weight_filler=dict(type='xavier'))
    n.exp_eltwise = L.Eltwise(n.eltwise_emb,  n.exp_emb_tiled, eltwise_param={'operation': P.Eltwise.PROD})
    n.exp_eltwise_sqrt = L.SignedSqrt(n.exp_eltwise)
    n.exp_eltwise_l2 = L.L2Normalize(n.exp_eltwise_sqrt)
    n.exp_eltwise_drop = L.Dropout(n.exp_eltwise_l2, dropout_param={'dropout_ratio': 0.3})

    # Attention for Explanation
    n.exp_att_conv1 = L.Convolution(n.exp_eltwise_drop, kernel_size=1, stride=1, num_output=512, pad=0, weight_filler=dict(type='xavier'))
    n.exp_att_conv1_relu = L.ReLU(n.exp_att_conv1)
    n.exp_att_conv2 = L.Convolution(n.exp_att_conv1_relu, kernel_size=1, stride=1, num_output=1, pad=0, weight_filler=dict(type='xavier'))
    n.exp_att_reshaped = L.Reshape(n.exp_att_conv2,reshape_param=dict(shape=dict(dim=[-1,1,14*14])))
    n.exp_att_softmax = L.Softmax(n.exp_att_reshaped, axis=2)
    n.exp_att_map = L.Reshape(n.exp_att_softmax,reshape_param=dict(shape=dict(dim=[-1,1,14,14])))
    
    exp_dummy = L.DummyData(shape=dict(dim=[batchsize, 1]), data_filler=dict(type='constant', value=1), ntop=1)
    n.exp_att_feature_prev  = L.SoftAttention(n.img_feature, n.exp_att_map, exp_dummy)
    n.exp_att_feature_resh = L.Reshape(n.exp_att_feature_prev, reshape_param=dict(shape=dict(dim=[-1, 2048])))
    n.exp_att_feature_embed = L.InnerProduct(n.exp_att_feature_resh, num_output=2048, weight_filler=dict(type='xavier'))
    n.exp_lstm12_embed = L.InnerProduct(n.lstm_12, num_output=2048, weight_filler=dict(type='xavier'))
    n.exp_eltwise2 = L.Eltwise(n.exp_lstm12_embed, n.exp_att_feature_embed, eltwise_param={'operation': P.Eltwise.PROD})
    n.exp_att_feature = L.Eltwise(n.exp_emb_ans2, n.exp_eltwise2, eltwise_param={'operation': P.Eltwise.PROD})


    # LSTM1 for Explanation
    n.exp_lstm1 = L.LSTM(\
                   n.exp_embed, n.exp_cont_1,\
                   recurrent_param=dict(\
                       num_output=2048,\
                       weight_filler=dict(type='uniform',min=-0.08,max=0.08),\
                       bias_filler=dict(type='constant',value=0)))

    n.exp_lstm1_dropped = L.Dropout(n.exp_lstm1,dropout_param={'dropout_ratio':0.3})

    # merge with LSTM1 for explanation
    n.exp_att_resh = L.Reshape(n.exp_att_feature, reshape_param=dict(shape=dict(dim=[1, -1, 2048])))
    n.exp_att_tiled = L.Tile(n.exp_att_resh, axis=0, tiles=exp_T)
    n.exp_eltwise_all = L.Eltwise(n.exp_lstm1_dropped, n.exp_att_tiled, eltwise_param={'operation': P.Eltwise.PROD})
    n.exp_eltwise_all_sqrt = L.SignedSqrt(n.exp_eltwise_all)
    n.exp_eltwise_all_l2 = L.L2Normalize(n.exp_eltwise_all_sqrt)
    n.exp_eltwise_all_drop = L.Dropout(n.exp_eltwise_all_l2, dropout_param={'dropout_ratio': 0.3})

    # LSTM2 for Explanation
    n.exp_lstm2 = L.LSTM(\
                   n.exp_eltwise_all_drop, n.exp_cont_2,\
                   recurrent_param=dict(\
                       num_output=1024,\
                       weight_filler=dict(type='uniform',min=-0.08,max=0.08),\
                       bias_filler=dict(type='constant',value=0)))
    n.exp_lstm2_dropped = L.Dropout(n.exp_lstm2,dropout_param={'dropout_ratio':0.3})
    
    n.exp_prediction = L.InnerProduct(n.exp_lstm2_dropped, num_output=exp_vocab_size, weight_filler=dict(type='xavier'), axis=2)

    n.exp_loss = L.SoftmaxWithLoss(n.exp_prediction, n.exp_out,
                                   loss_param=dict(ignore_label=-1),
                                   softmax_param=dict(axis=2))
    n.exp_accuracy = L.Accuracy(n.exp_prediction, n.exp_out, axis=2, ignore_label=-1)

    return n.to_proto()
Пример #16
	def createEmbeddingNetwork(self, database_list_path='.', batch_size=20, phase=0):
		dataset_path = database_list_path
		dataLayer = L.HDF5Data(name='dataLayer', 
						source=dataset_path, 
						batch_size=batch_size, 
						ntop=2+self.number_of_neighbors,
						include=list([dict(phase=phase)]))# tops-> target, [neighbors], negative
		#data -> [target, neighbor1, neighbor2, ..., neighbork, negative]
		self.net.target = dataLayer[0]
		self.net.negative = dataLayer[-1]
		for l in range(1, self.number_of_neighbors+1):
			setattr(self.net, 'neighbor{0}'.format(l-1), dataLayer[l])		

		
		#First layer of inner product 
		self.net.inner_product_target = self.getInnerProduct('target', 'inner_product_target', 1)
		self.net.inner_product_negative = self.getInnerProduct('negative', 'inner_product_negative', 1)
		for i in range(0, self.number_of_neighbors):
			layer = self.getInnerProduct('neighbor{0}'.format(i), 'inner_product_neighbor{0}'.format(i), 1)
			setattr(self.net, 'inner_product_neighbor{0}'.format(i), layer)
		
		#ReLU
		self.net.relu_target = L.ReLU(self.net.inner_product_target, name='relu_target', in_place=True)
		self.net.relu_negative = L.ReLU(self.net.inner_product_negative, name='relu_negative', in_place=True)
		for i in range(0, self.number_of_neighbors):
			layer = L.ReLU(getattr(self.net, 'inner_product_neighbor{0}'.format(i)), 
					name='relu_neighbor{0}'.format(i),
					in_place=True)
			setattr(self.net, 'relu_neighbor{0}'.format(i), layer)
		
		#Second layer of inner product
		#self.net.inner_product2_target = self.getInnerProduct('inner_product_target', 'inner_product2_target', 2)
		#self.net.inner_product2_negative = self.getInnerProduct('inner_product_negative', 'inner_product2_negative', 2)
		#for i in range(0, self.number_of_neighbors):
		#	layer = self.getInnerProduct('inner_product_neighbor{0}'.format(i), 
		#					'inner_product2_neighbor{0}'.format(i), 2)
		#	setattr(self.net, 'inner_product2_neighbor{0}'.format(i), layer)
			
		#Context
		'''
		context_sum_bottom = []
		for i in range(0, self.number_of_neighbors):
			context_sum_bottom.append(getattr(self.net, 'inner_product2_neighbor{0}'.format(i)))
		coeff = 1.0/self.number_of_neighbors		
		self.net.context_sum = L.Eltwise(*context_sum_bottom,
						name='context_sum',
						operation=P.Eltwise.SUM, # 1 -> SUM
						coeff=list([coeff for i in range(self.number_of_neighbors)]))
		
		#Target - Negative
		self.net.target_negative_diff = L.Eltwise(self.net.inner_product2_target, self.net.inner_product2_negative,
								name='target_negative_diff',
								operation=P.Eltwise.SUM, # SUM
								coeff=list([1,-1])) # target - negative
		'''
		#Context
		context_sum_bottom = []
		for i in range(0, self.number_of_neighbors):
			context_sum_bottom.append(getattr(self.net, 'inner_product_neighbor{0}'.format(i)))
		coeff = 1.0/self.number_of_neighbors		
		self.net.context_sum = L.Eltwise(*context_sum_bottom,
						name='context_sum',
						operation=P.Eltwise.SUM, #  SUM
						coeff=list([coeff for i in range(self.number_of_neighbors)]))
		
		#Target - Negative
		self.net.target_negative_diff = L.Eltwise(self.net.inner_product_target, self.net.inner_product_negative,
								name='target_negative_diff',
								operation=P.Eltwise.SUM, # SUM
								coeff=list([1,-1])) # target - negative
		

		#Loss layer
		self.net.loss = L.Python(self.net.context_sum, self.net.target_negative_diff,
						name='loss',
						module='my_dot_product_layer',
						layer='MyHingLossDotProductLayer')
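The getInnerProduct helper referenced above is defined elsewhere in this class. A minimal sketch of what it might look like, assuming it wraps an InnerProduct layer over a named top (the per-level output sizes and fillers below are assumptions, not from the original):

	def getInnerProduct(self, bottom, name, level):
		# Hypothetical sketch: build an InnerProduct layer over an existing top;
		# the per-level output sizes and fillers are assumed.
		num_outputs = {1: 128, 2: 64}
		return L.InnerProduct(getattr(self.net, bottom), name=name,
					num_output=num_outputs[level],
					weight_filler=dict(type='xavier'),
					bias_filler=dict(type='constant', value=0))

In practice the target, neighbor, and negative branches of such an embedding net would share weights, which Caffe expresses through identically named param specs; that detail is omitted here.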
Пример #17
def fcn(split):
    n = caffe.NetSpec()
    pydata_params = dict(split=split,
                         mean=(104.00699, 116.66877, 122.67892),
                         seed=1337)
    if split == 'train':
        pydata_params['sbdd_dir'] = '../data/sbdd/dataset'
        pylayer = 'SBDDSegDataLayer'
    else:
        pydata_params['voc_dir'] = '/home/tramac/mydata/VOCdevkit/VOC2012'
        pylayer = 'VOCSegDataLayer'
    n.data, n.label = L.Python(module='voc_layers',
                               layer=pylayer,
                               ntop=2,
                               param_str=str(pydata_params))

    # the base net
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
    n.pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
    n.pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
    n.pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
    n.pool5 = max_pool(n.relu5_3)

    # fully conv
    n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
    n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
    n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
    n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
    n.score_fr = L.Convolution(
        n.drop7,
        num_output=21,
        kernel_size=1,
        pad=0,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    n.upscore = L.Deconvolution(n.score_fr,
                                convolution_param=dict(num_output=21,
                                                       kernel_size=64,
                                                       stride=32,
                                                       bias_term=False),
                                param=[dict(lr_mult=0)])
    n.score = crop(n.upscore, n.data)
    n.loss = L.SoftmaxWithLoss(n.score,
                               n.label,
                               loss_param=dict(normalize=False,
                                               ignore_label=255))

    return n.to_proto()
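The conv_relu, max_pool, and crop helpers used by the FCN examples here are not shown in the snippets. conv_relu and max_pool are commonly defined as in the reference FCN implementation, and crop comes from the coord_map utilities bundled with pycaffe:

from caffe import layers as L, params as P
from caffe.coord_map import crop  # aligns and crops one top to another

def conv_relu(bottom, nout, ks=3, stride=1, pad=1):
    # convolution + in-place ReLU with the standard weight/bias lr multipliers
    conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
                         num_output=nout, pad=pad,
                         param=[dict(lr_mult=1, decay_mult=1),
                                dict(lr_mult=2, decay_mult=0)])
    return conv, L.ReLU(conv, in_place=True)

def max_pool(bottom, ks=2, stride=2):
    return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)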
Пример #18
pydata_params = dict(split=split_train, mean=(104, 117, 123))

pydata_params['dir'] = '../../../datasets/WebVision'
pydata_params['train'] = True
pydata_params['num_classes'] = num_labels
pydata_params['batch_size'] = batch_size
pydata_params['resize'] = resize
pydata_params['resize_w'] = resize_w
pydata_params['resize_h'] = resize_h
pydata_params['crop_w'] = crop_w
pydata_params['crop_h'] = crop_h
pydata_params['crop_margin'] = crop_margin
pydata_params['mirror'] = mirror
pydata_params['rotate_prob'] = rotate_prob
pydata_params['rotate_angle'] = rotation_angle
pydata_params['HSV_prob'] = HSV_prob
pydata_params['HSV_jitter'] = HSV_jitter
pydata_params['color_casting_prob'] = color_casting_prob
pydata_params['color_casting_jitter'] = color_casting_jitter
pydata_params['scaling_prob'] = scaling_prob
pydata_params['scaling_factor'] = scaling_factor

pylayer = 'customDataLayer'

n = caffe.NetSpec()
n.data, n.label = L.Python(module='layers',
                           layer=pylayer,
                           ntop=2,
                           param_str=str(pydata_params))
with open('prototxt/data_layer.prototxt', 'w') as f:
    f.write(str(n.to_proto()))
Пример #19
def qlstm(mode, batchsize, T, question_vocab_size):
    n = caffe.NetSpec()
    mode_str = json.dumps({'mode': mode, 'batchsize': batchsize})
    # n.data, n.cont, n.img_feature, n.label, n.glove = L.Python(\
    #     module='vqa_data_provider_layer', layer='VQADataProviderLayer', param_str=mode_str, ntop=5 )
    n.data, n.cont, n.img_feature, n.label = L.Python(\
        module='vqa_data_provider_layer', layer='VQADataProviderLayer', param_str=mode_str, ntop=4 )

    n.embed_ba = L.Embed(n.data, input_dim=question_vocab_size, num_output=300, \
        weight_filler=dict(type='uniform',min=-0.08,max=0.08))
    n.embed = L.TanH(n.embed_ba)
    # concat_word_embed = [n.embed, n.glove]
    # n.concat_embed = L.Concat(*concat_word_embed, concat_param={'axis': 2}) # T x N x 600

    # LSTM1
    n.lstm1 = L.LSTM(\
                   n.embed, n.cont,\
                   recurrent_param=dict(\
                       num_output=1024,\
                       weight_filler=dict(type='uniform',min=-0.08,max=0.08),\
                       bias_filler=dict(type='constant',value=0)))
    tops1 = L.Slice(n.lstm1, ntop=T, slice_param={'axis': 0})
    for i in range(T - 1):
        n.__setattr__('slice_first' + str(i), tops1[int(i)])
        n.__setattr__('silence_data_first' + str(i),
                      L.Silence(tops1[int(i)], ntop=0))
    n.lstm1_out = tops1[T - 1]
    n.lstm1_reshaped = L.Reshape(n.lstm1_out,\
                          reshape_param=dict(\
                              shape=dict(dim=[-1,1024])))
    n.lstm1_reshaped_droped = L.Dropout(n.lstm1_reshaped,
                                        dropout_param={'dropout_ratio': 0.3})
    n.lstm1_droped = L.Dropout(n.lstm1, dropout_param={'dropout_ratio': 0.3})
    # LSTM2
    n.lstm2 = L.LSTM(\
                   n.lstm1_droped, n.cont,\
                   recurrent_param=dict(\
                       num_output=1024,\
                       weight_filler=dict(type='uniform',min=-0.08,max=0.08),\
                       bias_filler=dict(type='constant',value=0)))
    tops2 = L.Slice(n.lstm2, ntop=T, slice_param={'axis': 0})
    for i in range(T - 1):
        n.__setattr__('slice_second' + str(i), tops2[int(i)])
        n.__setattr__('silence_data_second' + str(i),
                      L.Silence(tops2[int(i)], ntop=0))
    n.lstm2_out = tops2[T - 1]
    n.lstm2_reshaped = L.Reshape(n.lstm2_out,\
                          reshape_param=dict(\
                              shape=dict(dim=[-1,1024])))
    n.lstm2_reshaped_droped = L.Dropout(n.lstm2_reshaped,
                                        dropout_param={'dropout_ratio': 0.3})
    concat_bottom = [n.lstm1_reshaped_droped, n.lstm2_reshaped_droped]
    n.lstm_12 = L.Concat(*concat_bottom)

    n.q_emb_tanh_droped_resh = L.Reshape(
        n.lstm_12, reshape_param=dict(shape=dict(dim=[-1, 2048, 1, 1])))
    n.q_emb_tanh_droped_resh_tiled_1 = L.Tile(n.q_emb_tanh_droped_resh,
                                              axis=2,
                                              tiles=14)
    n.q_emb_tanh_droped_resh_tiled = L.Tile(n.q_emb_tanh_droped_resh_tiled_1,
                                            axis=3,
                                            tiles=14)
    n.i_emb_tanh_droped_resh = L.Reshape(
        n.img_feature, reshape_param=dict(shape=dict(dim=[-1, 2048, 14, 14])))
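    # Compact bilinear pooling of the tiled question embedding and the image
    # feature map, followed by the usual signed sqrt + L2 normalization.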
    n.blcf = L.CompactBilinear(n.q_emb_tanh_droped_resh_tiled,
                               n.i_emb_tanh_droped_resh,
                               compact_bilinear_param=dict(num_output=16000,
                                                           sum_pool=False))
    n.blcf_sign_sqrt = L.SignedSqrt(n.blcf)
    n.blcf_sign_sqrt_l2 = L.L2Normalize(n.blcf_sign_sqrt)
    n.blcf_droped = L.Dropout(n.blcf_sign_sqrt_l2,
                              dropout_param={'dropout_ratio': 0.1})

    # multi-channel attention
    n.att_conv1 = L.Convolution(n.blcf_droped,
                                kernel_size=1,
                                stride=1,
                                num_output=512,
                                pad=0,
                                weight_filler=dict(type='xavier'))
    n.att_conv1_relu = L.ReLU(n.att_conv1)
    n.att_conv2 = L.Convolution(n.att_conv1_relu,
                                kernel_size=1,
                                stride=1,
                                num_output=2,
                                pad=0,
                                weight_filler=dict(type='xavier'))
    n.att_reshaped = L.Reshape(
        n.att_conv2, reshape_param=dict(shape=dict(dim=[-1, 2, 14 * 14])))
    n.att_softmax = L.Softmax(n.att_reshaped, axis=2)
    n.att = L.Reshape(n.att_softmax,
                      reshape_param=dict(shape=dict(dim=[-1, 2, 14, 14])))
    att_maps = L.Slice(n.att, ntop=2, slice_param={'axis': 1})
    n.att_map0 = att_maps[0]
    n.att_map1 = att_maps[1]
    dummy = L.DummyData(shape=dict(dim=[batchsize, 1]),
                        data_filler=dict(type='constant', value=1),
                        ntop=1)
    n.att_feature0 = L.SoftAttention(n.i_emb_tanh_droped_resh, n.att_map0,
                                     dummy)
    n.att_feature1 = L.SoftAttention(n.i_emb_tanh_droped_resh, n.att_map1,
                                     dummy)
    n.att_feature0_resh = L.Reshape(
        n.att_feature0, reshape_param=dict(shape=dict(dim=[-1, 2048])))
    n.att_feature1_resh = L.Reshape(
        n.att_feature1, reshape_param=dict(shape=dict(dim=[-1, 2048])))
    n.att_feature = L.Concat(n.att_feature0_resh, n.att_feature1_resh)

    # merge attention and lstm with compact bilinear pooling
    n.att_feature_resh = L.Reshape(
        n.att_feature, reshape_param=dict(shape=dict(dim=[-1, 4096, 1, 1])))
    n.lstm_12_resh = L.Reshape(
        n.lstm_12, reshape_param=dict(shape=dict(dim=[-1, 2048, 1, 1])))
    n.bc_att_lstm = L.CompactBilinear(n.att_feature_resh,
                                      n.lstm_12_resh,
                                      compact_bilinear_param=dict(
                                          num_output=16000, sum_pool=False))
    n.bc_sign_sqrt = L.SignedSqrt(n.bc_att_lstm)
    n.bc_sign_sqrt_l2 = L.L2Normalize(n.bc_sign_sqrt)

    n.bc_dropped = L.Dropout(n.bc_sign_sqrt_l2,
                             dropout_param={'dropout_ratio': 0.1})
    n.bc_dropped_resh = L.Reshape(
        n.bc_dropped, reshape_param=dict(shape=dict(dim=[-1, 16000])))

    n.prediction = L.InnerProduct(n.bc_dropped_resh,
                                  num_output=3000,
                                  weight_filler=dict(type='xavier'))
    n.loss = L.SoftmaxWithLoss(n.prediction, n.label)
    return n.to_proto()
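A minimal usage sketch for the qlstm generator above; the batch size, sequence length, and vocabulary size below are placeholder values, not from the original:

with open('qlstm_train.prototxt', 'w') as f:
    f.write(str(qlstm('train', 64, 22, 10000)))
# the written prototxt can then be instantiated with pycaffe, e.g.:
# net = caffe.Net('qlstm_train.prototxt', caffe.TRAIN)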
Пример #20
def net(split):
  n = caffe.NetSpec()
  loss_param = dict(normalize=False)
  if split=='train':
    data_params = dict(mean=(104.00699, 116.66877, 122.67892))
    # images and labels
    data_params['root'] = './datasets/Total_Text_WSR'
    data_params['source'] = "Total_Text_WSR.lst"

    data_params['shuffle'] = True
    data_params['ignore_label'] = -1
    n.data, n.label = L.Python(module='pylayer_old', layer='ImageLabelmapDataLayer', ntop=2, \
    param_str=str(data_params))
    if 'ignore_label' in data_params:
      loss_param['ignore_label'] = int(data_params['ignore_label'])
  elif split == 'test':
    n.data = L.Input(name='data', input_param=dict(shape=dict(dim=[1, 3, 500, 500])))
  else:
    raise Exception("Invalid phase")


# first conv stage
  n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=1)
  n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
  n.pool1 = max_pool(n.relu1_2)

# second conv stage
  n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
  n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
  n.pool2 = max_pool(n.relu2_2)

# third conv stage
  n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
  n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
  n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)

# attach an MCFE module after the last conv layer of the third stage; channels: 64, kernel: 3x3
  n.conv3_dilation1 = conv_dilation01(n.conv3_3, mult=[100,1,200,0])
  n.conv3_dilation2 = conv_dilation03(n.conv3_3, mult=[100,1,200,0])
  n.conv3_dilation3 = conv_dilation05(n.conv3_3, mult=[100,1,200,0])
  n.conv3_dilation4 = conv_dilation07(n.conv3_3, mult=[100,1,200,0])  
# concatenate along the channel dimension
  n.concat_conv33 = L.Concat(n.conv3_dilation1,
                      n.conv3_dilation2,
                      n.conv3_dilation3,
                      n.conv3_dilation4, 
                      concat_param=dict({'concat_dim':1}))

# the MCFE module is followed by a BLSTM module
# ===================== prepare lstm inputs =====================
  n.im2col_conv33 = L.Im2col(n.concat_conv33, convolution_param=dict(kernel_size=3, pad=1))
  n.im2col_transpose_conv33 = L.Transpose(n.im2col_conv33, transpose_param =dict(dim=[3,2,0,1]))  
  n.lstm_input_conv33 = L.Reshape(n.im2col_transpose_conv33, reshape_param =dict(shape=dict(dim=-1), axis=1, num_axes=2))

# forward LSTM
  n.lstm_conv33 = L.Lstm(n.lstm_input_conv33,lstm_param =dict(num_output=128,weight_filler=dict(type='gaussian', std=0.01), bias_filler=dict(type='constant'), clipping_threshold=1))
# backward LSTM
  n.rlstm_input_conv33 = L.Reverse(n.lstm_input_conv33, name='lstm_reverse1_conv33', reverse_param =dict(axis=0))
  n.rlstm_output_conv33= L.Lstm(n.rlstm_input_conv33, name='rlstm_conv33', lstm_param =dict(num_output=128))
  n.rlstm_conv33 = L.Reverse(n.rlstm_output_conv33, name='lstm_reverse2_conv33', reverse_param =dict(axis=0))

# concatenate lstm_conv33 and rlstm_conv33: n*c*(h1+h2+...+hk)*w
  n.merge_lstm_rlstm_conv33 = L.Concat(n.lstm_conv33, n.rlstm_conv33, concat_param=dict(axis=2))
  n.lstm_output_reshape_conv33 = L.Reshape(n.merge_lstm_rlstm_conv33, reshape_param=dict(shape=dict(dim=[-1,1]), axis=1, num_axes=1))
# transpose size of output as (N, C, H, W)
  n.lstm_output_conv33 = L.Transpose(n.lstm_output_reshape_conv33,transpose_param=dict(dim=[2,3,1,0]))
  n.pool3 = max_pool(n.relu3_3)

# fourth conv stage
  n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
  n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
  n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)

# attach an MCFE module after the last conv layer of the fourth stage; channels: 128, kernel: 3x3
  n.conv4_dilation1 = conv_dilation1(n.conv4_3, mult=[100,1,200,0])
  n.conv4_dilation2 = conv_dilation3(n.conv4_3, mult=[100,1,200,0])
  n.conv4_dilation3 = conv_dilation5(n.conv4_3, mult=[100,1,200,0])
  n.conv4_dilation4 = conv_dilation7(n.conv4_3, mult=[100,1,200,0])  
# concatenate along the channel dimension: n*(c1+c2+...+ck)*h*w
  n.concat_conv43 = L.Concat(n.conv4_dilation1,
                      n.conv4_dilation2,
                      n.conv4_dilation3,
                      n.conv4_dilation4, 
                      concat_param=dict({'concat_dim':1}))

# BLSTM module
# ===================== prepare lstm inputs =====================
  n.im2col_conv43 = L.Im2col(n.concat_conv43, convolution_param=dict(kernel_size=3, pad=1))
  n.im2col_transpose_conv43 = L.Transpose(n.im2col_conv43, transpose_param =dict(dim=[3,2,0,1]))
  n.lstm_input_conv43 = L.Reshape(n.im2col_transpose_conv43, reshape_param =dict(shape=dict(dim=-1), axis=1, num_axes=2))
# forward LSTM
  n.lstm_conv43 = L.Lstm(n.lstm_input_conv43,lstm_param =dict(num_output=256,weight_filler=dict(type='gaussian', std=0.01), bias_filler=dict(type='constant'), clipping_threshold=1))  
# backward LSTM
  n.rlstm_input_conv43 = L.Reverse(n.lstm_input_conv43, name='lstm_reverse1_conv43', reverse_param =dict(axis=0))
  n.rlstm_output_conv43= L.Lstm(n.rlstm_input_conv43, name='rlstm_conv43', lstm_param =dict(num_output=256))
  n.rlstm_conv43 = L.Reverse(n.rlstm_output_conv43, name='lstm_reverse2_conv43', reverse_param =dict(axis=0))

# concatenate lstm_conv43 and rlstm_conv43: n*c*(h1+h2+...+hk)*w
  n.merge_lstm_rlstm_conv43 = L.Concat(n.lstm_conv43, n.rlstm_conv43, concat_param=dict(axis=2))
  n.lstm_output_reshape_conv43 = L.Reshape(n.merge_lstm_rlstm_conv43, reshape_param=dict(shape=dict(dim=[-1,1]), axis=1, num_axes=1))
# transpose size of output as (N, C, H, W)
  n.lstm_output_conv43 = L.Transpose(n.lstm_output_reshape_conv43,transpose_param=dict(dim=[2,3,1,0]))
  n.pool4 = max_pool(n.relu4_3)


# The fifth conv stage
  n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
  n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
  n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)

# MCFE inception module, Channel: 128, kernel: 3*3
  n.conv5_dilation1 = conv_dilation1(n.conv5_3, mult=[100,1,200,0])
  n.conv5_dilation2 = conv_dilation3(n.conv5_3, mult=[100,1,200,0])
  n.conv5_dilation3 = conv_dilation5(n.conv5_3, mult=[100,1,200,0])
  n.conv5_dilation4 = conv_dilation7(n.conv5_3, mult=[100,1,200,0])  
  n.concat_conv53 = L.Concat(n.conv5_dilation1,
                      n.conv5_dilation2,
                      n.conv5_dilation3,
                      n.conv5_dilation4,
                      concat_param=dict({'concat_dim':1}))


#  BLSTM module
# ===================== prepare lstm inputs =====================
  n.im2col_conv53 = L.Im2col(n.concat_conv53, convolution_param=dict(kernel_size=3, pad=1))
  n.im2col_transpose_conv53 = L.Transpose(n.im2col_conv53, transpose_param =dict(dim=[3,2,0,1]))
  n.lstm_input_conv53 = L.Reshape(n.im2col_transpose_conv53, reshape_param =dict(shape=dict(dim=-1), axis=1, num_axes=2))

# forward LSTM
  n.lstm_conv53 = L.Lstm(n.lstm_input_conv53,lstm_param =dict(num_output=256,weight_filler=dict(type='gaussian', std=0.01), bias_filler=dict(type='constant'), clipping_threshold=1))

# backward LSTM
  n.rlstm_input_conv53 = L.Reverse(n.lstm_input_conv53, name='lstm_reverse1_conv53', reverse_param =dict(axis=0))
  n.rlstm_output_conv53= L.Lstm(n.rlstm_input_conv53, name='rlstm_conv53', lstm_param =dict(num_output=256))
  n.rlstm_conv53 = L.Reverse(n.rlstm_output_conv53, name='lstm_reverse2_conv53', reverse_param =dict(axis=0))  
# concatenate lstm_conv53 and rlstm_conv53: n*c*(h1+h2+...+hk)*w
  n.merge_lstm_rlstm_conv53 = L.Concat(n.lstm_conv53, n.rlstm_conv53, concat_param=dict(axis=2))
  n.lstm_output_reshape_conv53 = L.Reshape(n.merge_lstm_rlstm_conv53, reshape_param=dict(shape=dict(dim=[-1,1]), axis=1, num_axes=1))
# transpose size of output as (N, C, H, W)
  n.lstm_output_conv53 = L.Transpose(n.lstm_output_reshape_conv53,transpose_param=dict(dim=[2,3,1,0]))


# stage 3: reduce the BLSTM output with a 1x1 conv, upsample 4x, and crop to the input size
  n.score_dsn3 = conv1x1(n.lstm_output_conv33, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.01))
  n.score_dsn3_up = upsample(n.score_dsn3, stride=4)
  n.upscore_dsn3 = L.Crop(n.score_dsn3_up, n.data)

# BalanceCrossEntropyLoss
  if split=='train':
    n.loss3 = L.BalanceCrossEntropyLoss(n.upscore_dsn3, n.label, loss_param=loss_param)  
  if split=='test':
    n.sigmoid_dsn3 = L.Sigmoid(n.upscore_dsn3)  

# stage 4: reduce the BLSTM output with a 1x1 conv, upsample 8x, and crop to the input size
  n.score_dsn4 = conv1x1(n.lstm_output_conv43, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.01))
  n.score_dsn4_up = upsample(n.score_dsn4, stride=8)
  n.upscore_dsn4 = L.Crop(n.score_dsn4_up, n.data)

# BalanceCrossEntropyLoss
  if split=='train':
    n.loss4 = L.BalanceCrossEntropyLoss(n.upscore_dsn4, n.label, loss_param=loss_param)  
  if split=='test':
    n.sigmoid_dsn4 = L.Sigmoid(n.upscore_dsn4)

# stage 5: reduce the BLSTM output with a 1x1 conv, upsample 16x, and crop to the input size
  n.score_dsn5 = conv1x1(n.lstm_output_conv53, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.01))
  n.score_dsn5_up = upsample(n.score_dsn5, stride=16)
  n.upscore_dsn5 = L.Crop(n.score_dsn5_up, n.data)

# BalanceCrossEntropyLoss
  if split=='train':
    n.loss5 = L.BalanceCrossEntropyLoss(n.upscore_dsn5, n.label, loss_param=loss_param)  
  if split=='test':
    n.sigmoid_dsn5 = L.Sigmoid(n.upscore_dsn5)    


# concatenate the outputs of the three stages along the channel dimension as input to the attention module
  n.concat_upscore = L.Concat(n.upscore_dsn3,
                      n.upscore_dsn4,
                      n.upscore_dsn5,                      
                      name='concat', concat_param=dict({'concat_dim':1}))

  # reduce upscore_dsn3/upscore_dsn4/upscore_dsn5 with 3x3 convolutions
  n.output_mask_product03 = L.Convolution(n.upscore_dsn3,
                 num_output=1, kernel_size=3,pad=1,
                 param=[dict(lr_mult=10, decay_mult=1), dict(lr_mult=20, decay_mult=0)], weight_filler=dict(type='gaussian', std=0.01), bias_filler=dict(type='constant'), engine=1)
  n.output_mask_product04 = L.Convolution(n.upscore_dsn4,
                 num_output=1, kernel_size=3,pad=1,
                 param=[dict(lr_mult=10, decay_mult=1), dict(lr_mult=20, decay_mult=0)], weight_filler=dict(type='gaussian', std=0.01), bias_filler=dict(type='constant'), engine=1)
  n.output_mask_product05 = L.Convolution(n.upscore_dsn5,
                 num_output=1, kernel_size=3,pad=1,
                 param=[dict(lr_mult=10, decay_mult=1), dict(lr_mult=20, decay_mult=0)], weight_filler=dict(type='gaussian', std=0.01), bias_filler=dict(type='constant'), engine=1)


### Attention module
# first conv layer: num_output=512, kernel 3x3
  n.att_conv1_mask_512 =  L.Convolution(n.concat_upscore,
                 num_output=512, kernel_size=3,pad=1,
                 param=[dict(lr_mult=10, decay_mult=1), dict(lr_mult=20, decay_mult=0)], engine=1)
  n.relu_att_conv1 = L.ReLU(n.att_conv1_mask_512, in_place=True)
  n.drop_att_conv1_mask = L.Dropout(n.relu_att_conv1, dropout_ratio=0.5, in_place=True)  
# second conv layer: num_output=3, kernel 1x1
  n.att_fc_mask_512 = L.Convolution(n.drop_att_conv1_mask,
                 num_output=3, kernel_size=1,
                 param=[dict(lr_mult=10, decay_mult=1), dict(lr_mult=20, decay_mult=0)], engine=1)
  n.attention = L.Softmax(n.att_fc_mask_512)
# produce the three attention weight maps
  n.attention3,n.attention4,n.attention5= L.Slice(n.attention, name='slice_attention', slice_param=dict(axis=1, slice_point=[1,2]), ntop=3)

# multiply the attention weights with the feature maps and fuse
  n.output_mask3 = L.Eltwise(n.attention3, n.output_mask_product03,operation=P.Eltwise.PROD)
  n.output_mask4 = L.Eltwise(n.attention4, n.output_mask_product04,operation=P.Eltwise.PROD)
  n.output_mask5 = L.Eltwise(n.attention5, n.output_mask_product05,operation=P.Eltwise.PROD)  

  n.output_fusion = L.Eltwise(n.output_mask3, n.output_mask4, n.output_mask5, operation=P.Eltwise.SUM)

# for comparison, bypass the attention module: fuse the concatenated stage outputs with a 1x1 conv
  n.upscore_fuse = L.Convolution(n.concat_upscore, name='new-score-weighting', 
                 num_output=1, kernel_size=1,
                 param=[dict(lr_mult=0.001, decay_mult=1), dict(lr_mult=0.002, decay_mult=0)],
                 weight_filler=dict(type='constant', value=0.2), engine=1)

  if split=='train':
    n.loss_fuse = L.BalanceCrossEntropyLoss(n.upscore_fuse, n.label, loss_param=loss_param)
    n.loss_output_fusion = L.BalanceCrossEntropyLoss(n.output_fusion, n.label, loss_param=loss_param) 
  if split=='test':
    n.sigmoid_fuse = L.Sigmoid(n.upscore_fuse)
    n.sigmoid_output_fusion= L.Sigmoid(n.output_fusion)
        
  return n.to_proto()
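A small driver for net(split), assuming the usual pattern of writing one prototxt per phase (the file names are illustrative):

for split in ('train', 'test'):
    with open('mcfe_{}.prototxt'.format(split), 'w') as f:
        f.write(str(net(split)))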
Пример #21
def generate_fc8(split, config):

    n = caffe.NetSpec()
    batch_size = config.N
    mode_str = str(dict(split=split, batch_size=batch_size))
    n.language, n.cont, n.image, n.spatial, n.label = L.Python(module=config.data_provider,
                                                               layer=config.data_provider_layer,
                                                               param_str=mode_str,
                                                               ntop=5)

    # the base net (VGG-16)
    n.conv1_1, n.relu1_1 = conv_relu(n.image, 64,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.pool5 = max_pool(n.relu5_3)

    n.fc6, n.relu6 = fc_relu(n.pool5, 4096,
                             fix_param=config.fix_vgg,
                             finetune=(not config.fix_vgg))
    
    if config.vgg_dropout:
        n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
        n.fc7, n.relu7 = fc_relu(n.drop6, 4096,
                                 fix_param=config.fix_vgg,
                                 finetune=(not config.fix_vgg))
        n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
        n.fc8 = fc(n.drop7, 1000,
                   fix_param=config.fix_vgg,
                   finetune=(not config.fix_vgg))
    else:
        n.fc7, n.relu7 = fc_relu(n.relu6, 4096,
                                 fix_param=config.fix_vgg,
                                 finetune=(not config.fix_vgg))
        n.fc8 = fc(n.relu7, 1000,
                   fix_param=config.fix_vgg,
                   finetune=(not config.fix_vgg))
    return n.to_proto()
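The fc and fc_relu helpers here take fix_param/finetune flags. One plausible shape for them, assuming fix_param freezes a layer by zeroing its learning-rate multipliers (the real definitions live elsewhere in this codebase):

def fc(bottom, nout, fix_param=False, finetune=False):
    if fix_param:
        # frozen: no updates to weights or biases
        param = [dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)]
    else:
        param = [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]
    return L.InnerProduct(bottom, num_output=nout, param=param)

def fc_relu(bottom, nout, fix_param=False, finetune=False):
    fc_top = fc(bottom, nout, fix_param, finetune)
    return fc_top, L.ReLU(fc_top, in_place=True)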
Пример #22
def vgg_face(split, mean, opt):
    n = caffe.NetSpec()

    # config python data layer
    if split == 'train':
        batch_size = opt.train_batch_size
    elif split == 'val':
        batch_size = opt.val_batch_size
    else:
        batch_size = opt.test_batch_size

    if split == 'train' or split == 'val':
        dataset_name = opt.train_dataset_name
    else:
        dataset_name = opt.test_dataset_name

    pydata_params = dict(split=split,
                         data_dir=opt.data_dir,
                         batch_size=batch_size,
                         mean=mean,
                         dataset=dataset_name,
                         load_size=opt.load_size,
                         crop_size=opt.crop_size)
    n.data, n.label = L.Python(module='faceData_layers',
                               layer='FaceDataLayer',
                               ntop=2,
                               param_str=str(pydata_params))

    # vgg-face net
    # conv layers
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 64)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
    n.pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
    n.pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
    n.pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
    n.pool5 = max_pool(n.relu5_3)

    # drop out and fc layers
    n.fc6, n.relu6, n.drop6 = fc_relu_dropout(n.pool5, 4096, 0.5)
    n.fc7, n.relu7, n.drop7 = fc_relu_dropout(n.fc6, 4096, 0.5)

    lr_ratio = 100  # lr multiplier for truncated layers
    n.fc8_face = L.InnerProduct(n.fc7,
                                num_output=1024,
                                param=[
                                    dict(lr_mult=1 * lr_ratio, decay_mult=1),
                                    dict(lr_mult=2 * lr_ratio, decay_mult=0)
                                ],
                                weight_filler=dict(type='gaussian', std=0.01),
                                bias_filler=dict(type='constant', value=0))
    n.fc9_face = L.InnerProduct(n.fc8_face,
                                num_output=2,
                                param=[
                                    dict(lr_mult=1 * lr_ratio, decay_mult=1),
                                    dict(lr_mult=2 * lr_ratio, decay_mult=0)
                                ],
                                weight_filler=dict(type='gaussian', std=0.01),
                                bias_filler=dict(type='constant', value=0))

    # loss layer
    n.loss = L.SoftmaxWithLoss(n.fc9_face, n.label)

    # accuracy layer
    n.acc = L.Accuracy(n.fc9_face, n.label)
    return n.to_proto()
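The fc_relu_dropout helper is not shown in this example. A minimal sketch, assuming it chains an InnerProduct, an in-place ReLU, and an in-place Dropout:

def fc_relu_dropout(bottom, nout, dropout_ratio):
    # hypothetical helper: fully connected layer + ReLU + Dropout
    fc = L.InnerProduct(bottom, num_output=nout,
                        param=[dict(lr_mult=1, decay_mult=1),
                               dict(lr_mult=2, decay_mult=0)])
    relu = L.ReLU(fc, in_place=True)
    drop = L.Dropout(relu, dropout_ratio=dropout_ratio, in_place=True)
    return fc, relu, drop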
Пример #23
def fcn(split):
    n = caffe.NetSpec()
    pydata_params = dict(split=split,
                         mean=(104.00699, 116.66877, 122.67892),
                         seed=1337)
    if split == 'train':
        pydata_params['sbdd_dir'] = '../data/sbdd/dataset'
        pylayer = 'SBDDSegDataLayer'
    else:
        pydata_params['voc_dir'] = '../data/pascal/VOC2011'
        pylayer = 'VOCSegDataLayer'
    n.data, n.label = L.Python(module='voc_layers',
                               layer=pylayer,
                               ntop=2,
                               param_str=str(pydata_params))

    # the base net
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
    n.pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
    n.pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
    n.pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
    n.pool5 = max_pool(n.relu5_3)

    # fully conv
    n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
    n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
    n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
    n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)

    n.score_fr = L.Convolution(
        n.drop7,
        num_output=21,
        kernel_size=1,
        pad=0,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    n.upscore2 = L.Deconvolution(n.score_fr,
                                 convolution_param=dict(num_output=21,
                                                        kernel_size=4,
                                                        stride=2,
                                                        bias_term=False),
                                 param=[dict(lr_mult=0)])

    # scale pool4 skip for compatibility
    n.scale_pool4 = L.Scale(n.pool4,
                            filler=dict(type='constant', value=0.01),
                            param=[dict(lr_mult=0)])
    n.score_pool4 = L.Convolution(
        n.scale_pool4,
        num_output=21,
        kernel_size=1,
        pad=0,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    n.score_pool4c = crop(n.score_pool4, n.upscore2)
    n.fuse_pool4 = L.Eltwise(n.upscore2,
                             n.score_pool4c,
                             operation=P.Eltwise.SUM)
    n.upscore_pool4 = L.Deconvolution(n.fuse_pool4,
                                      convolution_param=dict(num_output=21,
                                                             kernel_size=4,
                                                             stride=2,
                                                             bias_term=False),
                                      param=[dict(lr_mult=0)])

    # scale pool3 skip for compatibility
    n.scale_pool3 = L.Scale(n.pool3,
                            filler=dict(type='constant', value=0.0001),
                            param=[dict(lr_mult=0)])
    n.score_pool3 = L.Convolution(
        n.scale_pool3,
        num_output=21,
        kernel_size=1,
        pad=0,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    n.score_pool3c = crop(n.score_pool3, n.upscore_pool4)
    n.fuse_pool3 = L.Eltwise(n.upscore_pool4,
                             n.score_pool3c,
                             operation=P.Eltwise.SUM)
    n.upscore8 = L.Deconvolution(n.fuse_pool3,
                                 convolution_param=dict(num_output=21,
                                                        kernel_size=16,
                                                        stride=8,
                                                        bias_term=False),
                                 param=[dict(lr_mult=0)])

    n.score = crop(n.upscore8, n.data)
    n.loss = L.SoftmaxWithLoss(n.score,
                               n.label,
                               loss_param=dict(normalize=False,
                                               ignore_label=255))

    return n.to_proto()
Пример #24
    def generate(self):
        """Returns a NetSpec specifying CaffeNet, following the original proto text
               specification (./models/bvlc_reference_caffenet/train_val.prototxt)."""
        conf = self
        n = caffe.NetSpec()
        param = LT.learned_param if conf.train else LT.frozen_param

        if self.train:

            n.data = L.Python(top=[
                "rois", 'labels', 'bbox_targets', 'bbox_inside_weights',
                'bbox_outside_weights'
            ],
                              python_param=dict(module='roi_data_layer.layer',
                                                layer='RoIDataLayer',
                                                param_str="num_classes: " +
                                                str(conf.num_classes)))
        else:
            n.data, n.im_info = LT.input()

        conv15_param = LT.learned_param if (
            conf.conv_1_to_5_learn) else LT.frozen_param
        LT.conv1_to_5(n, conv15_param)

        if not self.train:
            n.rpn_conv1, n.rpn_relu1, n.rpn_cls_score, n.rpn_bbox_pred = LT.rpn_class_and_bbox_predictors(
                n, self, param)
            n.rpn_cls_score_reshape = LT.reshape(n.rpn_cls_score,
                                                 [0, 2, -1, 0])
            n.rpn_cls_prob, n.rpn_cls_prob_reshape, n.rois = LT.roi_proposal(
                n, self)

        n.roi_pool = L.ROIPooling(bottom=["conv5", "rois"],
                                  pooled_w=6,
                                  pooled_h=6,
                                  spatial_scale=0.0625)

        n.fc6, n.relu6 = LT.fc_relu(n.roi_pool, 4096, param=param)

        n.drop6 = fc7input = L.Dropout(n.relu6,
                                       in_place=True,
                                       dropout_ratio=0.5,
                                       scale_train=False)
        n.fc7, n.relu7 = LT.fc_relu(fc7input, 4096, param=param)
        n.drop7 = layer7 = L.Dropout(n.relu7,
                                     in_place=True,
                                     dropout_ratio=0.5,
                                     scale_train=False)
        weight_filler = (LT.WEIGHT_FILLER if conf.train else dict())
        bias_filler = (LT.BIAS_FILLER if conf.train else dict())
        n.cls_score = L.InnerProduct(layer7,
                                     num_output=conf.num_classes,
                                     weight_filler=weight_filler,
                                     bias_filler=bias_filler,
                                     param=LT.learned_param)

        n.bbox_pred = L.InnerProduct(layer7,
                                     num_output=conf.num_classes * 4,
                                     weight_filler=weight_filler,
                                     bias_filler=bias_filler,
                                     param=LT.learned_param)

        if conf.train:
            n.loss_cls = LT.soft_max_with_loss(["cls_score", "labels"])
            n.loss_bbox = L.SmoothL1Loss(bottom=[
                "bbox_pred", "bbox_targets", "bbox_inside_weights",
                "bbox_outside_weights"
            ],
                                         loss_weight=1)
        else:
            n.cls_prob = L.Softmax(n.cls_score,
                                   loss_param=dict(ignore_label=-1,
                                                   normalize=True))

        if self.train:
            n.rpn_conv1, n.rpn_relu1, n.rpn_cls_score, n.rpn_bbox_pred = LT.rpn_class_and_bbox_predictors(
                n, self, LT.frozen_param)

        n.silence_rpn_cls_score = LT.silence(n.rpn_cls_score)
        n.silence_rpn_bbox_pred = LT.silence(n.rpn_bbox_pred)
        # write the net to a temporary file and return its filename
        return self.save(n)
Пример #25
    def define_model(self):
        n = caffe.NetSpec()
        pylayer = 'ClsDataLayer'

        pydata_params = dict(
            phase='train',
            data_root=opt.cls_data_root,
            batch_size=16,
            ratio=5,
            augument=True,
        )
        n.arch1_data, n.arch2_data, n.arch3_data, n.label = L.Python(
            module='data.ClsDataLayer',
            layer=pylayer,
            ntop=4,
            param_str=str(pydata_params))
        n.arch1_conv1 = SingleConv(n.arch1_data,
                                   64,
                                   kernel_size=[3, 5, 5],
                                   stride=[1, 1, 1],
                                   padding=[0, 0, 0])
        n.arch1_conv2 = SingleConv(n.arch1_conv1,
                                   64,
                                   kernel_size=2,
                                   stride=2,
                                   padding=0)
        n.arch1_conv3 = SingleConv(n.arch1_conv2,
                                   64,
                                   kernel_size=1,
                                   stride=1,
                                   padding=0)
        n.arch1_conv4 = SingleConv(n.arch1_conv3,
                                   64,
                                   kernel_size=[2, 5, 5],
                                   stride=[1, 1, 1],
                                   padding=[0, 0, 0])
        n.arch1_conv5 = SingleConv(n.arch1_conv4,
                                   64,
                                   kernel_size=[1, 4, 4],
                                   stride=[1, 1, 1],
                                   padding=[0, 0, 0])
        n.arch1_flat = L.Flatten(n.arch1_conv5)
        n.arch1_fc1 = L.InnerProduct(n.arch1_flat,
                                     num_output=150,
                                     weight_filler=dict(type='xavier'))
        n.fc1_act = L.ReLU(n.arch1_fc1, engine=3)
        n.arch1 = L.InnerProduct(n.fc1_act,
                                 num_output=2,
                                 weight_filler=dict(type='xavier'))
        n.arch1_loss = L.SoftmaxWithLoss(n.arch1, n.label)

        n.arch2_conv1 = SingleConv(n.arch2_data,
                                   64,
                                   kernel_size=[3, 5, 5],
                                   stride=[1, 1, 1],
                                   padding=[0, 0, 0])
        n.arch2_conv2 = SingleConv(n.arch2_conv1,
                                   64,
                                   kernel_size=2,
                                   stride=2,
                                   padding=0)
        n.arch2_conv3 = SingleConv(n.arch2_conv2,
                                   64,
                                   kernel_size=[1, 2, 2],
                                   stride=[1, 1, 1],
                                   padding=[0, 0, 0])
        n.arch2_conv4 = SingleConv(n.arch2_conv3,
                                   64,
                                   kernel_size=[3, 5, 5],
                                   stride=[1, 1, 1],
                                   padding=[0, 0, 0])
        n.arch2_conv5 = SingleConv(n.arch2_conv4,
                                   64,
                                   kernel_size=[2, 5, 5],
                                   stride=[1, 1, 1],
                                   padding=[0, 0, 0])
        n.arch2_flat = L.Flatten(n.arch2_conv5)
        n.arch2_fc1 = L.InnerProduct(n.arch2_flat,
                                     num_output=250,
                                     weight_filler=dict(type='xavier'))
        n.fc2_act = L.ReLU(n.arch2_fc1, engine=3)
        n.arch2 = L.InnerProduct(n.fc2_act,
                                 num_output=2,
                                 weight_filler=dict(type='xavier'))
        n.arch2_loss = L.SoftmaxWithLoss(n.arch2, n.label)

        n.arch3_conv1 = SingleConv(n.arch3_data,
                                   64,
                                   kernel_size=[3, 5, 5],
                                   stride=[1, 1, 1],
                                   padding=[0, 0, 0])
        n.arch3_conv2 = SingleConv(n.arch3_conv1,
                                   64,
                                   kernel_size=2,
                                   stride=2,
                                   padding=0)
        n.arch3_conv3 = SingleConv(n.arch3_conv2,
                                   64,
                                   kernel_size=[2, 2, 2],
                                   stride=[1, 1, 1],
                                   padding=[0, 0, 0])
        n.arch3_conv4 = SingleConv(n.arch3_conv3,
                                   64,
                                   kernel_size=[3, 5, 5],
                                   stride=[1, 1, 1],
                                   padding=[0, 0, 0])
        n.arch3_conv5 = SingleConv(n.arch3_conv4,
                                   64,
                                   kernel_size=[3, 5, 5],
                                   stride=[1, 1, 1],
                                   padding=[0, 0, 0])
        n.arch3_flat = L.Flatten(n.arch3_conv5)
        n.arch3_fc1 = L.InnerProduct(n.arch3_flat,
                                     num_output=250,
                                     weight_filler=dict(type='xavier'))
        n.fc3_act = L.ReLU(n.arch3_fc1, engine=3)
        n.arch3 = L.InnerProduct(n.fc3_act,
                                 num_output=2,
                                 weight_filler=dict(type='xavier'))
        n.arch3_loss = L.SoftmaxWithLoss(n.arch3, n.label)
        with open(self.model_def, 'w') as f:
            f.write(str(n.to_proto()))
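SingleConv above is an external helper. One plausible definition, assuming an N-D convolution followed by BatchNorm, Scale, and ReLU (names and fillers are hypothetical):

def SingleConv(bottom, nout, kernel_size=3, stride=1, padding=0):
    # hypothetical sketch: ND conv + BatchNorm/Scale + in-place ReLU;
    # list-valued kernel_size/stride/pad map onto Caffe's repeated fields
    conv = L.Convolution(bottom, num_output=nout, kernel_size=kernel_size,
                         stride=stride, pad=padding,
                         weight_filler=dict(type='xavier'))
    bn = L.BatchNorm(conv, in_place=True)
    scale = L.Scale(bn, bias_term=True, in_place=True)
    return L.ReLU(scale, in_place=True)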
Пример #26
def fcn(split):
    n = caffe.NetSpec()
    n.data, n.sem, n.geo = L.Python(
        module='siftflow_layers',
        layer='SIFTFlowSegDataLayer',
        ntop=3,
        param_str=str(
            dict(siftflow_dir='/home/tramac/caffe/examples/fcn/data/sift-flow',
                 split=split,
                 seed=1337)))

    # the base net
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
    n.pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
    n.pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
    n.pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
    n.pool5 = max_pool(n.relu5_3)

    # fully conv
    n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)  # fc6->fc6_new
    n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
    n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)  # fc7->fc7_new
    n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)

    n.score_fr_sem = L.Convolution(
        n.drop7,
        num_output=2,
        kernel_size=1,
        pad=0,  # 33->2
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    n.upscore_sem = L.Deconvolution(
        n.score_fr_sem,
        convolution_param=dict(
            num_output=2,
            kernel_size=64,
            stride=32,  ##change 33->2
            bias_term=False),
        param=[dict(lr_mult=0)])
    n.score_sem = crop(n.upscore_sem, n.data)
    # loss to make score happy (o.w. loss_sem)
    n.loss = L.SoftmaxWithLoss(n.score_sem,
                               n.sem,
                               loss_param=dict(normalize=False,
                                               ignore_label=255))

    n.score_fr_geo = L.Convolution(
        n.drop7,
        num_output=2,
        kernel_size=1,
        pad=0,  #3->2
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    n.upscore_geo = L.Deconvolution(
        n.score_fr_geo,
        convolution_param=dict(
            num_output=2,
            kernel_size=64,
            stride=32,  # 3->2
            bias_term=False),
        param=[dict(lr_mult=0)])
    n.score_geo = crop(n.upscore_geo, n.data)
    n.loss_geo = L.SoftmaxWithLoss(n.score_geo,
                                   n.geo,
                                   loss_param=dict(normalize=False,
                                                   ignore_label=255))

    return n.to_proto()
Пример #27
def fcn(split):
    n = caffe.NetSpec()
    pydata_params = dict(split=split,
                         mean=(104.00699, 116.66877, 122.67892),
                         seed=1337)
    if split == 'train':
        pydata_params['sbdd_dir'] = '../data/sbdd/dataset'
        pylayer = 'SBDDSegDataLayer'
    else:
        pydata_params['voc_dir'] = '../data/pascal/VOC2011'
        pylayer = 'VOCSegDataLayer'
    n.data, n.label = L.Python(module='voc_layers',
                               layer=pylayer,
                               ntop=2,
                               param_str=str(pydata_params))

    # the base net
    n.conv1, n.relu1 = conv_relu(n.data, 96, ks=11, stride=4, pad=100)
    n.pool1 = max_pool(n.relu1)

    n.lrn1 = lrn(n.pool1)

    n.conv2, n.relu2 = conv_relu(n.lrn1, 128, ks=5, stride=1, pad=2)
    n.pool2 = max_pool(n.relu2)

    n.lrn2 = lrn(n.pool2)

    n.conv3, n.relu3 = conv_relu(n.lrn2, 384, ks=3, stride=1, pad=1)
    n.conv4, n.relu4 = conv_relu(n.relu3, 384, ks=3, stride=1, pad=1)
    n.conv5, n.relu5 = conv_relu(n.relu4, 256, ks=3, stride=1, pad=1)
    n.pool5 = max_pool(n.relu5)

    # fully conv
    n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=6, stride=1, pad=0)
    n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
    n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, stride=1, pad=0)
    n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
    n.score_fr = L.Convolution(
        n.drop7,
        num_output=21,
        kernel_size=1,
        pad=0,
        stride=1,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    n.upscore2 = L.Deconvolution(n.score_fr,
                                 convolution_param=dict(num_output=21,
                                                        kernel_size=5,
                                                        stride=2,
                                                        bias_term=False),
                                 param=[dict(lr_mult=0)])

    n.score_pool2 = L.Convolution(
        n.pool2,
        num_output=21,
        kernel_size=1,
        pad=0,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    n.score_pool2c = crop(n.score_pool2, n.upscore2)
    n.fuse_pool2 = L.Eltwise(n.upscore2,
                             n.score_pool2c,
                             operation=P.Eltwise.SUM)
    n.upscore16 = L.Deconvolution(n.fuse_pool2,
                                  convolution_param=dict(num_output=21,
                                                         kernel_size=31,
                                                         stride=16,
                                                         bias_term=False),
                                  param=[dict(lr_mult=0)])

    n.score = crop(n.upscore16, n.data)
    n.loss = L.SoftmaxWithLoss(n.score,
                               n.label,
                               loss_param=dict(normalize=False,
                                               ignore_label=255))

    return n.to_proto()
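
to_proto() returns a NetParameter message whose string form is valid prototxt, so a small driver is enough to materialize this net on disk. A sketch (the file names are placeholders):

if __name__ == '__main__':
    # write one prototxt per split for the AlexNet-style FCN above
    for split in ('train', 'val'):
        with open('%s.prototxt' % split, 'w') as f:
            f.write(str(fcn(split)))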
Example #28
def net(split):
    n = caffe.NetSpec()
    if split == 'train':
        data_params = dict(mean=(104.00699, 116.66877, 122.67892))
        data_params['root'] = './data/MSRA-B/'
        data_params['source'] = "train_list.txt"
        data_params['shuffle'] = True
        data_params['aug'] = args.aug  # `args` is a module-level argparse namespace
        data_params['ignore_label'] = -1  # label value excluded from the loss
        n.data, n.label = L.Python(module='pylayer',
                                   layer='ImageLabelmapDataLayer',
                                   ntop=2,
                                   param_str=str(data_params))
        loss_param = dict(normalize=args.lossnorm)
        if 'ignore_label' in data_params:  # dict.has_key() is Python 2 only
            loss_param['ignore_label'] = data_params['ignore_label']
    elif split == 'test':
        n.data = L.Input(name='data',
                         input_param=dict(shape=dict(dim=[1, 3, 500, 500])))
    else:
        raise Exception("Invalid phase")

    n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=5)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
    n.pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
    n.pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
    n.pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
    n.pool5 = max_pool(n.relu5_3)
    n.pool5a = L.Pooling(n.pool5,
                         pool=P.Pooling.AVE,
                         kernel_size=3,
                         stride=1,
                         pad=1)
    ### DSN conv 6 ###
    n.conv1_dsn6, n.relu1_dsn6 = conv_relu(n.pool5a, 512, ks=7, pad=3)
    n.conv2_dsn6, n.relu2_dsn6 = conv_relu(n.relu1_dsn6, 512, ks=7, pad=3)
    n.conv3_dsn6 = conv1x1(n.relu2_dsn6, 'conv3_dsn6')
    n.score_dsn6_up = upsample(n.conv3_dsn6,
                               stride=32,
                               name='upsample32_in_dsn6')
    n.upscore_dsn6 = crop(n.score_dsn6_up, n.data)
    if split == 'train':
        n.sigmoid_dsn6 = L.Sigmoid(n.upscore_dsn6)
        floss_param = dict()
        floss_param['name'] = 'dsn6'
        floss_param['beta'] = args.beta
        n.loss_dsn6 = L.Python(n.sigmoid_dsn6,
                               n.label,
                               module='floss',
                               layer='FmeasureLossLayer',
                               param_str=str(floss_param),
                               ntop=1,
                               loss_weight=1)
    else:
        n.sigmoid_dsn6 = L.Sigmoid(n.upscore_dsn6)
    ### DSN conv 5 ###
    n.conv1_dsn5, n.relu1_dsn5 = conv_relu(n.conv5_3, 512, ks=5, pad=2)
    n.conv2_dsn5, n.relu2_dsn5 = conv_relu(n.relu1_dsn5, 512, ks=5, pad=2)
    n.conv3_dsn5 = conv1x1(n.relu2_dsn5, 'conv3_dsn5')
    n.score_dsn5_up = upsample(n.conv3_dsn5,
                               stride=16,
                               name='upsample16_in_dsn5')
    n.upscore_dsn5 = crop(n.score_dsn5_up, n.data)
    if split == 'train':
        n.sigmoid_dsn5 = L.Sigmoid(n.upscore_dsn5)
        floss_param['name'] = 'dsn5'
        floss_param['beta'] = args.beta
        n.loss_dsn5 = L.Python(n.sigmoid_dsn5,
                               n.label,
                               module='floss',
                               layer='FmeasureLossLayer',
                               param_str=str(floss_param),
                               ntop=1,
                               loss_weight=1)
    else:
        n.sigmoid_dsn5 = L.Sigmoid(n.upscore_dsn5)
    ### DSN conv 4 ###
    n.conv1_dsn4, n.relu1_dsn4 = conv_relu(n.conv4_3, 256, ks=5, pad=2)
    n.conv2_dsn4, n.relu2_dsn4 = conv_relu(n.relu1_dsn4, 256, ks=5, pad=2)
    n.conv3_dsn4 = conv1x1(n.relu2_dsn4, 'conv3_dsn4')

    n.score_dsn6_up_4 = upsample(n.conv3_dsn6, stride=4, name='upsample4_dsn6')
    n.upscore_dsn6_4 = crop(n.score_dsn6_up_4, n.conv3_dsn4)
    n.score_dsn5_up_4 = upsample(n.conv3_dsn5, stride=2, name='upsample2_dsn5')
    n.upscore_dsn5_4 = crop(n.score_dsn5_up_4, n.conv3_dsn4)
    # element-wise sum of the side outputs (Eltwise defaults to SUM),
    # despite the "concat" naming
    n.concat_dsn4 = L.Eltwise(n.conv3_dsn4,
                              n.upscore_dsn6_4,
                              n.upscore_dsn5_4,
                              name="concat_dsn4")
    n.conv4_dsn4 = conv1x1(n.concat_dsn4, 'conv4_dsn4')
    n.score_dsn4_up = upsample(n.conv4_dsn4,
                               stride=8,
                               name='upsample8_in_dsn4')
    n.upscore_dsn4 = crop(n.score_dsn4_up, n.data)
    if split == 'train':
        n.sigmoid_dsn4 = L.Sigmoid(n.upscore_dsn4)
        floss_param['name'] = 'dsn4'
        floss_param['beta'] = args.beta
        n.loss_dsn4 = L.Python(n.sigmoid_dsn4,
                               n.label,
                               module='floss',
                               layer='FmeasureLossLayer',
                               param_str=str(floss_param),
                               ntop=1,
                               loss_weight=1)
    else:
        n.sigmoid_dsn4 = L.Sigmoid(n.upscore_dsn4)
    ### DSN conv 3 ###
    n.conv1_dsn3, n.relu1_dsn3 = conv_relu(n.conv3_3, 256, ks=5, pad=2)
    n.conv2_dsn3, n.relu2_dsn3 = conv_relu(n.relu1_dsn3, 256, ks=5, pad=2)
    n.conv3_dsn3 = conv1x1(n.relu2_dsn3, 'conv3_dsn3')

    n.score_dsn6_up_3 = upsample(n.conv3_dsn6, stride=8, name='upsample8_dsn6')
    n.upscore_dsn6_3 = crop(n.score_dsn6_up_3, n.conv3_dsn3)
    n.score_dsn5_up_3 = upsample(n.conv3_dsn5, stride=4, name='upsample4_dsn5')
    n.upscore_dsn5_3 = crop(n.score_dsn5_up_3, n.conv3_dsn3)
    n.concat_dsn3 = L.Eltwise(n.conv3_dsn3,
                              n.upscore_dsn6_3,
                              n.upscore_dsn5_3,
                              name='concat_dsn3')  # unique name (was 'concat')
    n.conv4_dsn3 = conv1x1(n.concat_dsn3, 'conv4_dsn3')
    n.score_dsn3_up = upsample(n.conv4_dsn3,
                               stride=4,
                               name='upsample4_in_dsn3')
    n.upscore_dsn3 = crop(n.score_dsn3_up, n.data)
    if split == 'train':
        n.sigmoid_dsn3 = L.Sigmoid(n.upscore_dsn3)
        floss_param['name'] = 'dsn3'
        floss_param['beta'] = args.beta
        n.loss_dsn3 = L.Python(n.sigmoid_dsn3,
                               n.label,
                               module='floss',
                               layer='FmeasureLossLayer',
                               param_str=str(floss_param),
                               ntop=1,
                               loss_weight=1)
    else:
        n.sigmoid_dsn3 = L.Sigmoid(n.upscore_dsn3)
    ### DSN conv 2 ###
    n.conv1_dsn2, n.relu1_dsn2 = conv_relu(n.conv2_2, 128, ks=3, pad=1)
    n.conv2_dsn2, n.relu2_dsn2 = conv_relu(n.relu1_dsn2, 128, ks=3, pad=1)
    n.conv3_dsn2 = conv1x1(n.relu2_dsn2, 'conv3_dsn2')

    n.score_dsn6_up_2 = upsample(n.conv3_dsn6,
                                 stride=16,
                                 name='upsample16_dsn6')
    n.upscore_dsn6_2 = crop(n.score_dsn6_up_2, n.conv3_dsn2)
    n.score_dsn5_up_2 = upsample(n.conv3_dsn5, stride=8, name='upsample8_dsn5')
    n.upscore_dsn5_2 = crop(n.score_dsn5_up_2, n.conv3_dsn2)
    n.score_dsn4_up_2 = upsample(n.conv4_dsn4, stride=4, name='upsample4_dsn4')
    n.upscore_dsn4_2 = crop(n.score_dsn4_up_2, n.conv3_dsn2)
    n.score_dsn3_up_2 = upsample(n.conv4_dsn3, stride=2, name='upsample2_dsn3')
    n.upscore_dsn3_2 = crop(n.score_dsn3_up_2, n.conv3_dsn2)
    n.concat_dsn2 = L.Eltwise(n.conv3_dsn2,
                              n.upscore_dsn5_2,
                              n.upscore_dsn4_2,
                              n.upscore_dsn6_2,
                              n.upscore_dsn3_2,
                              name='concat_dsn2')  # unique name (was 'concat')
    n.conv4_dsn2 = conv1x1(n.concat_dsn2, 'conv4_dsn2')
    n.score_dsn2_up = upsample(n.conv4_dsn2,
                               stride=2,
                               name='upsample2_in_dsn2')
    n.upscore_dsn2 = crop(n.score_dsn2_up, n.data)
    if split == 'train':
        n.sigmoid_dsn2 = L.Sigmoid(n.upscore_dsn2)
        floss_param['name'] = 'dsn2'
        floss_param['beta'] = args.beta
        n.loss_dsn2 = L.Python(n.sigmoid_dsn2,
                               n.label,
                               module='floss',
                               layer='FmeasureLossLayer',
                               param_str=str(floss_param),
                               ntop=1,
                               loss_weight=1)
    else:
        n.sigmoid_dsn2 = L.Sigmoid(n.upscore_dsn2)
    ### DSN conv 1 ###
    n.conv1_dsn1, n.relu1_dsn1 = conv_relu(n.conv1_2, 128, ks=3, pad=1)
    n.conv2_dsn1, n.relu2_dsn1 = conv_relu(n.relu1_dsn1, 128, ks=3, pad=1)
    n.conv3_dsn1 = conv1x1(n.relu2_dsn1, 'conv3_dsn1')

    n.score_dsn6_up_1 = upsample(n.conv3_dsn6,
                                 stride=32,
                                 name='upsample32_dsn6')
    n.upscore_dsn6_1 = crop(n.score_dsn6_up_1, n.conv3_dsn1)
    n.score_dsn5_up_1 = upsample(n.conv3_dsn5,
                                 stride=16,
                                 name='upsample16_dsn5')
    n.upscore_dsn5_1 = crop(n.score_dsn5_up_1, n.conv3_dsn1)
    n.score_dsn4_up_1 = upsample(n.conv4_dsn4, stride=8, name='upsample8_dsn4')
    n.upscore_dsn4_1 = crop(n.score_dsn4_up_1, n.conv3_dsn1)
    n.score_dsn3_up_1 = upsample(n.conv4_dsn3, stride=4, name='upsample4_dsn3')
    n.upscore_dsn3_1 = crop(n.score_dsn3_up_1, n.conv3_dsn1)

    n.concat_dsn1 = L.Eltwise(n.conv3_dsn1,
                              n.upscore_dsn5_1,
                              n.upscore_dsn4_1,
                              n.upscore_dsn6_1,
                              n.upscore_dsn3_1,
                              name='concat_dsn1')  # unique name (was 'concat')
    n.score_dsn1_up = conv1x1(n.concat_dsn1, 'conv4_dsn1')
    n.upscore_dsn1 = crop(n.score_dsn1_up, n.data)
    if split == 'train':
        n.sigmoid_dsn1 = L.Sigmoid(n.upscore_dsn1)
        floss_param['name'] = 'dsn1'
        floss_param['beta'] = args.beta
        n.loss_dsn1 = L.Python(n.sigmoid_dsn1,
                               n.label,
                               module='floss',
                               layer='FmeasureLossLayer',
                               param_str=str(floss_param),
                               ntop=1,
                               loss_weight=1)
    else:
        n.sigmoid_dsn1 = L.Sigmoid(n.upscore_dsn1)
    ### Eltwise and multiscale weight layer ###
    n.concat_upscore = L.Eltwise(n.upscore_dsn1,
                                 n.upscore_dsn2,
                                 n.upscore_dsn3,
                                 n.upscore_dsn4,
                                 n.upscore_dsn5,
                                 n.upscore_dsn6,
                                 name='concat_upscore')  # element-wise sum; unique name (was 'concat')
    n.upscore_fuse = conv1x1(n.concat_upscore,
                             'new_score_weighting',
                             wf=dict(type='constant',
                                     value=1.0 / 6))  # np.float is removed in modern NumPy
    if split == 'train':
        n.sigmoid_fuse = L.Sigmoid(n.upscore_fuse)
        floss_param['name'] = 'fuse'
        floss_param['beta'] = args.beta
        n.loss_fuse = L.Python(n.sigmoid_fuse,
                               n.label,
                               module='floss',
                               layer='FmeasureLossLayer',
                               param_str=str(floss_param),
                               ntop=1,
                               loss_weight=1)
    else:
        n.sigmoid_fuse = L.Sigmoid(n.upscore_fuse)
    return n.to_proto()
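
The conv1x1 and upsample helpers used throughout this example are defined elsewhere in the source. As a rough sketch of what upsample likely does, here is a frozen single-channel deconvolution with kernel_size = 2 * stride, a common HED/DSS-style choice; the exact kernel size and channel count are assumptions:

def upsample(bottom, stride, name):
    # assumed helper: fixed (lr_mult=0) deconvolution that upsamples a
    # single-channel score map by the given integer stride
    return L.Deconvolution(bottom,
                           name=name,
                           convolution_param=dict(num_output=1,
                                                  kernel_size=2 * stride,
                                                  stride=stride,
                                                  bias_term=False),
                           param=[dict(lr_mult=0)])

Like the FCN deconvolutions earlier, such a layer would then be filled once with bilinear weights rather than trained.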
Example #29
def create_bnn_cnn_net_fold_stage(num_input_frames,
                                  fold_id='0',
                                  stage_id='1',
                                  phase=None):

    n = caffe.NetSpec()

    if phase == 'TRAIN':
        (n.img, n.padimg, n.unary, n.in_features, n.out_features,
         n.spixel_indices, n.scales1, n.scales2, n.unary_scales, n.label) = \
            L.Python(python_param=dict(
                         module='input_data_layer',
                         layer='InputRead',
                         param_str='TRAIN_1000000_' + fold_id + '_' + stage_id),
                     include=dict(phase=0),
                     ntop=10)
    elif phase == 'TEST':
        (n.img, n.padimg, n.unary, n.in_features, n.out_features,
         n.spixel_indices, n.scales1, n.scales2, n.unary_scales, n.label) = \
            L.Python(python_param=dict(
                         module='input_data_layer',
                         layer='InputRead',
                         param_str='VAL_50_' + fold_id + '_' + stage_id),
                     include=dict(phase=1),
                     ntop=10)
    else:
        n.img = L.Input(shape=[dict(dim=[1, 3, 480, 854])])
        n.padimg = L.Input(shape=[dict(dim=[1, 3, 481, 857])])

        # max_spixels is assumed to be defined at module level
        n.unary = L.Input(
            shape=[dict(dim=[1, 2, num_input_frames, max_spixels])])
        n.in_features = L.Input(
            shape=[dict(dim=[1, 6, num_input_frames, max_spixels])])
        n.out_features = L.Input(shape=[dict(dim=[1, 6, 1, max_spixels])])
        n.spixel_indices = L.Input(shape=[dict(dim=[1, 1, 480, 854])])
        n.scales1 = L.Input(shape=[dict(dim=[1, 6, 1, 1])])
        n.scales2 = L.Input(shape=[dict(dim=[1, 6, 1, 1])])
        n.unary_scales = L.Input(shape=[dict(dim=[1, 1, num_input_frames, 1])])

    n.flatten_scales1 = L.Flatten(n.scales1, flatten_param=dict(axis=0))
    n.flatten_scales2 = L.Flatten(n.scales2, flatten_param=dict(axis=0))
    n.flatten_unary_scales = L.Flatten(n.unary_scales,
                                       flatten_param=dict(axis=0))

    n.in_scaled_features1 = L.Scale(n.in_features,
                                    n.flatten_scales1,
                                    scale_param=dict(axis=1))
    n.out_scaled_features1 = L.Scale(n.out_features,
                                     n.flatten_scales1,
                                     scale_param=dict(axis=1))

    n.in_scaled_features2 = L.Scale(n.in_features,
                                    n.flatten_scales2,
                                    scale_param=dict(axis=1))
    n.out_scaled_features2 = L.Scale(n.out_features,
                                     n.flatten_scales2,
                                     scale_param=dict(axis=1))
    n.scaled_unary = L.Scale(n.unary,
                             n.flatten_unary_scales,
                             scale_param=dict(axis=2))

    ### Start of BNN

    # BNN - stage - 1
    n.out_seg1 = L.Permutohedral(n.scaled_unary,
                                 n.in_scaled_features1,
                                 n.out_scaled_features1,
                                 permutohedral_param=dict(
                                     num_output=32,
                                     group=1,
                                     neighborhood_size=0,
                                     bias_term=True,
                                     norm_type=P.Permutohedral.AFTER,
                                     offset_type=P.Permutohedral.NONE),
                                 filter_filler=dict(type='gaussian', std=0.01),
                                 bias_filler=dict(type='constant', value=0),
                                 param=[{
                                     'lr_mult': 1,
                                     'decay_mult': 1
                                 }, {
                                     'lr_mult': 2,
                                     'decay_mult': 0
                                 }])

    n.out_seg2 = L.Permutohedral(n.scaled_unary,
                                 n.in_scaled_features2,
                                 n.out_scaled_features2,
                                 permutohedral_param=dict(
                                     num_output=32,
                                     group=1,
                                     neighborhood_size=0,
                                     bias_term=True,
                                     norm_type=P.Permutohedral.AFTER,
                                     offset_type=P.Permutohedral.NONE),
                                 filter_filler=dict(type='gaussian', std=0.01),
                                 bias_filler=dict(type='constant', value=0),
                                 param=[{
                                     'lr_mult': 1,
                                     'decay_mult': 1
                                 }, {
                                     'lr_mult': 2,
                                     'decay_mult': 0
                                 }])

    n.concat_out_seg_1 = L.Concat(n.out_seg1,
                                  n.out_seg2,
                                  concat_param=dict(axis=1))
    n.concat_out_relu_1 = L.ReLU(n.concat_out_seg_1, in_place=True)

    # BNN - stage - 2
    n.out_seg3 = L.Permutohedral(n.concat_out_relu_1,
                                 n.out_scaled_features1,
                                 n.out_scaled_features1,
                                 permutohedral_param=dict(
                                     num_output=32,
                                     group=1,
                                     neighborhood_size=0,
                                     bias_term=True,
                                     norm_type=P.Permutohedral.AFTER,
                                     offset_type=P.Permutohedral.NONE),
                                 filter_filler=dict(type='gaussian', std=0.01),
                                 bias_filler=dict(type='constant', value=0),
                                 param=[{
                                     'lr_mult': 1,
                                     'decay_mult': 1
                                 }, {
                                     'lr_mult': 2,
                                     'decay_mult': 0
                                 }])

    n.out_seg4 = L.Permutohedral(n.concat_out_relu_1,
                                 n.out_scaled_features2,
                                 n.out_scaled_features2,
                                 permutohedral_param=dict(
                                     num_output=32,
                                     group=1,
                                     neighborhood_size=0,
                                     bias_term=True,
                                     norm_type=P.Permutohedral.AFTER,
                                     offset_type=P.Permutohedral.NONE),
                                 filter_filler=dict(type='gaussian', std=0.01),
                                 bias_filler=dict(type='constant', value=0),
                                 param=[{
                                     'lr_mult': 1,
                                     'decay_mult': 1
                                 }, {
                                     'lr_mult': 2,
                                     'decay_mult': 0
                                 }])
    n.concat_out_seg_2 = L.Concat(n.out_seg3,
                                  n.out_seg4,
                                  concat_param=dict(axis=1))
    n.concat_out_relu_2 = L.ReLU(n.concat_out_seg_2, in_place=True)

    # BNN - combination
    n.connection_out = L.Concat(n.concat_out_relu_1, n.concat_out_relu_2)
    n.spixel_out_seg = L.Convolution(n.connection_out,
                                     convolution_param=dict(
                                         num_output=2,
                                         kernel_size=1,
                                         stride=1,
                                         weight_filler=dict(type='gaussian',
                                                            std=0.01),
                                         bias_filler=dict(type='constant',
                                                          value=0)),
                                     param=[{
                                         'lr_mult': 1,
                                         'decay_mult': 1
                                     }, {
                                         'lr_mult': 2,
                                         'decay_mult': 0
                                     }])
    n.spixel_out_seg_relu = L.ReLU(n.spixel_out_seg, in_place=True)

    # Going from superpixels to pixels
    n.out_seg_bilateral = L.Smear(n.spixel_out_seg_relu, n.spixel_indices)

    ### BNN - DeepLab Combination
    n.deeplab_seg_presoftmax = deeplab(n.padimg, n.img, n.spixel_indices)
    n.deeplab_seg = L.Softmax(n.deeplab_seg_presoftmax)
    n.bnn_deeplab_connection = L.Concat(n.out_seg_bilateral, n.deeplab_seg)
    n.bnn_deeplab_seg = L.Convolution(n.bnn_deeplab_connection,
                                      convolution_param=dict(
                                          num_output=2,
                                          kernel_size=1,
                                          stride=1,
                                          weight_filler=dict(type='gaussian',
                                                             std=0.01),
                                          bias_filler=dict(type='constant',
                                                           value=0)),
                                      param=[{
                                          'lr_mult': 1,
                                          'decay_mult': 1
                                      }, {
                                          'lr_mult': 2,
                                          'decay_mult': 0
                                      }])
    n.bnn_deeplab_seg_relu = L.ReLU(n.bnn_deeplab_seg, in_place=True)

    ### Start of CNN

    # CNN - Stage 1
    n.out_seg_spatial1 = L.Convolution(n.bnn_deeplab_seg_relu,
                                       convolution_param=dict(
                                           num_output=32,
                                           kernel_size=3,
                                           stride=1,
                                           pad_h=1,
                                           pad_w=1,
                                           weight_filler=dict(type='gaussian',
                                                              std=0.01),
                                           bias_filler=dict(type='constant',
                                                            value=0)),
                                       param=[{
                                           'lr_mult': 1,
                                           'decay_mult': 1
                                       }, {
                                           'lr_mult': 2,
                                           'decay_mult': 0
                                       }])
    n.out_seg_spatial_relu1 = L.ReLU(n.out_seg_spatial1, in_place=True)

    # CNN - Stage 2
    n.out_seg_spatial2 = L.Convolution(n.out_seg_spatial_relu1,
                                       convolution_param=dict(
                                           num_output=32,
                                           kernel_size=3,
                                           stride=1,
                                           pad_h=1,
                                           pad_w=1,
                                           weight_filler=dict(type='gaussian',
                                                              std=0.01),
                                           bias_filler=dict(type='constant',
                                                            value=0)),
                                       param=[{
                                           'lr_mult': 1,
                                           'decay_mult': 1
                                       }, {
                                           'lr_mult': 2,
                                           'decay_mult': 0
                                       }])
    n.out_seg_spatial_relu2 = L.ReLU(n.out_seg_spatial2, in_place=True)

    # CNN - Stage 3
    n.out_seg_spatial = L.Convolution(n.out_seg_spatial_relu2,
                                      convolution_param=dict(
                                          num_output=2,
                                          kernel_size=3,
                                          stride=1,
                                          pad_h=1,
                                          pad_w=1,
                                          weight_filler=dict(type='gaussian',
                                                             std=0.01),
                                          bias_filler=dict(type='constant',
                                                           value=0.5)),
                                      param=[{
                                          'lr_mult': 1,
                                          'decay_mult': 1
                                      }, {
                                          'lr_mult': 2,
                                          'decay_mult': 0
                                      }])

    # Normalization
    n.out_seg = normalize(n.out_seg_spatial, 2)

    if phase == 'TRAIN' or phase == 'TEST':
        n.loss = L.LossWithoutSoftmax(n.out_seg,
                                      n.label,
                                      loss_param=dict(ignore_label=1000),
                                      loss_weight=1)
        n.accuracy = L.Accuracy(n.out_seg,
                                n.label,
                                accuracy_param=dict(ignore_label=1000))
        n.loss2 = L.SoftmaxWithLoss(n.deeplab_seg_presoftmax,
                                    n.label,
                                    loss_param=dict(ignore_label=1000),
                                    loss_weight=1)
        n.accuracy2 = L.Accuracy(n.deeplab_seg_presoftmax,
                                 n.label,
                                 accuracy_param=dict(ignore_label=1000))
    else:
        n.spixel_out_seg_2 = L.SpixelFeature(n.out_seg,
                                             n.spixel_indices,
                                             spixel_feature_param=dict(
                                                 type=P.SpixelFeature.AVGRGB,
                                                 max_spixels=12000,
                                                 rgb_scale=1.0))
        n.spixel_out_seg_final = normalize(n.spixel_out_seg_2, 2)

    return n.to_proto()
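
A plausible driver for this generator writes one prototxt per fold and phase; max_spixels must exist at module level, and the frame count, fold ids, and file names below are placeholders:

max_spixels = 12000  # assumed module-level constant read by the function above

for fold_id in ('0', '1', '2', '3'):
    for phase in ('TRAIN', 'TEST'):
        proto = create_bnn_cnn_net_fold_stage(5, fold_id=fold_id,
                                              stage_id='1', phase=phase)
        fname = 'bnn_cnn_%s_fold%s.prototxt' % (phase.lower(), fold_id)
        with open(fname, 'w') as f:
            f.write(str(proto))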
Example #30
def rpn(net,
        bottom,
        gt_boxes,
        im_info,
        data,
        anchors,
        feat_stride,
        scales,
        fixed=False,
        deploy=False):
    # fixed=True freezes the RPN layers by zeroing their lr multipliers;
    # otherwise weights learn at 1x and biases at 2x the base rate.
    mult = [{'lr_mult': 0}, {'lr_mult': 0}] if fixed else [{'lr_mult': 1}, {'lr_mult': 2}]
    net["rpn_conv/3x3"] = L.Convolution(bottom,
                                        kernel_size=3,
                                        stride=1,
                                        num_output=512,
                                        pad=1,
                                        param=mult,
                                        weight_filler=dict(type='gaussian',
                                                           std=0.01),
                                        bias_filler=dict(type='constant',
                                                         value=0),
                                        engine=2)
    net["rpn_relu/3x3"] = L.ReLU(net["rpn_conv/3x3"], in_place=True)
    net["rpn_cls_score"] = L.Convolution(net["rpn_relu/3x3"],
                                         kernel_size=1,
                                         stride=1,
                                         num_output=2 * anchors,
                                         pad=0,
                                         param=mult,
                                         weight_filler=dict(type='gaussian',
                                                            std=0.01),
                                         bias_filler=dict(type='constant',
                                                          value=0),
                                         engine=2)
    net["rpn_bbox_pred"] = L.Convolution(net["rpn_relu/3x3"],
                                         kernel_size=1,
                                         stride=1,
                                         num_output=4 * anchors,
                                         pad=0,
                                         param=mult,
                                         weight_filler=dict(type='gaussian',
                                                            std=0.01),
                                         bias_filler=dict(type='constant',
                                                          value=0),
                                         engine=2)
    net["rpn_cls_score_reshape"] = L.Reshape(
        net["rpn_cls_score"], reshape_param={"shape": {
            "dim": [0, 2, -1, 0]
        }})

    if (not deploy) and (not fixed):
        net["rpn_labels"], net["rpn_bbox_targets"], net["rpn_bbox_inside_weights"], net[
            "rpn_bbox_outside_weights"] = \
            L.Python(net["rpn_cls_score"], gt_boxes, im_info, data,
                     name='rpn-data',
                     python_param=dict(
                         module='rpn.anchor_target_layer',
                         layer='AnchorTargetLayer',
                         param_str='{"feat_stride": %s,"scales": %s}' % (feat_stride, scales)),
                     # param_str='"feat_stride": %s \n "scales": !!python/tuple %s ' %(feat_stride, scales)),
                     ntop=4, )
        net["rpn_cls_loss"] = L.SoftmaxWithLoss(net["rpn_cls_score_reshape"], net["rpn_labels"],
                                                     name="rpn_loss_cls", propagate_down=[1, 0], \
                                                     loss_weight=1, loss_param={"ignore_label": -1, "normalize": True})
        net["rpn_loss_bbox"] = L.SmoothL1Loss(net["rpn_bbox_pred"], net["rpn_bbox_targets"], \
                                                   net["rpn_bbox_inside_weights"],
                                                   net["rpn_bbox_outside_weights"], \
                                                   name="loss_bbox", loss_weight=1, smooth_l1_loss_param={"sigma": 3.0})
        return net["rpn_cls_loss"], net["rpn_loss_bbox"], net[
            "rpn_cls_score_reshape"], net["rpn_bbox_pred"]
    else:
        return net["rpn_cls_score_reshape"], net["rpn_bbox_pred"]