Example #1
def alexnet(train_data_source,
            test_data_source,
            train_batch_size=128,
            test_batch_size=50):
    n = caffe.NetSpec()

    n.data, n.label = L.ImageData(include=dict(phase=caffe.TRAIN),
                                  transform_param=dict(mirror=False,
                                                       crop_size=227,
                                                       scale=1. / 255),
                                  batch_size=train_batch_size,
                                  source=train_data_source,
                                  new_height=256,
                                  new_width=256,
                                  is_color=False,
                                  ntop=2)

    n.test_data, n.test_label = L.ImageData(include=dict(phase=caffe.TEST),
                                            transform_param=dict(mirror=False,
                                                                 crop_size=227,
                                                                 scale=1. /
                                                                 255),
                                            batch_size=test_batch_size,
                                            source=test_data_source,
                                            new_height=256,
                                            new_width=256,
                                            is_color=False,
                                            name='data',
                                            top=['data', 'label'],
                                            in_place=True,
                                            ntop=2)

    n.conv1 = L.Convolution(
        n.data,
        name='conv1',
        num_output=96,
        kernel_size=7,
        stride=2,
        group=1,
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0),
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    n.conv1_bn = L.BatchNorm(n.conv1, eps=0)
    n.relu1 = L.ReLU(n.conv1_bn, in_place=True)
    n.pool1 = L.Pooling(n.relu1, kernel_size=3, stride=2, pool=P.Pooling.MAX)

    n.conv2 = L.Convolution(
        n.pool1,
        name='conv2',
        num_output=192,
        pad=1,
        kernel_size=5,
        stride=2,
        group=1,
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0),
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    n.conv2_bn = L.BatchNorm(n.conv2, eps=0)
    n.relu2 = L.ReLU(n.conv2_bn, in_place=True)
    n.pool2 = L.Pooling(n.relu2, kernel_size=3, stride=2, pool=P.Pooling.MAX)

    n.conv3 = L.Convolution(
        n.pool2,
        name='conv3',
        num_output=384,
        pad=1,
        kernel_size=3,
        stride=1,
        group=1,
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0),
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    n.conv3_bn = L.BatchNorm(n.conv3, eps=0)
    n.relu3 = L.ReLU(n.conv3_bn, in_place=True)

    n.conv4 = L.Convolution(
        n.relu3,
        name='conv4',
        num_output=384,
        pad=1,
        kernel_size=3,
        stride=1,
        group=1,
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0),
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    n.conv4_bn = L.BatchNorm(n.conv4, eps=0)
    n.relu4 = L.ReLU(n.conv4_bn, in_place=True)

    n.conv5 = L.Convolution(
        n.relu4,
        name='conv5',
        num_output=192,
        pad=1,
        kernel_size=3,
        stride=1,
        group=1,
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0),
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    n.conv5_bn = L.BatchNorm(n.conv5, eps=0)
    n.relu5 = L.ReLU(n.conv5_bn, in_place=True)
    n.pool5 = L.Pooling(n.relu5, kernel_size=3, stride=2, pool=P.Pooling.MAX)

    n.fc6 = L.InnerProduct(
        n.pool5,
        name='fc6',
        num_output=2048,
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0),
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    n.fc6_bn = L.BatchNorm(n.fc6, eps=0)
    n.relu6 = L.ReLU(n.fc6_bn, in_place=True)
    n.drop6 = L.Dropout(n.relu6, in_place=True)

    n.fc7 = L.InnerProduct(
        n.drop6,
        name='fc7',
        num_output=1024,
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0),
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    n.fc7_bn = L.BatchNorm(n.fc7, eps=0)
    n.relu7 = L.ReLU(n.fc7_bn, in_place=True)
    n.drop7 = L.Dropout(n.relu7, in_place=True)

    n.fc8 = L.InnerProduct(n.drop7, name='fc8', num_output=1221)

    n.accuracy = L.Accuracy(n.fc8, n.label, include=dict(phase=caffe.TEST))
    n.loss = L.SoftmaxWithLoss(n.fc8, n.label)

    return 'name: "AlexNet"\n' + str(n.to_proto())
Example #2
    def resnet_layers_proto(self,
                            batch_size,
                            phase='TRAIN',
                            stages=(3, 4, 6, 3)):
        """
            (3, 4, 6, 3) for 50 layers; (3, 4, 23, 3) for 101 layers; (3, 8, 36, 3) for 152 layers
        """
        global use_global_stats

        n = caffe.NetSpec()
        if phase == 'TRAIN':
            n.data, n.label = L.Data(source=self.train_data,
                                     backend=P.Data.LMDB,
                                     batch_size=batch_size,
                                     ntop=2,
                                     include=dict(phase=0),
                                     transform_param=dict(
                                         crop_size=224,
                                         mean_value=[104, 117, 123],
                                         mirror=True))

            use_global_stats = False

        else:
            n.data, n.label = L.Data(source=self.test_data,
                                     backend=P.Data.LMDB,
                                     batch_size=batch_size,
                                     ntop=2,
                                     include=dict(phase=1),
                                     transform_param=dict(
                                         crop_size=224,
                                         mean_value=[104, 117, 123],
                                         mirror=False))

            use_global_stats = True

        n.conv1, n.conv1_bn, n.conv1_scale, n.conv1_relu = conv_bn_scale_relu(
            n.data,
            num_output=64,
            kernel_size=7,
            stride=2,
            pad=3,
            bias_term=True)
        n.pool1 = L.Pooling(n.conv1,
                            kernel_size=3,
                            stride=2,
                            pool=P.Pooling.MAX)  # 64x56x56

        for num in xrange(len(stages)):  # num = 0, 1, 2, 3
            for i in xrange(stages[num]):
                if i == 0:
                    stage_string = branch_shortcut_string
                    bottom_string = [
                        'n.pool1',
                        'n.res2b%s' % str(stages[0] - 1),
                        'n.res3b%s' % str(stages[1] - 1),
                        'n.res4b%s' % str(stages[2] - 1)
                    ][num]
                else:
                    stage_string = branch_string
                    if i == 1:
                        bottom_string = 'n.res%sa' % str(num + 2)
                    else:
                        bottom_string = 'n.res%sb%s' % (str(num + 2),
                                                        str(i - 1))
                exec(
                    stage_string.replace('(stage)', str(num + 2)).replace(
                        '(bottom)', bottom_string).replace(
                            '(num)', str(2**num * 64)).replace(
                                '(order)',
                                str(i)).replace('(stride)',
                                                str(int(num > 0) + 1)))

        exec('n.pool5 = L.Pooling((bottom), pool=P.Pooling.AVE, global_pooling=True)'.replace(
            '(bottom)', 'n.res5b%s' % str(stages[3] - 1)))
        n.classifier = L.InnerProduct(n.pool5, num_output=self.classifier_num)
        n.loss = L.SoftmaxWithLoss(n.classifier, n.label)
        if phase == 'TEST':
            n.accuracy_top1 = L.Accuracy(n.classifier,
                                         n.label,
                                         include=dict(phase=1))
            n.accuracy_top5 = L.Accuracy(n.classifier,
                                         n.label,
                                         include=dict(phase=1),
                                         accuracy_param=dict(top_k=5))

        return n.to_proto()
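As the docstring notes, the stages tuple selects the network depth. A hedged usage sketch, assuming ResNetBuilder is a hypothetical class that defines this method along with the train_data, test_data and classifier_num attributes it references:

builder = ResNetBuilder()  # hypothetical; must provide train_data/test_data/classifier_num
with open('resnet50_train.prototxt', 'w') as f:
    f.write(str(builder.resnet_layers_proto(batch_size=32, phase='TRAIN', stages=(3, 4, 6, 3))))
with open('resnet101_train.prototxt', 'w') as f:
    f.write(str(builder.resnet_layers_proto(batch_size=32, phase='TRAIN', stages=(3, 4, 23, 3))))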
Example #3
train_net.data, train_net.label = L.Data(source=train_data, backend=P.Data.LMDB, batch_size=64, ntop=2, transform_param=dict(scale=0.00390625))
# Build the main LeNet-5 body
lenet5_body(train_net, 'data')
# Add the softmax loss layer
train_net.loss = L.SoftmaxWithLoss(train_net.ip2, train_net.label)

# Test network
test_net = caffe.NetSpec()  # base network
# Data input layer with labels
test_net.data, test_net.label = L.Data(source=test_data, batch_size=100, backend=P.Data.LMDB, ntop=2, transform_param=dict(scale=0.00390625))
# Build the main LeNet-5 body
lenet5_body(test_net, 'data')
# Add the softmax loss layer
test_net.loss = L.SoftmaxWithLoss(test_net.ip2, test_net.label)
# Add an accuracy layer
test_net.accuracy = L.Accuracy(test_net.ip2, test_net.label)

# Deploy network
deploy_net = caffe.NetSpec()  # base network
# Data input layer (no label at deploy time)
deploy_net.data = L.Input(input_param=dict(shape=dict(dim=[64, 1, 28, 28])))
# Build the main LeNet-5 body
lenet5_body(deploy_net, 'data')
deploy_net.prob = L.Softmax(deploy_net.ip2)

# Save the training net
with open(model_root+'train.prototxt', 'w') as f:
    print('name: "LenNet5_train"', file=f)
    print(train_net.to_proto(), file=f)

with open(model_root+'test.prototxt', 'w') as f:
    print('name: "LenNet5_test"', file=f)
    print(test_net.to_proto(), file=f)

# Save the deploy net
with open(model_root+'deploy.prototxt', 'w') as f:
    print('name: "LenNet5_deploy"', file=f)
    print(deploy_net.to_proto(), file=f)
Example #4
def vgg_face(split, mean, opt):
    n = caffe.NetSpec()

    # config python data layer
    if split == 'train':
        batch_size = opt.train_batch_size
    if split == 'val':
        batch_size = opt.val_batch_size
    if split == 'test':
        batch_size = opt.test_batch_size

    if split == 'train' or split == 'val':
        dataset_name = opt.train_dataset_name
    else:
        dataset_name = opt.test_dataset_name

    pydata_params = dict(split=split,
                         data_dir=opt.data_dir,
                         batch_size=batch_size,
                         mean=mean,
                         dataset=dataset_name,
                         load_size=opt.load_size,
                         crop_size=opt.crop_size)
    n.data, n.label = L.Python(module='faceData_layers',
                               layer='FaceDataLayer',
                               ntop=2,
                               param_str=str(pydata_params))

    # vgg-face net
    # conv layers
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 64)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
    n.pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
    n.pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
    n.pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
    n.pool5 = max_pool(n.relu5_3)

    # drop out and fc layers
    n.fc6, n.relu6, n.drop6 = fc_relu_dropout(n.pool5, 4096, 0.5)
    n.fc7, n.relu7, n.drop7 = fc_relu_dropout(n.fc6, 4096, 0.5)

    lr_ratio = 100  # lr multiplier for truncated layers
    n.fc8_face = L.InnerProduct(n.fc7,
                                num_output=1024,
                                param=[
                                    dict(lr_mult=1 * lr_ratio, decay_mult=1),
                                    dict(lr_mult=2 * lr_ratio, decay_mult=0)
                                ],
                                weight_filler=dict(type='gaussian', std=0.01),
                                bias_filler=dict(type='constant', value=0))
    n.fc9_face = L.InnerProduct(n.fc8_face,
                                num_output=2,
                                param=[
                                    dict(lr_mult=1 * lr_ratio, decay_mult=1),
                                    dict(lr_mult=2 * lr_ratio, decay_mult=0)
                                ],
                                weight_filler=dict(type='gaussian', std=0.01),
                                bias_filler=dict(type='constant', value=0))

    # loss layer
    n.loss = L.SoftmaxWithLoss(n.fc9_face, n.label)

    # loss and accuracy layer
    n.acc = L.Accuracy(n.fc9_face, n.label)
    return n.to_proto()
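A usage sketch for the builder above; the opt holder is a hypothetical stand-in for whatever options object the surrounding project uses, populated with the attributes the function reads:

class Opt(object):
    pass

opt = Opt()  # hypothetical options holder
opt.train_batch_size, opt.val_batch_size, opt.test_batch_size = 32, 16, 16
opt.train_dataset_name, opt.test_dataset_name = 'faces_train', 'faces_test'
opt.data_dir, opt.load_size, opt.crop_size = 'data/faces', 256, 224
with open('vgg_face_train.prototxt', 'w') as f:
    f.write(str(vgg_face('train', mean=(104, 117, 123), opt=opt)))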
Example #5
def create_neural_net(input_file, batch_size=50):
    net = caffe.NetSpec()
    net.data, net.label = L.Data(batch_size=batch_size,
                                 source=input_file,
                                 backend=caffe.params.Data.LMDB,
                                 ntop=2,
                                 include=dict(phase=caffe.TEST),
                                 name='juniward04')

    ## pre-process
    net.conv1 = L.Convolution(net.data,
                              num_output=16,
                              kernel_size=4,
                              stride=1,
                              pad=1,
                              weight_filler=dict(type='dct4'),
                              param=[{
                                  'lr_mult': 0,
                                  'decay_mult': 0
                              }],
                              bias_term=False)
    TRUNCABS = caffe_pb2.QuantTruncAbsParameter.TRUNCABS
    net.quanttruncabs = L.QuantTruncAbs(net.conv1,
                                        process=TRUNCABS,
                                        threshold=8,
                                        in_place=True)

    ## block 1 16
    [
        net.conv1_proj, net.bn2, net.scale2, net.conv512_1, net.bn2_1,
        net.scale2_1, net.relu512_1, net.conv512_to_256, net.bn2_2,
        net.scale2_2, net.res512_to_256, net.relu512_to_256
    ] = add_downsampling_block_1(net.quanttruncabs, 12)

    #    [net.conv1_proj, net.bn2, net.scale2, net.conv512_1, net.bn2_1, net.scale2_1,
    #     net.relu512_1, net.conv512_2, net.bn2_2, net.scale2_2, net.relu512_2, net.conv512_to_256,
    #     net.bn2_3, net.scale2_3, net.res512_to_256,
    #     net.relu512_to_256] = add_downsampling_block(net.quanttruncabs, 12)
    ## block 2 13
    [
        net.conv256_1, net.bn2_4, net.scale2_4, net.relu256_1, net.conv256_2,
        net.bn2_5, net.scale2_5, net.relu256_2, net.conv256_3, net.bn2_6,
        net.scale2_6, net.res256_3, net.relu256_3
    ] = add_skip_block(net.res512_to_256, 24)
    ## block 3 16
    [
        net.res256_3_proj, net.bn2_7, net.scale2_7, net.conv256_4, net.bn2_8,
        net.scale2_8, net.relu256_4, net.conv256_5, net.bn2_9, net.scale2_9,
        net.relu256_5, net.conv256_to_128, net.bn2_10, net.scale2_10,
        net.res256_to_128, net.relu256_to_128
    ] = add_downsampling_block(net.res256_3, 24)
    ## block 4 13
    [
        net.conv128_1, net.bn2_11, net.scale2_11, net.relu128_1, net.conv128_2,
        net.bn2_12, net.scale2_12, net.relu128_2, net.conv128_3, net.bn2_13,
        net.scale2_13, net.res128_3, net.relu128_3
    ] = add_skip_block(net.res256_to_128, 48)
    ## block 5 16
    [
        net.res128_3_proj, net.bn2_14, net.scale2_14, net.conv128_4,
        net.bn2_15, net.scale2_15, net.relu128_4, net.conv128_5, net.bn2_16,
        net.scale2_16, net.relu128_5, net.conv128_to_64, net.bn2_17,
        net.scale2_17, net.res128_to_64, net.relu128_to_64
    ] = add_downsampling_block(net.res128_3, 48)
    ## block 6 13
    [
        net.conv64_1, net.bn2_18, net.scale2_18, net.relu64_1, net.conv64_2,
        net.bn2_19, net.scale2_19, net.relu64_2, net.conv64_3, net.bn2_20,
        net.scale2_20, net.res64_3, net.relu64_3
    ] = add_skip_block(net.res128_to_64, 96)
    ## block 7 16
    [
        net.res64_3_proj, net.bn2_21, net.scale2_21, net.conv64_4, net.bn2_22,
        net.scale2_22, net.relu64_4, net.conv64_5, net.bn2_23, net.scale2_23,
        net.relu64_5, net.conv64_to_32, net.bn2_24, net.scale2_24,
        net.res64_to_32, net.relu64_to_32
    ] = add_downsampling_block(net.res64_3, 96)
    ## block 8 13
    [
        net.conv32_1, net.bn2_25, net.scale2_25, net.relu32_1, net.conv32_2,
        net.bn2_26, net.scale2_26, net.relu32_2, net.conv32_3, net.bn2_27,
        net.scale2_27, net.res32_3, net.relu32_3
    ] = add_skip_block(net.res64_to_32, 192)
    ##    ## block 9 16
    ##    [net.res32_3_proj, net.bn2_28, net.scale2_28, net.conv32_4, net.bn2_29, net.scale2_29,
    ##     net.relu32_4, net.con32_5, net.bn2_30, net.scale2_30, net.relu32_5, net.conv32_to_16,
    ##     net.bn2_31, net.scale2_31, net.res32_to_16,
    ##     net.relu32_to_16] = add_downsampling_block(net.res32_3, 192)
    [net.res32_3_proj, net.bn2_28,
     net.scale2_28] = add_module(net.res32_3, 2 * 192, 1, 3, 2)
    ## block 10 13
    [
        net.conv16_1, net.bn2_32, net.scale2_32, net.relu16_1, net.conv16_2,
        net.bn2_33, net.scale2_33, net.relu16_2, net.conv16_3, net.bn2_34,
        net.scale2_34, net.res16_3, net.relu16_3
    ] = add_skip_block(net.res32_3_proj, 384)

    ## global pool
    AVE = caffe_pb2.PoolingParameter.AVE
    net.global_pool = L.Pooling(net.res16_3, pool=AVE, kernel_size=8, stride=1)

    ## full connecting
    net.fc = L.InnerProduct(net.global_pool,
                            param=[{
                                'lr_mult': 1
                            }, {
                                'lr_mult': 2
                            }],
                            num_output=2,
                            weight_filler=dict(type='xavier'),
                            bias_filler=dict(type='constant'))
    ## accuracy
    net.accuracy = L.Accuracy(net.fc,
                              net.label,
                              include=dict(phase=caffe.TEST))
    ## loss
    net.loss = L.SoftmaxWithLoss(net.fc, net.label)

    return net.to_proto()
Example #6
def buildnet( inputdb, mean_file, batch_size, height, width, nchannels, net_type="train"):
    net = caffe.NetSpec()

    crop_size = -1
    if augment_data:
        crop_size = width

    train = False
    if net_type=="train":
        train = True

    data_layers,label = lt.data_layer_trimese( net, inputdb, mean_file, batch_size, net_type, height, width, nchannels, [1,2], crop_size=768 )

    # First conv  layer
    branch_ends = []
    for n,layer in enumerate(data_layers):
        conv1 = lt.convolution_layer( net, layer, "plane%d_conv1"%(n), "tri_conv1_plane%d"%(n), 64, 2, 5, 3, 0.05, addbatchnorm=True, train=train )
        pool1 = lt.pool_layer( net, conv1, "plane%d_pool1"%(n), 3, 1 )

        conv2 = lt.convolution_layer( net, pool1, "plane%d_conv2"%(n), "tri_conv2_plane%d"%(n), 64, 2, 3, 3, 0.05, addbatchnorm=True, train=train )
        
        conv3 = lt.convolution_layer( net, conv2, "plane%d_conv3"%(n), "tri_conv3_plane%d"%(n), 64, 2, 3, 3, 0.05, addbatchnorm=True, train=train )

        pool3 = lt.pool_layer( net, conv3, "plane%d_pool3"%(n), 3, 1 )

        branch_ends.append( pool3 )
        
    concat = lt.concat_layer( net, "mergeplanes", *branch_ends )


    resnet1  = lt.resnet_module( net, concat,  "resnet1", 64*3, 3, 1, 1,8,32, use_batch_norm, train)
    resnet2  = lt.resnet_module( net, resnet1, "resnet2", 32, 3, 1, 1,8,32, use_batch_norm, train)
    resnet3  = lt.resnet_module( net, resnet2, "resnet3", 32, 3, 1, 1,16,64, use_batch_norm, train)
    
    resnet4  = lt.resnet_module( net, resnet3, "resnet4", 64, 3, 1, 1,16,64, use_batch_norm, train)
    resnet5  = lt.resnet_module( net, resnet4, "resnet5", 64, 3, 1, 1,16,64, use_batch_norm, train)
    resnet6  = lt.resnet_module( net, resnet5, "resnet6", 64, 3, 1, 1,32,128, use_batch_norm, train)

    resnet7  = lt.resnet_module( net, resnet6, "resnet7", 128, 3, 1, 1, 32,128, use_batch_norm, train)
    resnet8  = lt.resnet_module( net, resnet7, "resnet8", 128, 3, 1, 1, 32,128, use_batch_norm, train)
    resnet9  = lt.resnet_module( net, resnet8, "resnet9", 128, 3, 1, 1, 64,256, use_batch_norm, train)
        
    net.lastpool = lt.pool_layer( net, resnet9, "lastpool", 5, 1, P.Pooling.AVE )
    lastpool_layer = net.lastpool
    
    if use_dropout:
        net.lastpool_dropout = L.Dropout(net.lastpool,
                                         in_place=True,
                                         dropout_param=dict(dropout_ratio=0.5))
        lastpool_layer = net.lastpool_dropout
    
    fc1 = lt.final_fully_connect( net, lastpool_layer, nclasses=512 )
    fc2 = lt.final_fully_connect( net, fc1, nclasses=4096 )
    fc3 = lt.final_fully_connect( net, fc2, nclasses=2 )
    
    if train:
        net.loss = L.SoftmaxWithLoss(fc3, net.label )
        net.acc = L.Accuracy(fc3,net.label)
    else:
        net.probt = L.Softmax( fc3 )
        net.acc = L.Accuracy(fc3,net.label)

    return net
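Note that this builder returns the NetSpec itself rather than a NetParameter, so the caller serializes it; a sketch with hypothetical arguments, assuming the lt helpers and the augment_data/use_batch_norm/use_dropout globals used above are defined:

net = buildnet('train.db', 'mean_file.root', 16, 768, 768, 3, net_type='train')
with open('trimese_train.prototxt', 'w') as f:
    f.write(str(net.to_proto()))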
Example #7
def generate_mobile(method, CHOICE_LIST):
    # new net and new data
    global net
    net = caffe.NetSpec()
    train_part = generate_data(method)
    input_blob_name = "data"
    input_size = 224
    input_channel = 3
    # conv0
    input_blob_name, input_size, input_channel = generate_conv(
        input_blob_name,
        "conv0",
        input_size,
        input_channel,
        3,
        2,
        1,
        16,
        (method == "train_test"),
        True,
    )
    # assert
    assert len(CHOICE_LIST) == len(TYPICAL_CHOICE)
    assert sum([int(v1 != v2)
                for v1, v2 in zip(CHOICE_LIST, TYPICAL_CHOICE)]) in [0, 1]
    for i in range(12):
        layer_name = (["cL%d", "L%d"
                       ][int(CHOICE_LIST[i] == TYPICAL_CHOICE[i])]) % (i + 1)
        input_blob_name, input_size, input_channel = generate_resnet_block(
            input_blob_name,
            layer_name,
            input_size,
            input_channel,
            CHOICE_LIST[i],
            i,
            method,
        )
    # conv_tile
    input_blob_name, input_size, input_channel = generate_conv(
        input_blob_name,
        "conv_tile",
        input_size,
        input_channel,
        1,
        1,
        1,
        1504,
        (method == "train_test"),
        True,
    )
    # avg_pool
    input_blob_name, input_size, input_channel = generate_pooling(
        input_blob_name,
        "conv_tile",
        input_size,
        input_channel,
        Params.Pooling.AVE,
        7,
        7,
    )
    # fc loss and accuracy layer
    input_blob_name = generate_fc(input_blob_name, "FC", 1000, False, False)
    if method == "train_test":
        ## loss and accuracy
        net.tops["loss"] = Layers.SoftmaxWithLoss(
            net.tops[input_blob_name],
            net.label,
            name="loss",
            ntop=1,
            include=dict(phase=caffe.TRAIN),
        )
        net.tops["accuracy"] = Layers.Accuracy(
            net.tops[input_blob_name],
            net.label,
            name="accuracy",
            ntop=1,
            include=dict(phase=caffe.TEST),
        )
        return train_part + str(net.to_proto())
    else:
        return str(net.to_proto())
Example #8
def InceptionResNetV2(train_lmdb,
                      test_lmdb,
                      input_size=299,
                      batch_size=256,
                      stages=[0, 5, 10, 5],
                      first_output=32,
                      include_acc=False):
    # NOTE: each assignment below rebinds data/label, so only the last data
    # layer (the TEST-phase MemoryData) ends up in the generated prototxt;
    # the include=phase settings on the earlier layers have no effect here.
    data, label = L.Data(source=train_lmdb,
                         backend=P.Data.LMDB,
                         batch_size=batch_size,
                         ntop=2,
                         transform_param=dict(crop_size=input_size,
                                              mean_value=[104, 117, 123],
                                              mirror=True),
                         include=dict(phase=getattr(caffe_pb2, 'TRAIN')))
    data, label = L.Data(source=test_lmdb,
                         backend=P.Data.LMDB,
                         batch_size=batch_size,
                         ntop=2,
                         transform_param=dict(crop_size=input_size,
                                              mean_value=[104, 117, 123],
                                              mirror=True),
                         include=dict(phase=getattr(caffe_pb2, 'TEST')))
    data, label = L.MemoryData(batch_size=batch_size,
                               height=input_size,
                               width=input_size,
                               channels=3,
                               ntop=2,
                               transform_param=dict(mean_value=[104, 117, 123],
                                                    mirror=True),
                               include=dict(phase=getattr(caffe_pb2, 'TEST')))

    Inception_ResNet_A_input = stem(bottom=data,
                                    conv1_num=32,
                                    conv2_num=32,
                                    conv3_num=64,
                                    conv4_num=96,
                                    conv5_num=64,
                                    conv6_num=96,
                                    conv7_num=64,
                                    conv8_num=64,
                                    conv9_num=64,
                                    conv10_num=96,
                                    conv11_num=192)
    for i in xrange(stages[1]):
        Inception_ResNet_A_input = Inception_ResNet_A(
            bottom=Inception_ResNet_A_input,
            bottom_size=384,
            num1x1=32,
            num3x3=48,
            num3x3double=64)

    Inception_ResNet_B_input = ReductionA(bottom=Inception_ResNet_A_input,
                                          num1x1_k=256,
                                          num3x3_l=256,
                                          num3x3_n=384,
                                          num3x3_m=384)

    for i in xrange(stages[2]):
        Inception_ResNet_B_input = Inception_ResNet_B(
            bottom=Inception_ResNet_B_input,
            bottom_size=1152,
            num1x1=192,
            num1x1double=128,
            num7x1=160,
            num1x7=192)

    Inception_ResNet_C_input = ReductionB(bottom=Inception_ResNet_B_input,
                                          num1x1=256,
                                          num3x3=384,
                                          num3x3double=288,
                                          num3x3three=320)

    for i in xrange(stages[3]):
        Inception_ResNet_C_input = Inception_ResNet_C(
            bottom=Inception_ResNet_C_input,
            bottom_size=2144,
            num1x1=192,
            num1x3=224,
            num3x1=256)

    glb_pool = L.Pooling(Inception_ResNet_C_input,
                         pool=P.Pooling.AVE,
                         global_pooling=True)
    dropout = L.Dropout(glb_pool, dropout_ratio=0.2)
    fc = L.InnerProduct(dropout, num_output=1000)
    loss = L.SoftmaxWithLoss(fc, label)
    acc = L.Accuracy(fc, label, include=dict(phase=getattr(caffe_pb2, 'TEST')))
    return to_proto(loss, acc)
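A usage sketch for the function above, with hypothetical LMDB paths; to_proto(loss, acc) returns a NetParameter, so str() yields the prototxt text:

with open('inception_resnet_v2.prototxt', 'w') as f:
    f.write(str(InceptionResNetV2('examples/train_lmdb', 'examples/test_lmdb', batch_size=64)))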
Example #9
def accuracy(bottom, label, test_only=False):
    if test_only:
        return L.Accuracy(bottom, label, include=dict(phase=caffe.TEST))
    return L.Accuracy(bottom, label)
Example #10
def cnn(split):
    n = caffe.NetSpec()
    pydata_params = dict(dataset_dir='/home/kevin/dataset/washington_rgbd_dataset',
                         split=split,
                         mean=(104.00698793, 116.66876762, 122.67891434),
                         seed=1337, batch_size=128, img_size=(227, 227))
    if split == 'deploy':
        # One 4-D input blob (the original declared four 1-D shapes with ntop=2).
        n.img = L.Input(name='input', shape=dict(dim=[1, 1, 224, 224]))
    else:
        pylayer = 'WashingtonDataLayer'

        #---------------------------------Data Layer---------------------------------------#
        n.rgb, n.depth, n.label = L.Python(name="data",
                                           module='data_layers.washington_data_layer',
                                           layer=pylayer,
                                           ntop=3,
                                           param_str=str(pydata_params))

    #---------------------------------RGB-Net---------------------------------------#

    # the caffe-net (alex-net)
    n.rgb_conv1, n.rgb_relu1 = conv_relu(n.rgb, 96, ks=11, stride=4, pad=0)
    n.rgb_pool1 = max_pool(n.rgb_relu1, ks=3)
    n.rgb_norm1 = L.LRN(n.rgb_pool1, lrn_param=dict(local_size=5, alpha=0.0005, beta=0.75, k=2))

    n.rgb_conv2, n.rgb_relu2 = conv_relu(n.rgb_norm1, 256, ks=5, pad=2, group=2)
    n.rgb_pool2 = max_pool(n.rgb_relu2, ks=3)
    n.rgb_norm2 = L.LRN(n.rgb_pool2, lrn_param=dict(local_size=5, alpha=0.0005, beta=0.75, k=2))

    n.rgb_conv3, n.rgb_relu3 = conv_relu(n.rgb_norm2, 384, ks=3, pad=1, lr1=1, lr2=2)
    n.rgb_conv4, n.rgb_relu4 = conv_relu(n.rgb_relu3, 384, ks=3, pad=1, group=2, lr1=1, lr2=2)

    n.rgb_conv5, n.rgb_relu5 = conv_relu(n.rgb_relu4, 256, ks=3, pad=1, group=2, lr1=1, lr2=2)
    n.rgb_pool5 = max_pool(n.rgb_relu5, ks=3)

    # fully conv
    n.rgb_fc6, n.rgb_relu6 = fc_relu(n.rgb_pool5, 4096, lr1=1, lr2=2)
    n.rgb_drop6 = L.Dropout(n.rgb_relu6, dropout_ratio=0.5, in_place=True)
    n.rgb_fc7, n.rgb_relu7 = fc_relu(n.rgb_drop6, 4096, lr1=1, lr2=2)
    n.rgb_drop7 = L.Dropout(n.rgb_relu7, dropout_ratio=0.5, in_place=True)

    n.rgb_fc8 = fc(n.rgb_drop7, 51, lr1=1, lr2=2)


    #---------------------------------Depth-Net---------------------------------------#

    # the caffe-net (alex-net)
    n.depth_conv1, n.depth_relu1 = conv_relu(n.depth, 96, ks=11, stride=4, pad=0)
    n.depth_pool1 = max_pool(n.depth_relu1, ks=3)
    n.depth_norm1 = L.LRN(n.depth_pool1, lrn_param=dict(local_size=5, alpha=0.0005, beta=0.75, k=2))

    n.depth_conv2, n.depth_relu2 = conv_relu(n.depth_norm1, 256, ks=5, pad=2, group=2)
    n.depth_pool2 = max_pool(n.depth_relu2, ks=3)
    n.depth_norm2 = L.LRN(n.depth_pool2, lrn_param=dict(local_size=5, alpha=0.0005, beta=0.75, k=2))

    n.depth_conv3, n.depth_relu3 = conv_relu(n.depth_norm2, 384, ks=3, pad=1, lr1=1, lr2=2)
    n.depth_conv4, n.depth_relu4 = conv_relu(n.depth_relu3, 384, ks=3, pad=1, group=2, lr1=1, lr2=2)

    n.depth_conv5, n.depth_relu5 = conv_relu(n.depth_relu4, 256, ks=3, pad=1, group=2, lr1=1, lr2=2)
    
    n.depth_pool5 = max_pool(n.depth_relu5, ks=3)

    # fully conv
    n.depth_fc6, n.depth_relu6 = fc_relu(n.depth_pool5, 4096, lr1=1, lr2=2)   
    n.depth_drop6 = L.Dropout(n.depth_relu6, dropout_ratio=0.5, in_place=True)
    n.depth_fc7, n.depth_relu7 = fc_relu(n.depth_drop6 , 4096, lr1=1, lr2=2)
    n.depth_drop7 = L.Dropout(n.depth_relu7, dropout_ratio=0.5, in_place=True)

    n.depth_fc8 = fc(n.depth_drop7, 51, lr1=1, lr2=2)

    #-----------------------------------final output---------------------------------#
    # Concatenation
    #n.concat = L.Concat(n.rgb_drop7, n.depth_drop7, axis=1)
    #n.rgbd_fc8 = fc(n.concat, 51, lr1=1, lr2=2)

    if split != 'deploy':
        n.rgb_accuracy = L.Accuracy(n.rgb_fc8, n.label)
        n.rgb_loss = L.SoftmaxWithLoss(n.rgb_fc8, n.label)
        n.depth_accuracy = L.Accuracy(n.depth_fc8, n.label)
        n.depth_loss = L.SoftmaxWithLoss(n.depth_fc8, n.label)
        #n.overall_accuracy = L.Accuracy(n.rgbd_fc8, n.label)
        #n.overall_loss = L.SoftmaxWithLoss(n.rgbd_fc8, n.label)


    return n.to_proto()
Example #11
    def alexnet_bn_proto(self, batch_size, phase='TRAIN'):
        n = caffe.NetSpec()
        if phase == 'TRAIN':
            source_data = self.train_data
            mirror = True
        else:
            source_data = self.test_data
            mirror = False
        n.data, n.label = L.Data(source=source_data,
                                 backend=P.Data.LMDB,
                                 batch_size=batch_size,
                                 ntop=2,
                                 transform_param=dict(
                                     crop_size=227,
                                     mean_value=[104, 117, 123],
                                     mirror=mirror))

        n.conv1, n.conv1_bn, n.conv1_scale, n.conv1_relu = \
            factorization_conv_bn_scale_relu(n.data, num_output=96, kernel_size=11, stride=4,)  # 96x55x55
        n.pool1 = L.Pooling(n.conv1,
                            kernel_size=3,
                            stride=2,
                            pool=P.Pooling.MAX)  # 96x27x27

        n.conv2, n.conv2_bn, n.conv2_scale, n.conv2_relu = \
            factorization_conv_bn_scale_relu(n.pool1, num_output=256, kernel_size=5, pad=2)  # 256x27x27
        n.pool2 = L.Pooling(n.conv2,
                            kernel_size=3,
                            stride=2,
                            pool=P.Pooling.MAX)  # 256x13x13

        n.conv3, n.conv3_bn, n.conv3_scale, n.conv3_relu = \
            factorization_conv_bn_scale_relu(n.pool2, num_output=384, kernel_size=3, pad=1)  # 384x13x13

        n.conv4, n.conv4_bn, n.conv4_scale, n.conv4_relu = \
            factorization_conv_bn_scale_relu(n.conv3, num_output=384, kernel_size=3, pad=1)  # 384x13x13

        n.conv5, n.conv5_bn, n.conv5_scale, n.conv5_relu = \
            factorization_conv_bn_scale_relu(n.conv4, num_output=256, kernel_size=3, pad=1)  # 256x13x13
        n.pool5 = L.Pooling(n.conv5,
                            kernel_size=3,
                            stride=2,
                            pool=P.Pooling.MAX)  # 256x6x6

        n.fc6, n.relu6, n.drop6 = fc_relu_drop(n.pool5,
                                               num_output=2048)  # 2048x1x1
        n.fc7, n.relu7, n.drop7 = fc_relu_drop(n.fc6,
                                               num_output=2048)  # 2048x1x1
        n.fc8 = L.InnerProduct(n.fc7,
                               num_output=self.classifier_num,
                               param=[
                                   dict(lr_mult=1, decay_mult=1),
                                   dict(lr_mult=2, decay_mult=0)
                               ],
                               weight_filler=dict(type='gaussian', std=0.01),
                               bias_filler=dict(type='constant', value=0))
        if phase == 'TRAIN':
            pass
        else:
            n.accuracy_top1 = L.Accuracy(n.fc8, n.label, include=dict(phase=1))
            n.accuracy_top5 = L.Accuracy(n.fc8,
                                         n.label,
                                         include=dict(phase=1),
                                         accuracy_param=dict(top_k=5))
        n.loss = L.SoftmaxWithLoss(n.fc8, n.label)

        return n.to_proto()
Example #12
def buildnet(inputdb,
             mean_file,
             batch_size,
             height,
             width,
             nchannels,
             net_type="train"):
    net = caffe.NetSpec()

    crop_size = -1
    if augment_data:
        crop_size = width

    train = False
    if net_type == "train":
        train = True

    data_layers, label = lt.data_layer_stacked(net,
                                               inputdb,
                                               mean_file,
                                               batch_size,
                                               net_type,
                                               height,
                                               width,
                                               nchannels,
                                               crop_size=crop_size)

    # First conv  layer
    conv1 = lt.convolution_layer(net,
                                 data_layers[0],
                                 "conv1",
                                 "conv1",
                                 16,
                                 2,
                                 7,
                                 3,
                                 0.05,
                                 addbatchnorm=True,
                                 train=train)
    pool1 = lt.pool_layer(net, conv1, "pool1", 5, 3)

    resnet2 = lt.resnet_module(net, pool1, "resnet2", 16, 3, 1, 1, 8, 16,
                               use_batch_norm, train)
    resnet3 = lt.resnet_module(net, resnet2, "resnet3", 16, 3, 1, 1, 8, 16,
                               use_batch_norm, train)
    resnet4 = lt.resnet_module(net, resnet3, "resnet4", 16, 3, 1, 1, 8, 32,
                               use_batch_norm, train)

    resnet5 = lt.resnet_module(net, resnet4, "resnet5", 32, 3, 1, 1, 8, 32,
                               use_batch_norm, train)
    resnet6 = lt.resnet_module(net, resnet5, "resnet6", 32, 3, 1, 1, 8, 32,
                               use_batch_norm, train)
    resnet7 = lt.resnet_module(net, resnet6, "resnet7", 32, 3, 1, 1, 16, 64,
                               use_batch_norm, train)

    resnet8 = lt.resnet_module(net, resnet7, "resnet8", 64, 3, 1, 1, 16, 64,
                               use_batch_norm, train)
    resnet9 = lt.resnet_module(net, resnet8, "resnet9", 64, 3, 1, 1, 16, 64,
                               use_batch_norm, train)
    resnet10 = lt.resnet_module(net, resnet9, "resnet10", 64, 3, 1, 1, 32, 128,
                                use_batch_norm, train)

    net.lastpool = lt.pool_layer(net, resnet10, "lastpool", 7, 1,
                                 P.Pooling.AVE)
    lastpool_layer = net.lastpool

    if use_dropout:
        net.lastpool_dropout = L.Dropout(net.lastpool,
                                         in_place=True,
                                         dropout_param=dict(dropout_ratio=0.5))
        lastpool_layer = net.lastpool_dropout

    fc2 = lt.final_fully_connect(net, lastpool_layer)

    if train:
        net.loss = L.SoftmaxWithLoss(fc2, net.label)
        net.acc = L.Accuracy(fc2, net.label)
    else:
        net.probt = L.Softmax(fc2)
        net.acc = L.Accuracy(fc2, net.label)

    return net
Example #13
def create_net(lmdb, mean_file, batch_size, include_acc=False):
    # Network specification
    net = caffe.NetSpec()

    net.data, net.label = L.Data(source=lmdb,
                                 backend=P.Data.LMDB,
                                 batch_size=batch_size,
                                 ntop=2,
                                 transform_param=dict(crop_size=227,
                                                      mean_file=mean_file,
                                                      mirror=True))

    net.conv1 = L.Convolution(
        net.data,
        num_output=96,
        kernel_size=11,
        stride=4,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type="gaussian", std=0.01),
        bias_filler=dict(type="constant", value=0))

    net.relu1 = L.ReLU(net.conv1, in_place=True)

    net.norm1 = L.LRN(net.conv1, local_size=5, alpha=0.0001, beta=0.75)

    net.pool1 = L.Pooling(net.norm1,
                          pool=P.Pooling.MAX,
                          kernel_size=3,
                          stride=2)

    net.conv2 = L.Convolution(
        net.pool1,
        num_output=256,
        pad=2,
        kernel_size=5,
        group=2,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type="gaussian", std=0.01),
        bias_filler=dict(type="constant", value=0.1))

    net.relu2 = L.ReLU(net.conv2, in_place=True)

    net.norm2 = L.LRN(net.conv2, local_size=5, alpha=0.0001, beta=0.75)

    net.pool2 = L.Pooling(net.norm2,
                          pool=P.Pooling.MAX,
                          kernel_size=3,
                          stride=2)

    net.conv3 = L.Convolution(
        net.pool2,
        num_output=384,
        pad=1,
        kernel_size=3,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type="gaussian", std=0.01),
        bias_filler=dict(type="constant", value=0))

    net.relu3 = L.ReLU(net.conv3, in_place=True)

    net.conv4 = L.Convolution(
        net.conv3,
        num_output=384,
        pad=1,
        kernel_size=3,
        group=2,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type="gaussian", std=0.01),
        bias_filler=dict(type="constant", value=0.1))

    net.relu4 = L.ReLU(net.conv4, in_place=True)

    net.conv5 = L.Convolution(
        net.conv4,
        num_output=256,
        pad=1,
        kernel_size=3,
        group=2,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type="gaussian", std=0.01),
        bias_filler=dict(type="constant", value=0.1))

    net.relu5 = L.ReLU(net.conv5, in_place=True)

    net.pool5 = L.Pooling(net.conv5,
                          pool=P.Pooling.MAX,
                          kernel_size=3,
                          stride=2)

    net.fc6 = L.InnerProduct(
        net.pool5,
        num_output=4096,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type="gaussian", std=0.005),
        bias_filler=dict(type="constant", value=0.1))

    net.relu6 = L.ReLU(net.fc6, in_place=True)

    net.drop6 = L.Dropout(net.fc6, dropout_ratio=0.5, in_place=True)

    net.fc7 = L.InnerProduct(
        net.fc6,
        num_output=4096,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type="gaussian", std=0.005),
        bias_filler=dict(type="constant", value=0.1))

    net.relu7 = L.ReLU(net.fc7, in_place=True)

    net.drop7 = L.Dropout(net.fc7, dropout_ratio=0.5, in_place=True)

    net.fc8 = L.InnerProduct(
        net.fc7,
        num_output=1000,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type="gaussian", std=0.01),
        bias_filler=dict(type="constant", value=0.1))

    net.loss = L.SoftmaxWithLoss(net.fc8, net.label)

    if include_acc:
        net.acc = L.Accuracy(net.fc8, net.label)
        return net.to_proto()

    return net.to_proto()
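A usage sketch with hypothetical LMDB and mean-file paths, writing one prototxt per phase so the accuracy layer appears only in the test file:

with open('train.prototxt', 'w') as f:
    f.write(str(create_net('examples/train_lmdb', 'mean.binaryproto', batch_size=256)))
with open('test.prototxt', 'w') as f:
    f.write(str(create_net('examples/test_lmdb', 'mean.binaryproto', batch_size=50, include_acc=True)))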
Example #14
def mobilenet(data, label=None, num_classes=2):
    """Returns a NetSpec specifying MobileNet."""
    n = caffe.NetSpec()
    n["data"] = data

    # conv1
    n["conv1"], n["conv1/bn"], n["conv1/scale"], n["relu1"] = conv(
        n["data"], 32)

    # depthwise conv2_1
    n["conv2_1/dw"], n["conv2_1/dw/bn"], n["conv2_1/dw/scale"], n[
        "relu2_1/dw"] = conv_dw(n["relu1"], 32)
    # pointwise conv2_1
    n["conv2_1/sep"], n["conv2_1/sep/bn"], n["conv2_1/sep/scale"], n[
        "relu2_1/sep"] = conv_pw(n["relu2_1/dw"], 64)

    # depthwise conv2_2
    n["conv2_2/dw"], n["conv2_2/dw/bn"], n["conv2_2/dw/scale"], n[
        "relu2_2/dw"] = conv_dw(n["relu2_1/sep"], 64, 2)
    # pointwise conv2_2
    n["conv2_2/sep"], n["conv2_2/sep/bn"], n["conv2_2/sep/scale"], n[
        "relu2_2/sep"] = conv_pw(n["relu2_2/dw"], 128)

    # depthwise conv3_1
    n["conv3_1/dw"], n["conv3_1/dw/bn"], n["conv3_1/dw/scale"], n[
        "relu3_1/dw"] = conv_dw(n["relu2_2/sep"], 128)
    # pointwise conv3_1
    n["conv3_1/sep"], n["conv3_1/sep/bn"], n["conv3_1/sep/scale"], n[
        "relu3_1/sep"] = conv_pw(n["relu3_1/dw"], 128)

    # depthwise conv3_2
    n["conv3_2/dw"], n["conv3_2/dw/bn"], n["conv3_2/dw/scale"], n[
        "relu3_2/dw"] = conv_dw(n["relu3_1/sep"], 128, 2)
    # pointwise conv3_2
    n["conv3_2/sep"], n["conv3_2/sep/bn"], n["conv3_2/sep/scale"], n[
        "relu3_2/sep"] = conv_pw(n["relu3_2/dw"], 256)

    # depthwise conv4_1
    n["conv4_1/dw"], n["conv4_1/dw/bn"], n["conv4_1/dw/scale"], n[
        "relu4_1/dw"] = conv_dw(n["relu3_2/sep"], 256)
    # pointwise conv4_1
    n["conv4_1/sep"], n["conv4_1/sep/bn"], n["conv4_1/sep/scale"], n[
        "relu4_1/sep"] = conv_pw(n["relu4_1/dw"], 256)

    # depthwise conv4_2
    n["conv4_2/dw"], n["conv4_2/dw/bn"], n["conv4_2/dw/scale"], n[
        "relu4_2/dw"] = conv_dw(n["relu4_1/sep"], 256, 2)
    # pointwise conv4_2
    n["conv4_2/sep"], n["conv4_2/sep/bn"], n["conv4_2/sep/scale"], n[
        "relu4_2/sep"] = conv_pw(n["relu4_2/dw"], 512)

    # depthwise conv5_1
    n["conv5_1/dw"], n["conv5_1/dw/bn"], n["conv5_1/dw/scale"], n[
        "relu5_1/dw"] = conv_dw(n["relu4_2/sep"], 512)
    # pointwise conv5_1
    n["conv5_1/sep"], n["conv5_1/sep/bn"], n["conv5_1/sep/scale"], n[
        "relu5_1/sep"] = conv_pw(n["relu5_1/dw"], 512)

    # depthwise conv5_2
    n["conv5_2/dw"], n["conv5_2/dw/bn"], n["conv5_2/dw/scale"], n[
        "relu5_2/dw"] = conv_dw(n["relu5_1/sep"], 512)
    # pointwise conv5_2
    n["conv5_2/sep"], n["conv5_2/sep/bn"], n["conv5_2/sep/scale"], n[
        "relu5_2/sep"] = conv_pw(n["relu5_2/dw"], 512)

    # depthwise conv5_3
    n["conv5_3/dw"], n["conv5_3/dw/bn"], n["conv5_3/dw/scale"], n[
        "relu5_3/dw"] = conv_dw(n["relu5_2/sep"], 512)
    # pointwise conv5_3
    n["conv5_3/sep"], n["conv5_3/sep/bn"], n["conv5_3/sep/scale"], n[
        "relu5_3/sep"] = conv_pw(n["relu5_3/dw"], 512)

    # depthwise conv5_4
    n["conv5_4/dw"], n["conv5_4/dw/bn"], n["conv5_4/dw/scale"], n[
        "relu5_4/dw"] = conv_dw(n["relu5_3/sep"], 512)
    # pointwise conv5_4
    n["conv5_4/sep"], n["conv5_4/sep/bn"], n["conv5_4/sep/scale"], n[
        "relu5_4/sep"] = conv_pw(n["relu5_4/dw"], 512)

    # depthwise conv5_5
    n["conv5_5/dw"], n["conv5_5/dw/bn"], n["conv5_5/dw/scale"], n[
        "relu5_5/dw"] = conv_dw(n["relu5_4/sep"], 512)
    # pointwise conv5_5
    n["conv5_5/sep"], n["conv5_5/sep/bn"], n["conv5_5/sep/scale"], n[
        "relu5_5/sep"] = conv_pw(n["relu5_5/dw"], 512)

    # depthwise conv5_6
    n["conv5_6/dw"], n["conv5_6/dw/bn"], n["conv5_6/dw/scale"], n[
        "relu5_6/dw"] = conv_dw(n["relu5_5/sep"], 512, 2)
    # pointwise conv5_6
    n["conv5_6/sep"], n["conv5_6/sep/bn"], n["conv5_6/sep/scale"], n[
        "relu5_6/sep"] = conv_pw(n["relu5_6/dw"], 1024)

    # depthwise conv6
    n["conv6/dw"], n["conv6/dw/bn"], n["conv6/dw/scale"], n[
        "relu6/dw"] = conv_dw(n["relu5_6/sep"], 1024)
    # pointwise conv6
    n["conv6/sep"], n["conv6/sep/bn"], n["conv6/sep/scale"], n[
        "relu6/sep"] = conv_pw(n["relu6/dw"], 1024)

    # pool6
    n["pool6"] = L.Pooling(n["relu6/sep"],
                           global_pooling=True,
                           pool=P.Pooling.AVE)

    # fc7_ft
    n["fc7_ft"] = L.Convolution(
        n["pool6"],
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        kernel_size=1,
        num_output=num_classes,
        weight_filler=dict(type='msra'),
        bias_filler=dict(type='constant', value=0))

    if label is not None:
        n["label"] = label
        n["loss"] = L.SoftmaxWithLoss(n["fc7_ft"], n["label"])
        n["acc"] = L.Accuracy(n["fc7_ft"], n["label"])
    else:
        n["prob"] = L.Softmax(n["fc7_ft"])

    return n.to_proto()
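Because the data blob (and optional label) is passed in rather than created inside the function, the same body serves both training and deployment. A sketch of the deploy case, with a hypothetical input shape and filename:

from caffe import layers as L

data = L.Input(shape=dict(dim=[1, 3, 224, 224]))  # hypothetical deploy input
with open('mobilenet_deploy.prototxt', 'w') as f:
    f.write(str(mobilenet(data, num_classes=2)))  # label=None appends a Softmax "prob" top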
Example #15
    def accuracy(self, bottom_data, bottom_label, axis=1, ignore_label=-1):
        return L.Accuracy(bottom_data,
                          bottom_label,
                          axis=axis,
                          ignore_label=ignore_label)
Example #16
    def resnet_layers_proto(self,
                            batch_size,
                            phase='TRAIN',
                            stages=(3, 4, 6, 3)):
        n = caffe.NetSpec()
        if phase == 'TRAIN':
            source_data = self.train_data
            need_mirror = True
        else:
            source_data = self.test_data
            need_mirror = False
        n.data, n.label = L.Data(source=source_data,
                                 backend=P.Data.LMDB,
                                 batch_size=batch_size,
                                 ntop=2,
                                 transform_param=dict(
                                     crop_size=224,
                                     mean_value=[128, 128, 128],
                                     mirror=need_mirror))

        n.conv1, n.conv1_bn, n.conv1_scale, n.conv1_relu = \
            block_conv_bn_scale_relu( n.data, num_output = 64, kernel_size = 7, stride = 2, pad = 3 )  # 64x112x112
        n.pool1 = L.Pooling(n.conv1,
                            kernel_size=3,
                            stride=2,
                            pool=P.Pooling.MAX)

        residual_num = 0
        for num in xrange(len(stages)):
            for i in xrange(stages[num]):
                residual_num = residual_num + 1

                if num == 0 and i == 0:
                    stage_string = skip_connect_with_dimen_match_no_patch_reduce
                    if residual_num == 1:
                        bottom_string = 'n.pool1'
                    else:
                        bottom_string = 'n.res%s_eletwise' % (
                            str(residual_num - 1))
                elif i == 0 and num > 0:
                    stage_string = skip_connect_with_dimen_match
                    if residual_num == 1:
                        bottom_string = 'n.pool1'
                    else:
                        bottom_string = 'n.res%s_eletwise' % (
                            str(residual_num - 1))
                else:
                    stage_string = skip_connect_no_dimen_match
                    bottom_string = 'n.res%s_eletwise' % (str(residual_num -
                                                              1))
                exec(
                    stage_string.replace('(stage)', str(residual_num)).replace(
                        '(bottom)',
                        bottom_string).replace('(num)', str(2**num * 64)))

        exec('n.pool5 = L.Pooling( bottom_string, kernel_size=7, stride=1, pool=P.Pooling.AVE)'.replace(
            'bottom_string', 'n.res%s_eletwise' % str(residual_num)))

        n.classifier = L.InnerProduct(n.pool5,
                                      num_output=self.classifier_num,
                                      param=[
                                          dict(lr_mult=1, decay_mult=1),
                                          dict(lr_mult=2, decay_mult=0)
                                      ],
                                      weight_filler=dict(type='xavier'),
                                      bias_filler=dict(type='constant',
                                                       value=0))
        n.loss = L.SoftmaxWithLoss(n.classifier, n.label)
        if phase == 'TRAIN':
            pass
        else:
            n.accuracy_top1 = L.Accuracy(n.classifier,
                                         n.label,
                                         include=dict(phase=1))
            n.accuracy_top5 = L.Accuracy(n.classifier,
                                         n.label,
                                         include=dict(phase=1),
                                         accuracy_param=dict(top_k=5))

        return n.to_proto()
Example #17
def cnn(split):
    n = caffe.NetSpec()
    pydata_params = dict(dataset_dir='/home/kevin/dataset/processed_data3',
                         variable='depth_map',
                         split=split,
                         mean=(2),
                         seed=1337,
                         batch_size=256,
                         frame_num=30,
                         img_size=(227, 227))
    if split == 'deploy':
        # One 4-D input blob (the original declared four 1-D shapes with ntop=2).
        n.img = L.Input(name='input',
                        shape=dict(dim=[1, 1, 227, 227]))
    else:
        if split == 'train':
            pydata_params['dtype'] = 'frame'
            pylayer = 'ModelNetDataLayer'
        else:
            pydata_params['dtype'] = 'object'
            pylayer = 'ModelNetDataLayer'

        n.img, n.label = L.Python(module='data_layers.model_net_layer',
                                  layer=pylayer,
                                  ntop=2,
                                  param_str=str(pydata_params))

    # the base net
    n.conv1, n.relu1 = conv_relu("conv1", n.img, 96, ks=11, stride=4, pad=0)
    n.pool1 = max_pool(n.relu1, ks=3)
    n.norm1 = L.LRN(n.pool1,
                    lrn_param=dict(local_size=5, alpha=0.0005, beta=0.75, k=2))

    n.conv2, n.relu2 = conv_relu("conv2", n.norm1, 256, ks=5, pad=2, group=2)
    n.pool2 = max_pool(n.relu2, ks=3)
    n.norm2 = L.LRN(n.pool2,
                    lrn_param=dict(local_size=5, alpha=0.0005, beta=0.75, k=2))

    n.conv3, n.relu3 = conv_relu("conv3", n.norm2, 384, ks=3, pad=1)

    n.conv4, n.relu4 = conv_relu("conv4", n.relu3, 384, ks=3, pad=1, group=2)

    n.conv5, n.relu5 = conv_relu("conv5", n.relu4, 256, ks=3, pad=1, group=2)

    n.pool5 = max_pool(n.relu5, ks=3)

    n.fc6, n.relu6 = fc_relu(n.pool5, 4096, lr1=1, lr2=2)
    n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
    n.fc7, n.relu7 = fc_relu(n.drop6, 4096, lr1=1, lr2=2)
    n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
    n.fc8 = fc(n.drop7, 40, lr1=1, lr2=2)

    if split != 'deploy':

        n.accuracy = L.Accuracy(n.fc8, n.label)
        n.loss = L.SoftmaxWithLoss(n.fc8, n.label)

        #n.loss = L.Python(n.fc8, n.label, loss_weight=1, module='nn_layers.max_softmax_loss_layer', layer='MaxSoftmaxLossLayer')

    # n.display = L.Scale(n.corr, param=[dict(lr_mult=0)], filler=dict(type='constant',value=1.0))
    # n.fc9_bn = L.BatchNorm(n.relu9, param=[dict(lr_mult=0),dict(lr_mult=0),dict(lr_mult=0)], batch_norm_param=dict(use_global_stats=True))

    return n.to_proto()
Example #18
def cnn(split):
    n = caffe.NetSpec()
    pydata_params = dict(dataset_dir='/home/kevin/dataset/rgbd',
                         split=split,
                         mean=(104.00698793, 116.66876762, 122.67891434),
                         seed=1337,
                         img_size=(224, 224),
                         crop_size=(224, 224, 224, 224))

    if split == 'train':
        pylayer = 'RGBDDataLayer'
        pydata_params['randomize'] = True
        pydata_params['batch_size'] = 64
    elif split == 'test':
        pylayer = 'RGBDDataLayer'
        pydata_params['randomize'] = False
        pydata_params['batch_size'] = 1
    else:
        # One 4-D input blob (the original declared four 1-D shapes with ntop=2).
        n.img = L.Input(name='input',
                        shape=dict(dim=[1, 1, 224, 224]))

    #---------------------------------Data Layer---------------------------------------#
    n.rgb, n.depth, n.label = L.Python(name="data",
                                       module='data_layers.rgbd_data_layer',
                                       layer=pylayer,
                                       ntop=3,
                                       param_str=str(pydata_params))

    #---------------------------------RGB-Net---------------------------------------#

    # the vgg 16 base net
    n.conv1_1, n.relu1_1 = conv_relu("conv1_1", n.rgb, 64, pad=1, lr1=0, lr2=0)
    n.conv1_2, n.relu1_2 = conv_relu("conv1_2", n.relu1_1, 64, lr1=0, lr2=0)
    n.rgb_pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu("conv2_1", n.rgb_pool1, 128, lr1=0, lr2=0)
    n.conv2_2, n.relu2_2 = conv_relu("conv2_2", n.relu2_1, 128, lr1=0, lr2=0)
    n.rgb_pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu("conv3_1", n.rgb_pool2, 256, lr1=0, lr2=0)
    n.conv3_2, n.relu3_2 = conv_relu("conv3_2", n.relu3_1, 256, lr1=0, lr2=0)
    n.conv3_3, n.relu3_3 = conv_relu("conv3_3", n.relu3_2, 256, lr1=0, lr2=0)
    n.rgb_pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu("conv4_1", n.rgb_pool3, 512, lr1=0, lr2=0)
    n.conv4_2, n.relu4_2 = conv_relu("conv4_2", n.relu4_1, 512, lr1=0, lr2=0)
    n.conv4_3, n.relu4_3 = conv_relu("conv4_3", n.relu4_2, 512, lr1=0, lr2=0)
    n.rgb_pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu("conv5_1", n.rgb_pool4, 512, lr1=0, lr2=0)
    n.conv5_2, n.relu5_2 = conv_relu("conv5_2", n.relu5_1, 512, lr1=0, lr2=0)
    n.conv5_3, n.relu5_3 = conv_relu("conv5_3", n.relu5_2, 512, lr1=0, lr2=0)
    n.rgb_pool5 = max_pool(n.relu5_3)

    # fully conv
    n.rgb_fc6, n.rgb_relu6 = fc_relu(n.rgb_pool5, 4096, lr1=1, lr2=2)
    n.rgb_drop6 = L.Dropout(n.rgb_relu6, dropout_ratio=0.5, in_place=True)
    n.rgb_fc7, n.rgb_relu7 = fc_relu(n.rgb_drop6, 4096, lr1=1, lr2=2)
    n.rgb_drop7 = L.Dropout(n.rgb_relu7, dropout_ratio=0.5, in_place=True)

    # n.rgb_fc8 = fc(n.rgb_drop7, 11, lr1=1, lr2=2)

    #---------------------------------Depth-Net---------------------------------------#

    # the base net
    n.conv1, n.relu1 = conv_relu("conv1",
                                 n.depth,
                                 128,
                                 ks=5,
                                 stride=2,
                                 pad=2,
                                 lr1=0,
                                 lr2=0)
    n.depth_pool1 = max_pool(n.relu1, ks=3)
    n.norm1 = L.LRN(n.depth_pool1,
                    lrn_param=dict(local_size=5, alpha=0.0005, beta=0.75, k=2))

    n.conv2, n.relu2 = conv_relu("conv2",
                                 n.norm1,
                                 256,
                                 ks=5,
                                 stride=1,
                                 pad=2,
                                 lr1=0,
                                 lr2=0)
    n.depth_pool2 = max_pool(n.relu2, ks=3)
    n.norm2 = L.LRN(n.depth_pool2,
                    lrn_param=dict(local_size=5, alpha=0.0005, beta=0.75, k=2))

    n.conv3, n.relu3 = conv_relu("conv3",
                                 n.norm2,
                                 384,
                                 ks=3,
                                 pad=1,
                                 group=2,
                                 lr1=0,
                                 lr2=0)
    n.depth_pool3 = max_pool(n.relu3, ks=3)

    n.conv4, n.relu4 = conv_relu("conv4",
                                 n.depth_pool3,
                                 512,
                                 ks=3,
                                 pad=1,
                                 group=1,
                                 lr1=0,
                                 lr2=0)

    n.conv5, n.relu5 = conv_relu("conv5",
                                 n.relu4,
                                 512,
                                 ks=3,
                                 pad=1,
                                 group=1,
                                 lr1=0,
                                 lr2=0)

    n.depth_pool5 = max_pool(n.relu5, ks=3)

    n.depth_fc6, n.depth_relu6 = fc_relu(n.depth_pool5, 4096, lr1=1, lr2=2)
    n.depth_drop6 = L.Dropout(n.depth_relu6, dropout_ratio=0.5, in_place=True)
    n.depth_fc7, n.depth_relu7 = fc_relu(n.depth_drop6, 4096, lr1=1, lr2=2)
    n.depth_drop7 = L.Dropout(n.depth_relu7, dropout_ratio=0.5, in_place=True)

    # n.depth_fc8 = fc(n.depth_drop7, 11, lr1=1, lr2=2)

    #-----------------------------------final output---------------------------------#
    # Concatenation
    n.concat = L.Concat(n.rgb_drop7, n.depth_drop7, axis=1)

    n.fuse_fc1, n.fuse_relu1 = fc_relu(n.concat, 1024, lr1=1, lr2=2)
    n.fuse_drop1 = L.Dropout(n.fuse_fc1, dropout_ratio=0.5, in_place=True)
    n.fuse_fc2, n.fuse_relu2 = fc_relu(n.fuse_drop1, 1024, lr1=1, lr2=2)
    n.fuse_drop2 = L.Dropout(n.fuse_fc2, dropout_ratio=0.5, in_place=True)

    n.rgbd_fc8 = fc(n.fuse_drop2, 11, lr1=1, lr2=2)

    if split != 'deploy':
        #n.rgb_accuracy = L.Accuracy(n.rgb_fc8, n.label)
        #n.rgb_loss = L.SoftmaxWithLoss(n.rgb_fc8, n.label)
        #n.depth_accuracy = L.Accuracy(n.depth_fc8, n.label)
        #n.depth_loss = L.SoftmaxWithLoss(n.depth_fc8, n.label)
        n.rgbd_accuracy = L.Accuracy(n.rgbd_fc8, n.label)
        n.rgbd_loss = L.SoftmaxWithLoss(n.rgbd_fc8, n.label)

    return n.to_proto()
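As with the other generators here, the caller serializes the returned proto; a hypothetical driver (file names assumed):

for split in ('train', 'test', 'deploy'):
    with open('rgbd_%s.prototxt' % split, 'w') as f:
        f.write(str(cnn(split)))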
Example #19
def segnet_network(data_source, label_source, mode='train'):
    """ Builds a Caffe Network Definition object for SegNet

    Args:
        data_source (str): path to the data LMDB
        label_source (str): path to the label LMDB
        mode (str, optional): 'train', 'test' or 'deploy' (defaults to 'train')

    Returns:
        obj: SegNet (Caffe Network Definition object)
    """
    n = caffe.NetSpec()
    if MEAN_PIXEL is None:
        transform_param = {}
    else:
        transform_param = {'mean_value': MEAN_PIXEL}

    if mode == 'deploy':
        n.data = L.Input(input_param=dict(
            shape=dict(dim=[BATCH_SIZE, 3, test_patch_size[0], test_patch_size[1]])))
    else:
        n.data = L.Data(batch_size=BATCH_SIZE, backend=P.Data.LMDB,
                        transform_param=transform_param, source=data_source)
        n.label = L.Data(batch_size=BATCH_SIZE, backend=P.Data.LMDB,
                         source=label_source)

    convolution_block(n, n.data, "conv1_{}", 2, planes=(64,64,64), lr_mult=0.5)
    n.pool1, n.pool1_mask = L.Pooling(n.conv1_2, pool=P.Pooling.MAX, kernel_size=2, stride=2, ntop=2)

    convolution_block(n, n.pool1, "conv2_{}", 2, planes=(128,128,128), lr_mult=0.5)
    n.pool2, n.pool2_mask = L.Pooling(n.conv2_2, pool=P.Pooling.MAX, kernel_size=2, stride=2, ntop=2)

    convolution_block(n, n.pool2, "conv3_{}", 3, planes=(256,256,256), lr_mult=0.5)
    n.pool3, n.pool3_mask = L.Pooling(n.conv3_3, pool=P.Pooling.MAX, kernel_size=2, stride=2, ntop=2)

    convolution_block(n, n.pool3, "conv4_{}", 3, planes=(512,512,512), lr_mult=0.5)
    n.pool4, n.pool4_mask = L.Pooling(n.conv4_3, pool=P.Pooling.MAX, kernel_size=2, stride=2, ntop=2)

    convolution_block(n, n.pool4, "conv5_{}", 3, planes=(512,512,512), lr_mult=0.5)
    n.pool5, n.pool5_mask = L.Pooling(n.conv5_3, pool=P.Pooling.MAX, kernel_size=2, stride=2, ntop=2)

    n.upsample5 = L.Upsample(n.pool5, n.pool5_mask, scale=2)
    convolution_block(n, n.upsample5, "conv5_{}_D", 3, planes=(512,512,512), lr_mult=1, reverse=True)

    n.upsample4 = L.Upsample(n.conv5_1_D, n.pool4_mask, scale=2)
    convolution_block(n, n.upsample4, "conv4_{}_D", 3, planes=(512,512,256), lr_mult=1, reverse=True)

    n.upsample3 = L.Upsample(n.conv4_1_D, n.pool3_mask, scale=2)
    convolution_block(n, n.upsample3, "conv3_{}_D", 3, planes=(256,256,128), lr_mult=1, reverse=True)

    n.upsample2 = L.Upsample(n.conv3_1_D, n.pool2_mask, scale=2)
    convolution_block(n, n.upsample2, "conv2_{}_D", 2, planes=(128,128,64), lr_mult=1, reverse=True)

    n.upsample1 = L.Upsample(n.conv2_1_D, n.pool1_mask, scale=2)
    n.conv1_2_D, n.conv1_2_D_bn, n.conv1_2_D_scale, n.conv1_2_D_relu =\
                                convolution_unit(n.upsample1, 3, 1, 64, lr_mult=1)
    n.conv1_1_D, _, _, _ = convolution_unit(n.conv1_2_D, 3, 1, 6, lr_mult=1)

    if mode == 'train' or mode == 'test':
        n.loss = L.SoftmaxWithLoss(n.conv1_1_D, n.label, loss_param={'ignore_label': IGNORE_LABEL})
        n.accuracy = L.Accuracy(n.conv1_1_D, n.label)
    return n
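Note that segnet_network returns the NetSpec itself rather than a proto, so the caller serializes explicitly; the LMDB paths below are placeholders:

net = segnet_network('train_images_lmdb', 'train_labels_lmdb', mode='train')
with open('segnet_train.prototxt', 'w') as f:
    f.write(str(net.to_proto()))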
Example #20
def ZFNetBody(net, from_layer, for_training=True):
    net.conv1 = L.Convolution(
        net[from_layer],
        kernel_size=k_conv1,
        stride=s_conv1,
        num_output=d_conv1,
        pad=p_conv1,
        bias_term=True,
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0),
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    net.relu1 = L.ReLU(net.conv1, in_place=True)
    net.pool1 = L.Pooling(net.relu1,
                          pool=P.Pooling.MAX,
                          kernel_size=k_pool1,
                          stride=s_pool1)
    net.norm1 = L.LRN(net.pool1,
                      lrn_param=dict(local_size=local_size_norm1,
                                     alpha=alpha_norm1,
                                     beta=beta_norm1))

    net.conv2 = L.Convolution(
        net.norm1,
        kernel_size=k_conv2,
        stride=s_conv2,
        num_output=d_conv2,  #pad=p_conv2, 
        bias_term=True,
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0),
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    net.relu2 = L.ReLU(net.conv2, in_place=True)
    net.pool2 = L.Pooling(net.relu2,
                          pool=P.Pooling.MAX,
                          kernel_size=k_pool2,
                          stride=s_pool2)
    net.norm2 = L.LRN(net.pool2,
                      lrn_param=dict(local_size=local_size_norm2,
                                     alpha=alpha_norm2,
                                     beta=beta_norm2))

    net.conv3 = L.Convolution(
        net.norm2,
        kernel_size=k_conv3,
        stride=s_conv3,
        num_output=d_conv3,
        pad=p_conv3,
        bias_term=True,
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0),
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    net.relu3 = L.ReLU(net.conv3, in_place=True)

    net.conv4 = L.Convolution(
        net.relu3,
        kernel_size=k_conv4,
        stride=s_conv4,
        num_output=d_conv4,
        pad=p_conv4,
        bias_term=True,
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0),
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    net.relu4 = L.ReLU(net.conv4, in_place=True)

    net.conv5 = L.Convolution(
        net.relu4,
        kernel_size=k_conv5,
        stride=s_conv5,
        num_output=d_conv5,
        pad=p_conv5,
        bias_term=True,
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0),
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    net.relu5 = L.ReLU(net.conv5, in_place=True)
    net.pool5 = L.Pooling(net.relu5,
                          pool=P.Pooling.MAX,
                          kernel_size=k_pool5,
                          stride=s_pool5)

    net.fc6 = L.InnerProduct(
        net.pool5,
        num_output=k_ip6,
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0),
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    net.relu6 = L.ReLU(net.fc6, in_place=True)
    net.drop6 = L.Dropout(net.relu6,
                          dropout_param=dict(dropout_ratio=r_drop6),
                          in_place=True)

    net.fc7 = L.InnerProduct(
        net.fc6,
        num_output=k_ip7,
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0),
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    net.relu7 = L.ReLU(net.fc7, in_place=True)
    net.drop7 = L.Dropout(net.relu7,
                          dropout_param=dict(dropout_ratio=r_drop7),
                          in_place=True)

    net.fc8 = L.InnerProduct(
        net.fc7,
        num_output=k_ip8,
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0),
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    if not for_training:
        net.acc = L.Accuracy(net.fc8,
                             net.label,
                             include=dict(phase=caffe_pb2.Phase.Value('TEST')))

    net.loss = L.SoftmaxWithLoss(net.fc8, net.label)

    return net
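ZFNetBody reads its hyperparameters from module-level globals that are not shown. Plausible values, following the ZFNet paper (assumed, not the original settings):

# Assumed module-level hyperparameters for ZFNetBody.
k_conv1, s_conv1, d_conv1, p_conv1 = 7, 2, 96, 1
k_pool1, s_pool1 = 3, 2
local_size_norm1, alpha_norm1, beta_norm1 = 5, 1e-4, 0.75
k_conv2, s_conv2, d_conv2 = 5, 2, 256
k_pool2, s_pool2 = 3, 2
local_size_norm2, alpha_norm2, beta_norm2 = 5, 1e-4, 0.75
k_conv3, s_conv3, d_conv3, p_conv3 = 3, 1, 384, 1
k_conv4, s_conv4, d_conv4, p_conv4 = 3, 1, 384, 1
k_conv5, s_conv5, d_conv5, p_conv5 = 3, 1, 256, 1
k_pool5, s_pool5 = 3, 2
k_ip6, k_ip7, k_ip8 = 4096, 4096, 1000
r_drop6, r_drop7 = 0.5, 0.5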
Example #21
def shuffle_net(group,
                scale_f,
                input_size,
                se=False,
                num_classes=1000,
                asoft=True):
    # figure out network structure
    group_defs = {
        1: [36, 72, 144],
        2: [50, 100, 200],
        3: [60, 120, 240],
        4: [68, 136, 272],
        8: [96, 192, 384],
    }
    nouts_list = [int(v * scale_f) for v in group_defs[group]]
    nunits_list = [3, 7, 3]
    f_size = 24

    # setup the first couple of layers
    n = caffe.NetSpec()
    net = n.__dict__['tops']  # handle to the NetSpec's ordered dict of tops

    n.data, n.label = L.ImageData(batch_size=128,
                                  source="../data/train.list",
                                  root_folder="/",
                                  ntop=2,
                                  include=dict(phase=0),
                                  transform_param=dict(crop_size=input_size,
                                                       mirror=True,
                                                       scale=1 / 128.))

    # stem convolution (input scaling is handled in transform_param above)
    n.conv1 = L.Convolution(n.data,
                            kernel_size=3,
                            stride=2,
                            num_output=f_size,
                            pad=1,
                            bias_term=False,
                            param=[dict(lr_mult=1, decay_mult=1)],
                            weight_filler=dict(type="msra"))
    n.conv1_bn = L.BatchNorm(
        n.conv1,
        param=[dict(lr_mult=0),
               dict(lr_mult=0),
               dict(lr_mult=0)],
        in_place=False)
    n.conv1_scale = L.Scale(
        n.conv1_bn,
        scale_param=dict(bias_term=True),
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=1)],
        in_place=True)
    n.conv1_relu = L.ReLU(n.conv1_scale, in_place=True)
    n.conv1_pool = L.Pooling(n.conv1_relu, stride=2, kernel_size=3)
    # make the convolutional body

    last_size = f_size // 4  # integer division; f_size is a channel count
    for i, (nout, nunit) in enumerate(zip(nouts_list, nunits_list)):
        s = 'Step' + str(i + 1) + '_reduction_'
        if i == 0:
            standard_unit(n,
                          nout - last_size,
                          s,
                          group,
                          se=se,
                          newdepth=True,
                          is_first=True)
        else:
            standard_unit(n, nout - last_size, s, group, se=se, newdepth=True)
        last_size = nout
        for unit in range(nunit):
            s = 'Step' + str(i + 1) + '_' + str(unit + 1) + '_'
            standard_unit(n, nout, s, group, se=se)

    # add the end layers

    net = n.__dict__['tops']
    bottom = net[list(net.keys())[-1]]  #find the last layer in netspec

    n.global_pool = L.Pooling(bottom,
                              pooling_param=dict(pool=1, global_pooling=True))
    n.score = L.InnerProduct(n.global_pool,
                             num_output=num_classes,
                             bias_term=False,
                             param=[dict(lr_mult=1, decay_mult=1)],
                             weight_filler=dict(type="msra"))
    n.loss = L.SoftmaxWithLoss(n.score, n.label)
    n.accuracy = L.Accuracy(n.score, n.label)

    return n
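shuffle_net also returns the NetSpec, and it assumes a standard_unit helper implementing the grouped 1x1 conv, channel shuffle, depthwise 3x3 block that is not shown. A hypothetical call:

n = shuffle_net(group=3, scale_f=1.0, input_size=224)
with open('shufflenet_train.prototxt', 'w') as f:
    f.write(str(n.to_proto()))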
Example #22
def vgg_16(lmdb, bs_train=32, bs_val=10, lmdb_flag=True, not_deploy=True):
    n = caffe.NetSpec()
    if not_deploy:
        if lmdb_flag:
            n.data, n.label = L.Data(source=lmdb + 'cub200_2011_train_lmdb',
                                     backend=P.Data.LMDB,
                                     include=dict(phase=caffe_pb2.TRAIN),
                                     batch_size=bs_train,
                                     ntop=2,
                                     transform_param=dict(
                                         crop_size=224,
                                         mean_value=[110, 127, 123],
                                         mirror=True))
            data_str = n.to_proto()
            n.data, n.label = L.Data(source=lmdb + 'cub200_2011_val_lmdb',
                                     backend=P.Data.LMDB,
                                     include=dict(phase=caffe_pb2.TEST),
                                     batch_size=bs_val,
                                     ntop=2,
                                     transform_param=dict(
                                         crop_size=224,
                                         mean_value=[110, 127, 123],
                                         mirror=False))
        else:
            n.data, n.label = L.Data(source=lmdb + 'cub200_2011_train_leveldb',
                                     backend=P.Data.LEVELDB,
                                     include=dict(phase=caffe_pb2.TRAIN),
                                     batch_size=bs_train,
                                     ntop=2,
                                     transform_param=dict(
                                         crop_size=224,
                                         mean_value=[110, 127, 123],
                                         mirror=True))
            data_str = n.to_proto()
            n.data, n.label = L.Data(source=lmdb + 'cub200_2011_val_leveldb',
                                     backend=P.Data.LEVELDB,
                                     include=dict(phase=caffe_pb2.TEST),
                                     batch_size=bs_val,
                                     ntop=2,
                                     transform_param=dict(
                                         crop_size=224,
                                         mean_value=[110, 127, 123],
                                         mirror=False))
    else:
        # deploy: emit raw input fields plus a placeholder data layer; the
        # placeholder's text is sliced back out of the prototxt string below
        data_str = 'input: "data"\ninput_dim: 1\ninput_dim: 3\ninput_dim: 224\ninput_dim: 224'
        n.data = L.Data()

    # the net itself
    n.conv1_1, n.relu1_1 = conv_relu(n.data, nout=64, pad=1, ks=3)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, nout=64, pad=1, ks=3)
    n.pool1 = max_pool(n.relu1_2, ks=2, stride=2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, nout=128, pad=1, ks=3)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, nout=128, pad=1, ks=3)
    n.pool2 = max_pool(n.relu2_2, ks=2, stride=2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, nout=256, pad=1, ks=3)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, nout=256, pad=1, ks=3)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, nout=256, pad=1, ks=3)
    n.pool3 = max_pool(n.relu3_3, ks=2, stride=2)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, nout=512, pad=1, ks=3)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, nout=512, pad=1, ks=3)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, nout=512, pad=1, ks=3)
    n.pool4 = max_pool(n.relu4_3, ks=2, stride=2)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, nout=512, pad=1, ks=3)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, nout=512, pad=1, ks=3)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, nout=512, pad=1, ks=3)
    n.pool5 = ave_pool(n.relu5_3, ks=14, stride=1)

    n.softmax = L.Convolution(n.pool5,
                              kernel_size=1,
                              num_output=200,
                              param=[
                                  dict(lr_mult=10, decay_mult=10),
                                  dict(lr_mult=20, decay_mult=0)
                              ])
    if not_deploy:
        n.loss = L.SoftmaxWithLoss(n.softmax, n.label)
        n.acc_top_1 = L.Accuracy(n.softmax, n.label, top_k=1)
    else:
        n.prob = L.Softmax(n.softmax)
    model_str = str(n.to_proto())
    if not not_deploy:
        # strip the placeholder "data" layer definition from the prototxt text
        model_str = model_str[54:-1]
    return str(data_str) + '\n' + model_str
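This example assumes the FCN-style helpers, where layer names come from the NetSpec attribute rather than an explicit name argument; a plausible sketch:

from caffe import layers as L, params as P

# Plausible FCN-style helpers (assumed; fillers omitted for brevity).
def conv_relu(bottom, nout, pad=1, ks=3, stride=1):
    conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
                         num_output=nout, pad=pad,
                         param=[dict(lr_mult=1, decay_mult=1),
                                dict(lr_mult=2, decay_mult=0)])
    return conv, L.ReLU(conv, in_place=True)

def max_pool(bottom, ks=2, stride=2):
    return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)

def ave_pool(bottom, ks=14, stride=1):
    return L.Pooling(bottom, pool=P.Pooling.AVE, kernel_size=ks, stride=stride)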
Example #23
def pj_x(mode, batchsize, T, exp_T, question_vocab_size, exp_vocab_size):
    n = caffe.NetSpec()
    mode_str = json.dumps({'mode':mode, 'batchsize':batchsize})
    n.data, n.cont, n.img_feature, n.label, n.exp, n.exp_out, n.exp_cont_1, n.exp_cont_2 = \
        L.Python(module='vqa_data_provider_layer', layer='VQADataProviderLayer', param_str=mode_str, ntop=8)

    n.embed_ba = L.Embed(n.data, input_dim=question_vocab_size, num_output=300, \
        weight_filler=dict(type='uniform',min=-0.08,max=0.08), param=fixed_weights)
    n.embed = L.TanH(n.embed_ba) 

    n.exp_embed_ba = L.Embed(n.exp, input_dim=exp_vocab_size, num_output=300, \
        weight_filler=dict(type='uniform', min=-0.08, max=0.08))
    n.exp_embed = L.TanH(n.exp_embed_ba)

    # LSTM1
    n.lstm1 = L.LSTM(\
                   n.embed, n.cont,\
                   recurrent_param=dict(\
                       num_output=1024,\
                       weight_filler=dict(type='uniform',min=-0.08,max=0.08),\
                       bias_filler=dict(type='constant',value=0)),
                   param=fixed_weights_lstm)
    tops1 = L.Slice(n.lstm1, ntop=T, slice_param={'axis':0})
    for i in range(T-1):
        n.__setattr__('slice_first'+str(i), tops1[int(i)])
        n.__setattr__('silence_data_first'+str(i), L.Silence(tops1[int(i)],ntop=0))
    n.lstm1_out = tops1[T-1]
    n.lstm1_reshaped = L.Reshape(n.lstm1_out,\
                          reshape_param=dict(\
                              shape=dict(dim=[-1,1024])))
    n.lstm1_reshaped_droped = L.Dropout(n.lstm1_reshaped,dropout_param={'dropout_ratio':0.3})
    n.lstm1_droped = L.Dropout(n.lstm1,dropout_param={'dropout_ratio':0.3})
    # LSTM2
    n.lstm2 = L.LSTM(\
                   n.lstm1_droped, n.cont,\
                   recurrent_param=dict(\
                       num_output=1024,\
                       weight_filler=dict(type='uniform',min=-0.08,max=0.08),\
                       bias_filler=dict(type='constant',value=0)),
                   param=fixed_weights_lstm)
    tops2 = L.Slice(n.lstm2, ntop=T, slice_param={'axis':0})
    for i in range(T-1):
        n.__setattr__('slice_second'+str(i), tops2[int(i)])
        n.__setattr__('silence_data_second'+str(i), L.Silence(tops2[int(i)],ntop=0))
    n.lstm2_out = tops2[T-1]
    n.lstm2_reshaped = L.Reshape(n.lstm2_out,\
                          reshape_param=dict(\
                              shape=dict(dim=[-1,1024])))
    n.lstm2_reshaped_droped = L.Dropout(n.lstm2_reshaped,dropout_param={'dropout_ratio':0.3})
    concat_bottom = [n.lstm1_reshaped_droped, n.lstm2_reshaped_droped]
    n.lstm_12 = L.Concat(*concat_bottom)


    # Tile question feature
    n.q_emb_resh = L.Reshape(n.lstm_12, reshape_param=dict(shape=dict(dim=[-1,2048,1,1])))
    n.q_emb_tiled_1 = L.Tile(n.q_emb_resh, axis=2, tiles=14)
    n.q_emb_resh_tiled = L.Tile(n.q_emb_tiled_1, axis=3, tiles=14)

    # Embed image feature
    n.i_emb = L.Convolution(n.img_feature, kernel_size=1, stride=1,
                            num_output=2048, pad=0, weight_filler=dict(type='xavier'),
                            param=fixed_weights)

    # Eltwise product and normalization
    n.eltwise = L.Eltwise(n.q_emb_resh_tiled, n.i_emb, eltwise_param={'operation': P.Eltwise.PROD})
    n.eltwise_sqrt = L.SignedSqrt(n.eltwise)
    n.eltwise_l2 = L.L2Normalize(n.eltwise_sqrt)
    n.eltwise_drop = L.Dropout(n.eltwise_l2, dropout_param={'dropout_ratio': 0.3})

    # Attention for VQA
    n.att_conv1 = L.Convolution(n.eltwise_drop, kernel_size=1, stride=1, num_output=512, pad=0, weight_filler=dict(type='xavier'), param=fixed_weights)
    n.att_conv1_relu = L.ReLU(n.att_conv1)
    n.att_conv2 = L.Convolution(n.att_conv1_relu, kernel_size=1, stride=1, num_output=1, pad=0, weight_filler=dict(type='xavier'), param=fixed_weights)
    n.att_reshaped = L.Reshape(n.att_conv2,reshape_param=dict(shape=dict(dim=[-1,1,14*14])))
    n.att_softmax = L.Softmax(n.att_reshaped, axis=2)
    n.att_map = L.Reshape(n.att_softmax,reshape_param=dict(shape=dict(dim=[-1,1,14,14])))
    
    dummy = L.DummyData(shape=dict(dim=[batchsize, 1]), data_filler=dict(type='constant', value=1), ntop=1)
    n.att_feature  = L.SoftAttention(n.img_feature, n.att_map, dummy)
    n.att_feature_resh = L.Reshape(n.att_feature, reshape_param=dict(shape=dict(dim=[-1,2048])))

    # eltwise product + normalization again for VQA
    n.i_emb2 = L.InnerProduct(n.att_feature_resh, num_output=2048, weight_filler=dict(type='xavier'), param=fixed_weights)
    n.eltwise2 = L.Eltwise(n.lstm_12, n.i_emb2, eltwise_param={'operation': P.Eltwise.PROD})
    n.eltwise2_sqrt = L.SignedSqrt(n.eltwise2)
    n.eltwise2_l2 = L.L2Normalize(n.eltwise2_sqrt)
    n.eltwise2_drop = L.Dropout(n.eltwise2_l2, dropout_param={'dropout_ratio': 0.3})

    n.prediction = L.InnerProduct(n.eltwise2_drop, num_output=3000, weight_filler=dict(type='xavier'), param=fixed_weights)
    n.loss = L.SoftmaxWithLoss(n.prediction, n.label)

    # Embed VQA GT answer during training
    n.exp_emb_ans = L.Embed(n.label, input_dim=3000, num_output=300, \
        weight_filler=dict(type='uniform', min=-0.08, max=0.08))
    n.exp_emb_ans_tanh = L.TanH(n.exp_emb_ans)
    n.exp_emb_ans2 = L.InnerProduct(n.exp_emb_ans_tanh, num_output=2048, weight_filler=dict(type='xavier'))

    # Merge VQA answer and visual+textual feature
    n.exp_emb_resh = L.Reshape(n.exp_emb_ans2, reshape_param=dict(shape=dict(dim=[-1,2048,1,1])))
    n.exp_emb_tiled_1 = L.Tile(n.exp_emb_resh, axis=2, tiles=14)
    n.exp_emb_tiled = L.Tile(n.exp_emb_tiled_1, axis=3, tiles=14)
    n.eltwise_emb = L.Convolution(n.eltwise, kernel_size=1, stride=1, num_output=2048, pad=0, weight_filler=dict(type='xavier'))
    n.exp_eltwise = L.Eltwise(n.eltwise_emb,  n.exp_emb_tiled, eltwise_param={'operation': P.Eltwise.PROD})
    n.exp_eltwise_sqrt = L.SignedSqrt(n.exp_eltwise)
    n.exp_eltwise_l2 = L.L2Normalize(n.exp_eltwise_sqrt)
    n.exp_eltwise_drop = L.Dropout(n.exp_eltwise_l2, dropout_param={'dropout_ratio': 0.3})

    # Attention for Explanation
    n.exp_att_conv1 = L.Convolution(n.exp_eltwise_drop, kernel_size=1, stride=1, num_output=512, pad=0, weight_filler=dict(type='xavier'))
    n.exp_att_conv1_relu = L.ReLU(n.exp_att_conv1)
    n.exp_att_conv2 = L.Convolution(n.exp_att_conv1_relu, kernel_size=1, stride=1, num_output=1, pad=0, weight_filler=dict(type='xavier'))
    n.exp_att_reshaped = L.Reshape(n.exp_att_conv2,reshape_param=dict(shape=dict(dim=[-1,1,14*14])))
    n.exp_att_softmax = L.Softmax(n.exp_att_reshaped, axis=2)
    n.exp_att_map = L.Reshape(n.exp_att_softmax,reshape_param=dict(shape=dict(dim=[-1,1,14,14])))
    
    exp_dummy = L.DummyData(shape=dict(dim=[batchsize, 1]), data_filler=dict(type='constant', value=1), ntop=1)
    n.exp_att_feature_prev  = L.SoftAttention(n.img_feature, n.exp_att_map, exp_dummy)
    n.exp_att_feature_resh = L.Reshape(n.exp_att_feature_prev, reshape_param=dict(shape=dict(dim=[-1, 2048])))
    n.exp_att_feature_embed = L.InnerProduct(n.exp_att_feature_resh, num_output=2048, weight_filler=dict(type='xavier'))
    n.exp_lstm12_embed = L.InnerProduct(n.lstm_12, num_output=2048, weight_filler=dict(type='xavier'))
    n.exp_eltwise2 = L.Eltwise(n.exp_lstm12_embed, n.exp_att_feature_embed, eltwise_param={'operation': P.Eltwise.PROD})
    n.exp_att_feature = L.Eltwise(n.exp_emb_ans2, n.exp_eltwise2, eltwise_param={'operation': P.Eltwise.PROD})


    # LSTM1 for Explanation
    n.exp_lstm1 = L.LSTM(\
                   n.exp_embed, n.exp_cont_1,\
                   recurrent_param=dict(\
                       num_output=2048,\
                       weight_filler=dict(type='uniform',min=-0.08,max=0.08),\
                       bias_filler=dict(type='constant',value=0)))

    n.exp_lstm1_dropped = L.Dropout(n.exp_lstm1,dropout_param={'dropout_ratio':0.3})

    # merge with LSTM1 for explanation
    n.exp_att_resh = L.Reshape(n.exp_att_feature, reshape_param=dict(shape=dict(dim=[1, -1, 2048])))
    n.exp_att_tiled = L.Tile(n.exp_att_resh, axis=0, tiles=exp_T)
    n.exp_eltwise_all = L.Eltwise(n.exp_lstm1_dropped, n.exp_att_tiled, eltwise_param={'operation': P.Eltwise.PROD})
    n.exp_eltwise_all_sqrt = L.SignedSqrt(n.exp_eltwise_all)
    n.exp_eltwise_all_l2 = L.L2Normalize(n.exp_eltwise_all_sqrt)
    n.exp_eltwise_all_drop = L.Dropout(n.exp_eltwise_all_l2, dropout_param={'dropout_ratio': 0.3})

    # LSTM2 for Explanation
    n.exp_lstm2 = L.LSTM(\
                   n.exp_eltwise_all_drop, n.exp_cont_2,\
                   recurrent_param=dict(\
                       num_output=1024,\
                       weight_filler=dict(type='uniform',min=-0.08,max=0.08),\
                       bias_filler=dict(type='constant',value=0)))
    n.exp_lstm2_dropped = L.Dropout(n.exp_lstm2,dropout_param={'dropout_ratio':0.3})
    
    n.exp_prediction = L.InnerProduct(n.exp_lstm2_dropped, num_output=exp_vocab_size, weight_filler=dict(type='xavier'), axis=2)

    n.exp_loss = L.SoftmaxWithLoss(n.exp_prediction, n.exp_out,
                                   loss_param=dict(ignore_label=-1),
                                   softmax_param=dict(axis=2))
    n.exp_accuracy = L.Accuracy(n.exp_prediction, n.exp_out, axis=2, ignore_label=-1)

    return n.to_proto()
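The snippet references fixed_weights and fixed_weights_lstm, module-level param settings that freeze the shared VQA branch; plausible definitions:

# Assumed globals: lr_mult=0 / decay_mult=0 freezes pretrained weights.
fixed_weights = [dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)]
# Caffe's LSTM layer has three param blobs (input weights, bias, recurrent weights).
fixed_weights_lstm = [dict(lr_mult=0, decay_mult=0)] * 3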
Example #24
def accuracy(input_blob, label_blob):
    return L.Accuracy(input_blob, label_blob, include=dict(phase=caffe.TEST))
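A helper this small just standardizes the TEST-phase include rule; typical use inside a NetSpec (names hypothetical):

n.acc = accuracy(n.fc8, n.label)  # evaluated only in the TEST phase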
Example #25
def add_accuracy_layer(net, bottom, name):
    """ Add accuracy layer """
    net[name] = L.Accuracy(bottom[0], bottom[1])
Example #26
def construct_fcn(image_lmdb, contour_lmdb, batch_size=1, include_acc=False):
    net = caffe.NetSpec()

    # args for convlution layers
    weight_filler = dict(type='gaussian', mean=0.0, std=0.01)
    bias_filler = dict(type='constant', value=0.1)
    param = [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]

    net.data = L.Data(source=image_lmdb,
                      backend=P.Data.LMDB,
                      batch_size=batch_size,
                      ntop=1,
                      transform_param=dict(crop_size=0,
                                           mean_value=[77],
                                           mirror=False))
    net.label = L.Data(source=contour_lmdb,
                       backend=P.Data.LMDB,
                       batch_size=batch_size,
                       ntop=1)
    # conv-relu-pool 1
    net.conv1 = L.Convolution(net.data,
                              kernel_size=5,
                              stride=2,
                              num_output=100,
                              pad=50,
                              group=1,
                              weight_filler=weight_filler,
                              bias_filler=bias_filler,
                              param=param)
    net.relu1 = L.ReLU(net.conv1, in_place=True)
    net.pool1 = L.Pooling(net.relu1,
                          pool=P.Pooling.MAX,
                          kernel_size=2,
                          stride=2)
    # conv-relu-pool 2
    net.conv2 = L.Convolution(net.pool1,
                              kernel_size=5,
                              stride=2,
                              num_output=200,
                              pad=0,
                              group=1,
                              weight_filler=weight_filler,
                              bias_filler=bias_filler,
                              param=param)
    net.relu2 = L.ReLU(net.conv2, in_place=True)
    net.pool2 = L.Pooling(net.relu2,
                          pool=P.Pooling.MAX,
                          kernel_size=2,
                          stride=2)
    net.conv3 = L.Convolution(net.pool2,
                              kernel_size=3,
                              stride=1,
                              num_output=300,
                              pad=0,
                              group=1,
                              weight_filler=weight_filler,
                              bias_filler=bias_filler,
                              param=param)
    net.relu3 = L.ReLU(net.conv3, in_place=True)
    net.conv4 = L.Convolution(net.relu3,
                              kernel_size=3,
                              stride=1,
                              num_output=300,
                              pad=0,
                              group=1,
                              weight_filler=weight_filler,
                              bias_filler=bias_filler,
                              param=param)
    net.relu4 = L.ReLU(net.conv4, in_place=True)
    net.drop = L.Dropout(net.relu4, dropout_ratio=0.1, in_place=True)
    net.score_classes = L.Convolution(net.drop,
                                      kernel_size=1,
                                      stride=1,
                                      num_output=2,
                                      pad=0,
                                      group=1,
                                      weight_filler=weight_filler,
                                      bias_filler=bias_filler,
                                      param=param)
    # NetSpec gives Deconvolution no defaults, so explicit parameters are
    # needed; values here are assumed (2 classes, ~16x upsampling to undo
    # the four stride-2 stages above)
    net.upscore = L.Deconvolution(net.score_classes,
                                  convolution_param=dict(num_output=2,
                                                         kernel_size=31,
                                                         stride=16,
                                                         bias_term=False))
    net.score = L.Crop(net.upscore, net.data)
    net.loss = L.SoftmaxWithLoss(net.score,
                                 net.label,
                                 loss_param=dict(normalize=True))
    if include_acc:
        net.accuracy = L.Accuracy(net.score, net.label)

    return net.to_proto()
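A hypothetical driver, with placeholder LMDB paths:

with open('fcn_train.prototxt', 'w') as f:
    f.write(str(construct_fcn('images_lmdb', 'contours_lmdb',
                              batch_size=4, include_acc=True)))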
Example #27
def create_net(phase):
    global train_transform_param
    global test_transform_param
    train_transform_param = {'mirror': False, 'mean_file': Params['mean_file']}
    test_transform_param = {'mean_file': Params['mean_file']}
    if phase == 'train':
        lmdb_file = Params['train_lmdb']
        transform_param = train_transform_param
        batch_size = Params['batch_size_per_device']
    else:
        lmdb_file = Params['test_lmdb']
        transform_param = test_transform_param
        batch_size = Params['test_batch_size']

    net = caffe.NetSpec()
    if phase == 'test':
        net.data, net.label = L.Data(batch_size=batch_size,
                                     backend=P.Data.LMDB,
                                     source=lmdb_file,
                                     transform_param=transform_param,
                                     ntop=2)
    elif phase == 'train':
        net.data = L.Input(shape=dict(dim=[128, 3, 4, 12]))
        net.label = L.Input(shape=dict(dim=[128]))
    elif phase == 'deploy':
        net.data = L.Input(shape=dict(dim=[1, 3, 4, 12]))
    kwargs = {
        'param':
        [dict(lr_mult=1, decay_mult=1),
         dict(lr_mult=2, decay_mult=0)],
        'weight_filler': dict(type='gaussian', std=0.0001),
        'bias_filler': dict(type='constant')
    }
    net.conv1 = L.Convolution(net.data, num_output=16, kernel_size=3, **kwargs)
    net.pool1 = L.Pooling(net.conv1,
                          pool=P.Pooling.MAX,
                          kernel_size=3,
                          stride=2)
    net.relu1 = L.ReLU(net.pool1, in_place=True)
    kwargs = {
        'param':
        [dict(lr_mult=1, decay_mult=1),
         dict(lr_mult=2, decay_mult=0)],
        'weight_filler': dict(type='gaussian', std=0.005),
        'bias_filler': dict(type='constant')
    }
    net.fc2 = L.InnerProduct(net.pool1, num_output=16, **kwargs)
    net.relu2 = L.ReLU(net.fc2, in_place=True)
    net.drop2 = L.Dropout(net.fc2,
                          in_place=True,
                          dropout_param=dict(dropout_ratio=0.5))
    kwargs = {
        'param':
        [dict(lr_mult=1, decay_mult=100),
         dict(lr_mult=2, decay_mult=0)],
        'weight_filler': dict(type='gaussian', std=0.01),
        'bias_filler': dict(type='constant', value=0)
    }
    net.fc3 = L.InnerProduct(net.fc2, num_output=2, **kwargs)
    if phase == 'train':
        net.loss = L.SoftmaxWithLoss(net.fc3, net.label)
    elif phase == 'test':
        net.accuracy = L.Accuracy(net.fc3, net.label)
    else:
        net.prob = L.Softmax(net.fc3)

    net_proto = net.to_proto()
    net_proto.name = '{}_{}'.format(Params['model_name'], phase)
    return net_proto
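Since the phase decides both the inputs and the heads, a driver would emit all three prototxts (Params is assumed to be defined at module level):

for phase in ('train', 'test', 'deploy'):
    proto = create_net(phase)
    with open('%s.prototxt' % proto.name, 'w') as f:
        f.write(str(proto))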
Example #28
def densenet(mode, data_file, bs, nlayer, nclass, first_nout=16, growth_rate=16, dropout=0.2):

  net = caffe.NetSpec()

  # data layer ---------------------------------------------------------------
  mirror  = True
  shuffle = True
  if mode == 1: # TEST phase
    mirror  = False
    shuffle = False
  
  transform = dict(scale = 0.0078125,
                    mirror = mirror,
                    #crop_size = 224,
                    mean_value = [127.5, 127.5, 127.5])

  net.data, net.label = L.Data(#include = dict(phase = mode),
                              transform_param = transform,
                              source = data_file,
                              batch_size = bs, 
                              backend = P.Data.LMDB,
                              ntop = 2)
  # net.data, net.label = L.ImageData(#include = dict(phase = mode),
                                    # transform_param = transform,
                                    # source = data_file,
                                    # batch_size = bs,
                                    # shuffle = shuffle,
                                    # #new_height = 256,
                                    # #new_width = 256,
                                    # #is_color = True,
                                    # ntop = 2)

  pre_fmap = 0 # total number of previous feature maps
  
  # first convolution --------------------------------------------------------
  net.conv_1 = L.Convolution(net.data, num_output=first_nout,
                             kernel_size=7, stride=2, pad=3, 
                             weight_filler=dict(type='msra'), 
                             bias_filler=dict(type='constant'),
                             param=[dict(lr_mult=1, decay_mult=1),
                                    dict(lr_mult=2, decay_mult=0)])
  
  net.relu_1 = L.PReLU(net.conv_1, in_place=True)
  
  net.pool_1 = L.Pooling(net.relu_1, pool=P.Pooling.MAX,
                         kernel_size=3, stride=2)
  
  pre_layer = net.pool_1
  pre_fmap += first_nout
  
  # DB + TD ------------------------------------------------------------------
  # +1 in order to make the index values from 1
  for major in xrange(len(nlayer)-1):
    # DB
    for minor in xrange(nlayer[major]):
      pre_layer = cat_layer(net, mode, major+1, minor+1, pre_layer, growth_rate, dropout)
      pre_fmap += growth_rate
    # TD
    pre_layer = transition_down(net, mode, major+1, pre_layer, pre_fmap, dropout)
    pre_fmap = pre_fmap // 2
  
  # last DB, without TD
  major = len(nlayer)
  for minor in xrange(nlayer[-1]):
    pre_layer = cat_layer(net, mode, major, minor+1, pre_layer, growth_rate, dropout)
    pre_fmap += growth_rate
  
  # final layers -------------------------------------------------------------
  use_global_stats = False
  if mode == 1: # TEST phase
    use_global_stats = True
  net.bn_final = L.BatchNorm(pre_layer, in_place=False, 
                             batch_norm_param = dict(use_global_stats=use_global_stats),
                             param=[dict(lr_mult=0, decay_mult=0), 
                                    dict(lr_mult=0, decay_mult=0), 
                                    dict(lr_mult=0, decay_mult=0)])
  net.scale_final = L.Scale(net.bn_final, bias_term=True, in_place=True,
                            filler=dict(value=1), bias_filler=dict(value=0))
  net.relu_final = L.PReLU(net.scale_final, in_place=True)
  net.pool_final = L.Pooling(net.relu_final, pool=P.Pooling.AVE, global_pooling=True)
  
  net.fc_class = L.InnerProduct(net.pool_final, num_output=nclass,
                                weight_filler=dict(type='xavier'), 
                                bias_filler=dict(type='constant'),
                                param=[dict(lr_mult=1, decay_mult=1),
                                       dict(lr_mult=2, decay_mult=0)])
  
  net.loss = L.SoftmaxWithLoss(net.fc_class, net.label)
  
  if mode == 1:
    net.accuracy = L.Accuracy(net.fc_class, net.label)
  
  return str(net.to_proto())
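densenet depends on cat_layer and transition_down helpers that are not shown. A minimal sketch of the dense-block unit, assuming the usual BatchNorm, Scale, PReLU, 3x3 conv, Concat ordering (naming is illustrative):

def cat_layer(net, mode, major, minor, bottom, growth_rate, dropout):
  # one dense-block unit; mode == 1 (TEST) switches BatchNorm to global stats
  prefix = 'conv_%d_%d' % (major, minor)
  bn = L.BatchNorm(bottom,
                   batch_norm_param=dict(use_global_stats=(mode == 1)))
  scale = L.Scale(bn, bias_term=True, in_place=True)
  relu = L.PReLU(scale, in_place=True)
  conv = L.Convolution(relu, num_output=growth_rate, kernel_size=3, pad=1,
                       weight_filler=dict(type='msra'),
                       bias_filler=dict(type='constant'))
  if dropout > 0:
    conv = L.Dropout(conv, dropout_ratio=dropout, in_place=True)
  concat = L.Concat(bottom, conv)
  setattr(net, prefix + '_concat', concat)  # NetSpec maps setattr to tops
  return concat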
Example #29
    def resnet_layers_proto(self,
                            batch_size,
                            phase='TRAIN',
                            stages=(3, 4, 6, 3)):
        """

        :param batch_size: the batch_size of train and test phase
        :param phase: TRAIN or TEST
        :param stages: the num of layers = 2 + 3*sum(stages), layers would better be chosen from [50, 101, 152]
                       {every stage is composed of 1 residual_branch_shortcut module and stage[i]-1 residual_branch
                       modules, each module consists of 3 conv layers}
                        (3, 4, 6, 3) for 50 layers; (3, 4, 23, 3) for 101 layers; (3, 8, 36, 3) for 152 layers
        """
        n = caffe.NetSpec()
        if phase == 'TRAIN':
            source_data = self.train_data
            mirror = True
        else:
            source_data = self.test_data
            mirror = False
        n.data, n.label = L.Data(source=source_data,
                                 backend=P.Data.LMDB,
                                 batch_size=batch_size,
                                 ntop=2,
                                 transform_param=dict(
                                     crop_size=224,
                                     mean_value=[104, 117, 123],
                                     mirror=mirror))

        n.conv1, n.conv1_bn, n.conv1_scale, n.conv1_relu = \
            conv_bn_scale_relu(n.data, num_output=64, kernel_size=7, stride=2, pad=3)  # 64x112x112
        n.pool1 = L.Pooling(n.conv1,
                            kernel_size=3,
                            stride=2,
                            pool=P.Pooling.MAX)  # 64x56x56

        for num in xrange(len(stages)):  # num = 0, 1, 2, 3
            for i in xrange(stages[num]):
                if i == 0:
                    stage_string = branch_shortcut_string
                    bottom_string = [
                        'n.pool1',
                        'n.res2b%s' % str(stages[0] - 1),
                        'n.res3b%s' % str(stages[1] - 1),
                        'n.res4b%s' % str(stages[2] - 1)
                    ][num]
                else:
                    stage_string = branch_string
                    if i == 1:
                        bottom_string = 'n.res%sa' % str(num + 2)
                    else:
                        bottom_string = 'n.res%sb%s' % (str(num + 2),
                                                        str(i - 1))
                exec(
                    stage_string.replace('(stage)', str(num + 2)).replace(
                        '(bottom)', bottom_string).replace(
                            '(num)', str(2**num * 64)).replace(
                                '(order)',
                                str(i)).replace('(stride)',
                                                str(int(num > 0) + 1)))

        exec('n.pool5 = L.Pooling((bottom), pool=P.Pooling.AVE, global_pooling=True)'
             .replace('(bottom)', 'n.res5b%s' % str(stages[3] - 1)))
        n.classifier = L.InnerProduct(n.pool5,
                                      num_output=self.classifier_num,
                                      param=[
                                          dict(lr_mult=1, decay_mult=1),
                                          dict(lr_mult=2, decay_mult=0)
                                      ],
                                      weight_filler=dict(type='xavier'),
                                      bias_filler=dict(type='constant',
                                                       value=0))
        n.loss = L.SoftmaxWithLoss(n.classifier, n.label)
        if phase != 'TRAIN':
            n.accuracy_top1 = L.Accuracy(n.classifier,
                                         n.label,
                                         include=dict(phase=1))
            n.accuracy_top5 = L.Accuracy(n.classifier,
                                         n.label,
                                         include=dict(phase=1),
                                         accuracy_param=dict(top_k=5))

        return n.to_proto()
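The stage templates (branch_string, branch_shortcut_string) are module-level code strings consumed by exec() after placeholder substitution. A self-contained sketch of that idiom, with a deliberately tiny stand-in template (assumed, not the original strings):

import caffe
from caffe import layers as L, params as P

n = caffe.NetSpec()
n.data = L.Input(shape=dict(dim=[1, 3, 224, 224]))
n.pool1 = L.Pooling(n.data, pool=P.Pooling.MAX, kernel_size=3, stride=2)

# Tiny stand-in; the real templates build whole conv-bn-scale-relu branches
# plus the Eltwise sum.
template = ("n.stage(stage)_pool = L.Pooling((bottom), pool=P.Pooling.MAX, "
            "kernel_size=3, stride=(stride))")
exec(template.replace('(stage)', '2')
             .replace('(bottom)', 'n.pool1')
             .replace('(stride)', '1'))
print(n.stage2_pool)  # exec created a new top on the NetSpec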
Example #30
def create_net(train_list, batch_size, include_acc=False):
    spec = caffe.NetSpec()
    '''With NetSpec, the attribute name after each `spec.` becomes that layer's
    name; tops created without a spec attribute get auto-generated names (helper
    functions cannot name layers because spec is not passed into them), e.g.:
    spec.data, spec.label = L.ImageData(source=train_list, batch_size=batch_size, shuffle=True, ntop=2,
                                        transform_param=dict(crop_size=112, mirror=False, scale=0.0078125, mean_value=127.5), phase=0)'''
    spec.data,spec.label=L.ImageData(source=train_list,batch_size=batch_size,shuffle=True,ntop=2,
                           transform_param=dict(crop_size=112,mirror=False,scale=0.0078125,mean_value=127.5),include=dict(phase=caffe.TRAIN))

    spec.conv1=group_conv(spec.data,kernel_size=3,num_output=32,stride=2)
    spec.relu1=after_conv(spec.conv1)
    spec.conv2=group_conv(spec.relu1,num_output=32,GROUP=True,kernel_size=3,stride=1)
    spec.relu2=after_conv(spec.conv2)
    spec.concat1=L.Concat(spec.relu1,spec.relu2,axis=1)
    spec.pooling1 = L.Pooling(spec.concat1,pool=P.Pooling.MAX,stride=2,kernel_size=3)
    spec.relu3=after_conv(spec.pooling1)

    spec.conv3=group_conv(spec.relu3,num_output=64,kernel_size=1,stride=1,pad=0)
    spec.relu4=after_conv(spec.conv3)
    spec.conv4=group_conv(spec.relu4,num_output=64,GROUP=True,kernel_size=3,stride=1)
    spec.relu5=after_conv(spec.conv4)

    spec.concat2=L.Concat(spec.pooling1,spec.relu5,axis=1)
    spec.relu6=after_conv(spec.concat2)
    spec.conv5=group_conv(spec.relu6,num_output=128,kernel_size=1,stride=1,pad=0)
    spec.relu7=after_conv(spec.conv5)
    spec.conv6=group_conv(spec.relu7,num_output=128,GROUP=True,kernel_size=3,stride=1)
    spec.relu8=after_conv(spec.conv6)
    spec.concat3=L.Concat(spec.concat2,spec.relu8,axis=1)

    spec.pooling2 = L.Pooling(spec.concat3,pool=P.Pooling.MAX,stride=2,kernel_size=3)
    spec.relu8_2=after_conv(spec.pooling2)  # renamed: spec.relu8 was already used above

    spec.conv7=group_conv(spec.relu8_2,num_output=256,kernel_size=1,stride=1,pad=0)
    spec.relu9=after_conv(spec.conv7)
    spec.conv8=group_conv(spec.relu9,num_output=256,GROUP=True,kernel_size=3,stride=1)
    spec.relu10=after_conv(spec.conv8)    

    spec.concat4=L.Concat(spec.pooling2,spec.relu10,axis=1)
    spec.relu11=after_conv(spec.concat4)

    spec.conv9=group_conv(spec.relu11,num_output=512,kernel_size=1,stride=1,pad=0)
    spec.relu12=after_conv(spec.conv9)
    spec.conv10=group_conv(spec.relu12,num_output=512,GROUP=True,kernel_size=3,stride=1)
    spec.relu13=after_conv(spec.conv10)

    spec.concat5=L.Concat(spec.concat4,spec.relu13,axis=1)
    spec.relu14=after_conv(spec.concat5)
    spec.pooling3 = L.Pooling(spec.relu14,pool=P.Pooling.MAX,stride=2,kernel_size=3)

    spec.relu15=after_conv(spec.pooling3)
    spec.conv11=group_conv(spec.relu15,num_output=1024,kernel_size=1,stride=1,pad=0)
    spec.relu16=after_conv(spec.conv11)
    spec.conv12=group_conv(spec.relu16,num_output=1024,GROUP=True,kernel_size=3,stride=1)
    spec.relu17=after_conv(spec.conv12)

    #OUT 7
    spec.maxpool=L.Pooling(spec.relu17, pool=P.Pooling.AVE,global_pooling=True)  # global average pooling (layer name kept from the source)
    spec.fc1=L.InnerProduct(spec.maxpool, num_output=1024,weight_filler=dict(type='xavier'))
    #relu1=L.ReLU(fc1, in_place=True)
    spec.fc2 = L.InnerProduct(spec.fc1, num_output=10000,weight_filler=dict(type='xavier'))
    # phase=0 corresponds to TRAIN
    spec.loss = L.SoftmaxWithLoss(spec.fc2, spec.label,include=dict(phase=caffe.TRAIN))

    #acc = L.Accuracy(fc2, label)
    #return to_proto(loss, acc,include=dict(phase=TEST))

    # add the TEST-phase accuracy layer only when requested
    if include_acc:
        spec.acc = L.Accuracy(spec.fc2, spec.label,include=dict(phase=caffe.TEST))
    return spec.to_proto()
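A hypothetical driver (list path assumed):

with open('train.prototxt', 'w') as f:
    f.write(str(create_net('train.list', batch_size=64, include_acc=True)))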