Example #1
def bn(input, is_train):
    if is_train:
        kwargs = {'engine': 3}
    else:
        # At test time, use the stored moving averages rather than batch statistics.
        kwargs = {'engine': 3, 'use_global_stats': True}
    return L.Scale(L.BatchNorm(input, **kwargs), bias_term=True)
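A minimal sketch of the same BatchNorm-then-Scale pairing, assuming only that pycaffe is importable (the engine selection used above is omitted); it builds a dummy input and prints the resulting prototxt:

import caffe
from caffe import layers as L

n = caffe.NetSpec()
n.data = L.Input(input_param=dict(shape=[dict(dim=[1, 3, 32, 32])]))
# Training-mode BatchNorm (batch statistics), as in bn(input, is_train=True),
# followed by a learnable per-channel Scale with a bias term.
n.data_bn = L.BatchNorm(n.data, batch_norm_param=dict(use_global_stats=False))
n.data_scale = L.Scale(n.data_bn, bias_term=True)
print(n.to_proto())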
Example #2
def mynet(batch, steps, loss_type, dep=False, descr=False, part='gen'):

    conv_lr = [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=1)]
    bcnv_lr = [dict(lr_mult=1, decay_mult=1)]
    scale_lr = [dict(lr_mult=1, decay_mult=1), dict(lr_mult=1, decay_mult=1)]
    bn_param = dict(eps=0.001, use_global_stats=False)

    fr_lr = [dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)]
    fr_clr = [dict(lr_mult=0, decay_mult=0)]
    #fr_bn = dict(eps=0.001,use_global_stats=True)
    fr_bn = dict(eps=0.001, use_global_stats=False)

    if part == 'gen':
        gen_conv_lr = conv_lr
        gen_bcnv_lr = bcnv_lr
        gen_scale_lr = scale_lr
        gen_bn_param = bn_param
        dsc_conv_lr = fr_lr
    else:
        gen_conv_lr = fr_lr
        gen_bcnv_lr = fr_clr
        gen_scale_lr = fr_lr
        gen_bn_param = fr_bn
        dsc_conv_lr = conv_lr

    n = caffe.NetSpec()

    sp = dict(bias_term=True, filler=dict(value=1.0))

    if dep:
        n.source = L.Input(input_param=dict(shape=[dict(dim=[1, 1, 64, 64])]))
    else:
        if descr:
            if part == 'gen':
                bs = batch
            else:
                bs = batch // 2
        else:
            bs = batch
        n.data = L.Data(
            data_param=dict(source="db", batch_size=bs, backend=P.Data.LMDB))

        n.expected, n.source = L.Slice(n.data,
                                       slice_param=dict(axis=1, slice_point=1),
                                       ntop=2)
        if descr:
            if part != 'gen':
                #n.data_ref = L.Split(n.expected)
                n.data_ref = L.Data(data_param=dict(
                    source="db_ref",
                    batch_size=batch // 2,
                    backend=P.Data.LMDB))
                n.label_0 = L.DummyData(shape=[dict(dim=[batch // 2])],
                                        data_filler=dict(value=0.0))
                n.label_1 = L.DummyData(shape=[dict(dim=[batch // 2])],
                                        data_filler=dict(value=1.0))
                n.label = L.Concat(n.label_0,
                                   n.label_1,
                                   concat_param=dict(axis=0))
            else:
                n.label = L.DummyData(shape=[dict(dim=[batch])],
                                      data_filler=dict(value=1.0))

    n.conv1 = L.Convolution(n.source,
                            convolution_param=conv_param_nb(3, 16),
                            param=gen_bcnv_lr)
    n.bn1 = L.BatchNorm(n.conv1, batch_norm_param=gen_bn_param)
    n.scale1 = L.Scale(n.bn1, scale_param=sp, param=gen_scale_lr)
    n.scale1 = L.ReLU(n.scale1)
    inp = "scale1"
    for m in range(steps):
        k = m + 1
        cid1 = "step%d/conv1" % k
        cid2 = "step%d/conv2" % k
        bid1 = "step%d/bn1" % k
        bid2 = "step%d/bn2" % k
        eid = "step%d/elt" % k

        n[cid1] = L.Convolution(n[inp],
                                convolution_param=conv_param_nb(3, 16),
                                param=gen_bcnv_lr)
        n[bid1] = L.BatchNorm(n[cid1], batch_norm_param=gen_bn_param)
        n[bid1] = L.Scale(n[bid1], scale_param=sp, param=gen_scale_lr)
        n[bid1] = L.ReLU(n[bid1])

        n[cid2] = L.Convolution(n[bid1],
                                convolution_param=conv_param_nb(3, 16),
                                param=gen_bcnv_lr)
        n[bid2] = L.BatchNorm(n[cid2], batch_norm_param=gen_bn_param)
        n[bid2] = L.Scale(n[bid2], scale_param=sp, param=gen_scale_lr)
        n[bid2] = L.ReLU(n[bid2])

        n[eid] = L.Eltwise(n[bid2], n[inp])
        inp = eid

    outname = "topconv"
    n[outname] = L.Convolution(n[inp],
                               convolution_param=conv_param(3, 1),
                               param=gen_conv_lr)
    n.generated = L.Sigmoid(n.topconv)
    if not dep:
        lw = 1 if part == 'gen' else 0
        if loss_type == 'euc':
            n.l2_loss = L.EuclideanLoss(n.generated,
                                        n.expected,
                                        name="loss",
                                        loss_weight=lw)
        else:
            n.l2_loss = L.EuclideanLoss(n.generated,
                                        n.expected,
                                        name="loss",
                                        loss_weight=0)
            n.cross_entropy_loss = L.SigmoidCrossEntropyLoss(n.topconv,
                                                             n.expected,
                                                             name="loss",
                                                             loss_weight=lw)
    if descr:
        if part != 'gen':
            n.desc_inp = L.Concat(n.generated,
                                  n.data_ref,
                                  concat_param=dict(axis=0))
            cinp = "desc_inp"
        else:
            cinp = "generated"
        n.d_conv1 = L.Convolution(n[cinp],
                                  convolution_param=conv_param(5, 32),
                                  param=dsc_conv_lr)
        n.d_pool1 = L.Pooling(n.d_conv1,
                              pooling_param=dict(kernel_size=3,
                                                 stride=2,
                                                 pool=P.Pooling.MAX))
        n.d_pool1 = L.ReLU(n.d_pool1)

        n.d_conv2 = L.Convolution(n.d_pool1,
                                  convolution_param=conv_param(5, 32),
                                  param=dsc_conv_lr)
        n.d_pool2 = L.Pooling(n.d_conv2,
                              pooling_param=dict(kernel_size=3,
                                                 stride=2,
                                                 pool=P.Pooling.MAX))
        n.d_pool2 = L.ReLU(n.d_pool2)

        n.d_conv3 = L.Convolution(n.d_pool2,
                                  convolution_param=conv_param(5, 64),
                                  param=dsc_conv_lr)
        n.d_pool3 = L.Pooling(n.d_conv3,
                              pooling_param=dict(kernel_size=3,
                                                 stride=2,
                                                 pool=P.Pooling.MAX))
        n.d_pool3 = L.ReLU(n.d_pool3)

        n.d_conv4 = L.Convolution(n.d_pool3,
                                  convolution_param=conv_param(3, 64),
                                  param=dsc_conv_lr)
        n.d_pool4 = L.Pooling(n.d_conv4,
                              pooling_param=dict(kernel_size=3,
                                                 stride=2,
                                                 pool=P.Pooling.MAX))
        n.d_pool4 = L.ReLU(n.d_pool4)

        n.d_ip1 = L.InnerProduct(n.d_pool4,
                                 param=dsc_conv_lr,
                                 inner_product_param=ip_param(512))
        n.d_ip1 = L.ReLU(n.d_ip1)
        n.d_ip2 = L.InnerProduct(n.d_ip1,
                                 param=dsc_conv_lr,
                                 inner_product_param=ip_param(1))

        n.sigmoid_loss = L.SigmoidCrossEntropyLoss(n.d_ip2,
                                                   n.label,
                                                   name="loss",
                                                   loss_weight=100)
        n.score = L.Sigmoid(n.d_ip2)
        n.lbl_flat = L.Reshape(n.label,
                               reshape_param=dict(shape=dict(dim=[-1, 1])))
        n.diff = L.Eltwise(
            n.score,
            n.lbl_flat,
            eltwise_param=dict(coeff=[1.0 / batch, -1.0 / batch]))
        n.error = L.Reduction(n.diff,
                              reduction_param=dict(operation=P.Reduction.ASUM))
        #n.output = L.Split(n[cinp])
        #n.output_labels = L.Split(n.score)
        #n.inputs = n.source

    return n
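A hedged usage sketch for mynet(): the conv_param_nb, conv_param and ip_param helpers it calls are assumed to be defined elsewhere in the original module, and all argument values below are purely illustrative.

gen_net = mynet(batch=32, steps=4, loss_type='euc', descr=True, part='gen')
dsc_net = mynet(batch=32, steps=4, loss_type='euc', descr=True, part='dsc')
with open('generator_train.prototxt', 'w') as f:
    f.write(str(gen_net.to_proto()))
with open('discriminator_train.prototxt', 'w') as f:
    f.write(str(dsc_net.to_proto()))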
Example #3
def create_bnn_cnn_net(num_input_points, height, width, phase=None):

    n = caffe.NetSpec()

    n.input_color = L.Input(shape=[dict(dim=[1, 2, 1, num_input_points])])
    n.in_features = L.Input(shape=[dict(dim=[1, 4, 1, num_input_points])])
    n.out_features = L.Input(shape=[dict(dim=[1, 4, height, width])])
    n.scales = L.Input(shape=[dict(dim=[1, 4, 1, 1])])

    n.flatten_scales = L.Flatten(n.scales, flatten_param=dict(axis=0))

    n.in_scaled_features = L.Scale(n.in_features,
                                   n.flatten_scales,
                                   scale_param=dict(axis=1))
    n.out_scaled_features = L.Scale(n.out_features,
                                    n.flatten_scales,
                                    scale_param=dict(axis=1))

    ### Start of BNN

    # BNN - stage - 1
    n.out_color1 = L.Permutohedral(
        n.input_color,
        n.in_scaled_features,
        n.out_scaled_features,
        permutohedral_param=dict(num_output=32,
                                 group=1,
                                 neighborhood_size=0,
                                 bias_term=True,
                                 norm_type=P.Permutohedral.AFTER,
                                 offset_type=P.Permutohedral.NONE),
        filter_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0.5),
        param=[{
            'lr_mult': 1,
            'decay_mult': 1
        }, {
            'lr_mult': 2,
            'decay_mult': 0
        }])
    n.bnn_out_relu_1 = L.ReLU(n.out_color1, in_place=True)

    # BNN - stage - 2
    n.out_color2 = L.Permutohedral(n.bnn_out_relu_1,
                                   n.out_scaled_features,
                                   n.out_scaled_features,
                                   permutohedral_param=dict(
                                       num_output=32,
                                       group=1,
                                       neighborhood_size=0,
                                       bias_term=True,
                                       norm_type=P.Permutohedral.AFTER,
                                       offset_type=P.Permutohedral.NONE),
                                   filter_filler=dict(type='gaussian',
                                                      std=0.01),
                                   bias_filler=dict(type='constant', value=0),
                                   param=[{
                                       'lr_mult': 1,
                                       'decay_mult': 1
                                   }, {
                                       'lr_mult': 2,
                                       'decay_mult': 0
                                   }])
    n.bnn_out_relu_2 = L.ReLU(n.out_color2, in_place=True)

    # BNN - combination
    n.connection_out = L.Concat(n.bnn_out_relu_1, n.bnn_out_relu_2)
    n.out_color_bilateral = L.Convolution(
        n.connection_out,
        convolution_param=dict(num_output=2,
                               kernel_size=1,
                               stride=1,
                               weight_filler=dict(type='gaussian', std=0.01),
                               bias_filler=dict(type='constant', value=0)),
        param=[{
            'lr_mult': 1,
            'decay_mult': 1
        }, {
            'lr_mult': 2,
            'decay_mult': 0
        }])
    n.out_color_bilateral_relu = L.ReLU(n.out_color_bilateral, in_place=True)

    ### Start of CNN

    # CNN - Stage 1
    n.out_color_spatial1 = L.Convolution(
        n.out_color_bilateral_relu,
        convolution_param=dict(num_output=32,
                               kernel_size=3,
                               stride=1,
                               pad_h=1,
                               pad_w=1,
                               weight_filler=dict(type='gaussian', std=0.01),
                               bias_filler=dict(type='constant', value=0)),
        param=[{
            'lr_mult': 1,
            'decay_mult': 1
        }, {
            'lr_mult': 2,
            'decay_mult': 0
        }])
    n.out_color_spatial_relu1 = L.ReLU(n.out_color_spatial1, in_place=True)

    # CNN - Stage 2
    n.out_color_spatial2 = L.Convolution(
        n.out_color_spatial_relu1,
        convolution_param=dict(num_output=32,
                               kernel_size=3,
                               stride=1,
                               pad_h=1,
                               pad_w=1,
                               weight_filler=dict(type='gaussian', std=0.01),
                               bias_filler=dict(type='constant', value=0)),
        param=[{
            'lr_mult': 1,
            'decay_mult': 1
        }, {
            'lr_mult': 2,
            'decay_mult': 0
        }])
    n.out_color_spatial_relu2 = L.ReLU(n.out_color_spatial2, in_place=True)

    # CNN - Stage 3
    n.out_color_spatial = L.Convolution(n.out_color_spatial_relu2,
                                        convolution_param=dict(
                                            num_output=2,
                                            kernel_size=3,
                                            stride=1,
                                            pad_h=1,
                                            pad_w=1,
                                            weight_filler=dict(type='gaussian',
                                                               std=0.01),
                                            bias_filler=dict(type='constant',
                                                             value=0)),
                                        param=[{
                                            'lr_mult': 1,
                                            'decay_mult': 1
                                        }, {
                                            'lr_mult': 2,
                                            'decay_mult': 0
                                        }])
    n.out_color_spatial_relu = L.ReLU(n.out_color_spatial, in_place=True)

    n.final_connection_out = L.Concat(n.out_color_bilateral_relu,
                                      n.out_color_spatial_relu)
    n.out_color_result = L.Convolution(n.final_connection_out,
                                       convolution_param=dict(
                                           num_output=2,
                                           kernel_size=1,
                                           stride=1,
                                           weight_filler=dict(type='gaussian',
                                                              std=0.01),
                                           bias_filler=dict(type='constant',
                                                            value=0.0)),
                                       param=[{
                                           'lr_mult': 1,
                                           'decay_mult': 1
                                       }, {
                                           'lr_mult': 2,
                                           'decay_mult': 0
                                       }])

    return n.to_proto()
Example #4
def densenet(data_file=None,
             mode='train',
             batch_size=64,
             depth=20,
             first_output=32,
             growth_rate=32,
             dropout=0.5):
    if mode == 'train':
        data, label = L.Data(
            source=data_file,
            backend=P.Data.LMDB,
            batch_size=batch_size,
            ntop=2,
            image_data_param=dict(shuffle=True),
            transform_param=
            dict(  #mean_file="/home/ljf/caffe-master/examples/ljftest_alphabet_DenseNet/imagenet_mean.binaryproto"
                crop_size=28,
                #scale=0.00390625,
                mirror=True))
    if mode == 'test':
        data, label = L.Data(
            source=data_file,
            backend=P.Data.LMDB,
            batch_size=batch_size,
            ntop=2,
            #image_data_param=dict(shuffle=True),
            transform_param=
            dict(  #mean_file="/home/ljf/caffe-master/examples/ljftest_alphabet_DenseNet/imagenet_mean.binaryproto"
                crop_size=28,
                #scale=0.00390625,
                #mirror=True
            ))

    nchannels = first_output
    if mode == 'deploy':
        model = L.Convolution(bottom="data",
                              kernel_size=3,
                              stride=1,
                              num_output=nchannels,
                              pad=1,
                              bias_term=False,
                              weight_filler=dict(type='msra'),
                              bias_filler=dict(type='constant'))
    else:
        model = L.Convolution(data,
                              kernel_size=3,
                              stride=1,
                              num_output=nchannels,
                              pad=1,
                              bias_term=False,
                              weight_filler=dict(type='msra'),
                              bias_filler=dict(type='constant'))

    #N = (depth-4)/4
    N = 3
    for i in range(N):
        model = add_layer(model, growth_rate, dropout)
        nchannels += growth_rate
    model = transition(model, nchannels, dropout)

    N = 3
    for i in range(N):
        model = add_layer(model, growth_rate, dropout)
        nchannels += growth_rate
    model = transition(model, nchannels, dropout)

    N = 3
    for i in range(N):
        model = add_layer(model, growth_rate, dropout)
        nchannels += growth_rate
    model = transition(model, nchannels, dropout)

    N = 3
    for i in range(N):
        model = add_layer(model, growth_rate, dropout)
        nchannels += growth_rate
    model = transition(model, nchannels, dropout)
    #    N=7
    #    for i in range(N):
    #        model = add_layer(model, growth_rate, dropout)
    #        nchannels += growth_rate

    model = L.BatchNorm(model,
                        in_place=False,
                        param=[
                            dict(lr_mult=0, decay_mult=0),
                            dict(lr_mult=0, decay_mult=0),
                            dict(lr_mult=0, decay_mult=0)
                        ])
    model = L.Scale(model,
                    bias_term=True,
                    in_place=True,
                    filler=dict(value=1),
                    bias_filler=dict(value=0))
    model = L.ReLU(model, in_place=True)
    model = L.Pooling(model, pool=P.Pooling.AVE, global_pooling=True)
    model = L.InnerProduct(model,
                           num_output=10,
                           bias_term=True,
                           weight_filler=dict(type='xavier'),
                           bias_filler=dict(type='constant'))

    if mode == 'deploy':
        prob = L.Softmax(model)
        return to_proto(prob)
    else:
        loss = L.SoftmaxWithLoss(model, label)
        if mode == 'train':
            return to_proto(loss)
        accuracy = L.Accuracy(model, label)
        return to_proto(loss, accuracy)
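The densenet() above is presumably driven along these lines to emit prototxt files; a sketch assuming add_layer() and transition() exist in the same module and that 'train_lmdb' stands in for a real LMDB path:

with open('densenet_train.prototxt', 'w') as f:
    f.write(str(densenet(data_file='train_lmdb', mode='train', batch_size=64)))
with open('densenet_deploy.prototxt', 'w') as f:
    # The deploy variant only references a bottom named "data"; an Input layer
    # declaring it still has to be prepended to the generated prototxt.
    f.write(str(densenet(mode='deploy')))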
Example #5
def deconv_BN_scale_relu(bottom, nout, ks=3, stride=1, pad=1, bias_term=True):
    deconv = L.Deconvolution(bottom,
                             convolution_param=dict(num_output=nout,
                                                    kernel_size=ks,
                                                    stride=stride,
                                                    pad=pad,
                                                    bias_term=bias_term,
                                                    weight_filler=dict(type="bilinear")))
    # param=[dict(lr_mult=0)])
    return (deconv,
            L.BatchNorm(deconv, in_place=bias_term),
            L.Scale(deconv, in_place=True, bias_term=bias_term),
            L.ReLU(deconv, in_place=True))
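A hypothetical call site (layer names and shapes invented, caffe and L imported as in the other examples) showing the four returned tops attached to a NetSpec as a 2x upsampling block:

n = caffe.NetSpec()
n.feat = L.Input(input_param=dict(shape=[dict(dim=[1, 64, 28, 28])]))
# ks=4, stride=2, pad=1 doubles the spatial resolution with a bilinear-initialized kernel.
n.up1, n.up1_bn, n.up1_scale, n.up1_relu = deconv_BN_scale_relu(
    n.feat, 32, ks=4, stride=2, pad=1)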
Example #6
def convert_symbol2proto(symbol):
    def looks_like_weight(name):
        """Internal helper to figure out if node should be hidden with `hide_weights`.
        """
        if name.endswith("_weight"):
            return True
        if name.endswith("_bias"):
            return True
        if name.endswith("_beta") or name.endswith("_gamma") or name.endswith("_moving_var") or name.endswith(
                "_moving_mean"):
            return True
        return False

    json_symbol = json.loads(symbol.tojson())
    all_nodes = json_symbol['nodes']
    no_weight_nodes = []
    for node in all_nodes:
        op = node['op']
        name = node['name']
        if op == 'null':
            if looks_like_weight(name):
                continue
        no_weight_nodes.append(node)

    # build next node dict
    next_node = dict()
    for node in no_weight_nodes:
        node_name = node['name']
        for input in node['inputs']:
            last_node_name = all_nodes[input[0]]['name']
            if last_node_name in next_node:
                next_node[last_node_name].append(node_name)
            else:
                next_node[last_node_name] = [node_name]

    supported_op_type = ['null', 'BatchNorm', 'Convolution', 'Activation', 'Pooling', 'elemwise_add', 'SliceChannel',
                         'FullyConnected', 'SoftmaxOutput', '_maximum', 'add_n', 'Concat', '_mul_scalar', 'Deconvolution', 'UpSampling']
    top_dict = dict()
    caffe_net = caffe.NetSpec()
    for node in no_weight_nodes:
        if node['op'] == 'null':
            input_param = dict()
            if node['name'] == 'data':
                input_param['shape'] = dict(dim=[1, 3, 160, 160])
            else:
                input_param['shape'] = dict(dim=[1])
            top_data = CL.Input(ntop=1, input_param=input_param)
            top_dict[node['name']] = [top_data]
            setattr(caffe_net, node['name'], top_data)
        elif node['op'].endswith('_copy'):
            pass
        elif node['op'] == 'BatchNorm':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            in_place = False
            if len(next_node[bottom_node_name]) == 1:
                in_place = True
            if 'momentum' in attr:
                momentum = float(attr['momentum'])
            else:
                momentum = 0.9
            if 'eps' in attr:
                eps = float(attr['eps'])
            else:
                eps = 0.001
            if NO_INPLACE:
                in_place = False
            bn_top = CL.BatchNorm(top_dict[bottom_node_name][input[1]], ntop=1,
                                  batch_norm_param=dict(use_global_stats=True,
                                                        moving_average_fraction=momentum,
                                                        eps=eps), in_place=in_place)
            setattr(caffe_net, node['name'], bn_top)
            scale_top = CL.Scale(bn_top, ntop=1, scale_param=dict(bias_term=True), in_place=not NO_INPLACE)
            top_dict[node['name']] = [scale_top]
            setattr(caffe_net, node['name'] + '_scale', scale_top)
        elif node['op'] == 'Convolution':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            convolution_param = dict()
            if 'kernel' in attr:
                kernel_size = eval(attr['kernel'])
                assert kernel_size[0] == kernel_size[1]
                convolution_param['kernel_size'] = kernel_size[0]
            else:
                convolution_param['kernel_size'] = 1
            if 'no_bias' in attr:
                convolution_param['bias_term'] = not eval(attr['no_bias'])
            if 'num_group' in attr:
                convolution_param['group'] = int(attr['num_group'])
            convolution_param['num_output'] = int(attr['num_filter'])
            if 'pad' in attr:
                pad_size = eval(attr['pad'])
                assert pad_size[0] == pad_size[1]
                convolution_param['pad'] = pad_size[0]
            if 'stride' in attr:
                stride_size = eval(attr['stride'])
                assert stride_size[0] == stride_size[1]
                convolution_param['stride'] = stride_size[0]
            conv_top = CL.Convolution(top_dict[bottom_node_name][input[1]], ntop=1, convolution_param=convolution_param)
            top_dict[node['name']] = [conv_top]
            setattr(caffe_net, node['name'], conv_top)
        elif node['op'] == 'Deconvolution':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            convolution_param = dict()
            if 'kernel' in attr:
                kernel_size = eval(attr['kernel'])
                assert kernel_size[0] == kernel_size[1]
                convolution_param['kernel_size'] = kernel_size[0]
            else:
                convolution_param['kernel_size'] = 1
            if 'no_bias' in attr:
                convolution_param['bias_term'] = not eval(attr['no_bias'])
            else:
                convolution_param['bias_term'] = False
            if 'num_group' in attr:
                convolution_param['group'] = int(attr['num_group'])
            convolution_param['num_output'] = int(attr['num_filter'])
            if 'pad' in attr:
                pad_size = eval(attr['pad'])
                assert pad_size[0] == pad_size[1]
                convolution_param['pad'] = pad_size[0]
            if 'stride' in attr:
                stride_size = eval(attr['stride'])
                assert stride_size[0] == stride_size[1]
                convolution_param['stride'] = stride_size[0]
            conv_top = CL.Deconvolution(top_dict[bottom_node_name][input[1]], ntop=1, convolution_param=convolution_param)
            top_dict[node['name']] = [conv_top]
            setattr(caffe_net, node['name'], conv_top)
        elif node['op'] == 'UpSampling':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            convolution_param = dict()
            if 'scale' in attr:
                kernel_size = 2 * eval(attr['scale']) - eval(attr['scale']) % 2
                convolution_param['kernel_size'] = kernel_size
            else:
                convolution_param['kernel_size'] = 1
            convolution_param['bias_term'] = False
            convolution_param['num_output'] = int(attr['num_filter'])
            convolution_param['group'] = int(attr['num_filter'])
            convolution_param['pad'] = int(math.ceil((eval(attr['scale']) - 1) / 2.))
            convolution_param['stride'] = eval(attr['scale'])
            conv_top = CL.Deconvolution(top_dict[bottom_node_name][input[1]], ntop=1,
                                        convolution_param=convolution_param)
            top_dict[node['name']] = [conv_top]
            setattr(caffe_net, node['name'], conv_top)
        elif node['op'] == 'Activation':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            in_place = False
            if len(next_node[bottom_node_name]) == 1:
                in_place = True
            if NO_INPLACE:
                in_place = False
            if attr['act_type'] == 'relu':
                ac_top = CL.ReLU(top_dict[bottom_node_name][input[1]], ntop=1, in_place=in_place)
            elif attr['act_type'] == 'sigmoid':
                ac_top = CL.Sigmoid(top_dict[bottom_node_name][input[1]], ntop=1, in_place=in_place)
            elif attr['act_type'] == 'tanh':
                ac_top = CL.TanH(top_dict[bottom_node_name][input[1]], ntop=1, in_place=in_place)
            top_dict[node['name']] = [ac_top]
            setattr(caffe_net, node['name'], ac_top)
        elif node['op'] == 'Pooling':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            pooling_param = dict()
            if attr['pool_type'] == 'avg':
                pooling_param['pool'] = 1
            elif attr['pool_type'] == 'max':
                pooling_param['pool'] = 0
            else:
                assert False, attr['pool_type']
            if 'global_pool' in attr and eval(attr['global_pool']) is True:
                pooling_param['global_pooling'] = True
            else:
                if 'kernel' in attr:
                    kernel_size = eval(attr['kernel'])
                    assert kernel_size[0] == kernel_size[1]
                    pooling_param['kernel_size'] = kernel_size[0]
                if 'pad' in attr:
                    pad_size = eval(attr['pad'])
                    assert pad_size[0] == pad_size[1]
                    pooling_param['pad'] = pad_size[0]
                if 'stride' in attr:
                    stride_size = eval(attr['stride'])
                    assert stride_size[0] == stride_size[1]
                    pooling_param['stride'] = stride_size[0]
            pool_top = CL.Pooling(top_dict[bottom_node_name][input[1]], ntop=1, pooling_param=pooling_param)
            top_dict[node['name']] = [pool_top]
            setattr(caffe_net, node['name'], pool_top)
        elif node['op'] == 'elemwise_add' or node['op'] == 'add_n':
            input_a = node['inputs'][0]
            while True:
                if all_nodes[input_a[0]]['op'] not in supported_op_type:
                    input_a = all_nodes[input_a[0]]['inputs'][0]
                else:
                    break
            input_b = node['inputs'][1]
            while True:
                if all_nodes[input_b[0]]['op'] not in supported_op_type:
                    input_b = all_nodes[input_b[0]]['inputs'][0]
                else:
                    break
            bottom_node_name_a = all_nodes[input_a[0]]['name']
            bottom_node_name_b = all_nodes[input_b[0]]['name']
            eltwise_param = dict()
            eltwise_param['operation'] = 1
            ele_add_top = CL.Eltwise(top_dict[bottom_node_name_a][input_a[1]], top_dict[bottom_node_name_b][input_b[1]],
                                     ntop=1, eltwise_param=eltwise_param)
            top_dict[node['name']] = [ele_add_top]
            setattr(caffe_net, node['name'], ele_add_top)
        elif node['op'] == '_maximum':
            input_a = node['inputs'][0]
            while True:
                if all_nodes[input_a[0]]['op'] not in supported_op_type:
                    input_a = all_nodes[input_a[0]]['inputs'][0]
                else:
                    break
            input_b = node['inputs'][1]
            while True:
                if all_nodes[input_b[0]]['op'] not in supported_op_type:
                    input_b = all_nodes[input_b[0]]['inputs'][0]
                else:
                    break
            bottom_node_name_a = all_nodes[input_a[0]]['name']
            bottom_node_name_b = all_nodes[input_b[0]]['name']
            eltwise_param = dict()
            eltwise_param['operation'] = 2
            ele_add_top = CL.Eltwise(top_dict[bottom_node_name_a][input_a[1]], top_dict[bottom_node_name_b][input_b[1]],
                                     ntop=1, eltwise_param=eltwise_param)
            top_dict[node['name']] = [ele_add_top]
            setattr(caffe_net, node['name'], ele_add_top)
        elif node['op'] == '_mul_scalar':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            in_place = False
            if len(next_node[bottom_node_name]) == 1:
                in_place = True
            if NO_INPLACE:
                in_place = False

            scale_top = CL.Scale(top_dict[bottom_node_name][input[1]], ntop=1, scale_param=dict(bias_term=False, filler=dict(value=-1)), in_place=in_place)
            # scale_top = CL.Power(top_dict[bottom_node_name][input[1]], power=1.0, scale=float(attr['scalar']), shift=0, in_place=in_place)

            top_dict[node['name']] = [scale_top]
            setattr(caffe_net, node['name'], scale_top)
        elif node['op'] == 'SliceChannel':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            slice_param = dict()
            slice_param['slice_dim'] = 1
            slice_num = 2
            slice_outputs = CL.Slice(top_dict[bottom_node_name][input[1]], ntop=slice_num, slice_param=slice_param)
            top_dict[node['name']] = slice_outputs
            for idx, output in enumerate(slice_outputs):
                setattr(caffe_net, node['name'] + '_' + str(idx), output)
        elif node['op'] == 'FullyConnected':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            inner_product_param = dict()
            inner_product_param['num_output'] = int(attr['num_hidden'])
            fc_top = CL.InnerProduct(top_dict[bottom_node_name][input[1]], ntop=1,
                                     inner_product_param=inner_product_param)
            top_dict[node['name']] = [fc_top]
            setattr(caffe_net, node['name'], fc_top)
        elif node['op'] == 'SoftmaxOutput':
            input_a = node['inputs'][0]
            while True:
                if all_nodes[input_a[0]]['op'] not in supported_op_type:
                    input_a = all_nodes[input_a[0]]['inputs'][0]
                else:
                    break
            input_b = node['inputs'][1]
            while True:
                if all_nodes[input_b[0]]['op'] not in supported_op_type:
                    input_b = all_nodes[input_b[0]]['inputs'][0]
                else:
                    break
            bottom_node_name_a = all_nodes[input_a[0]]['name']
            bottom_node_name_b = all_nodes[input_b[0]]['name']
            softmax_loss = CL.SoftmaxWithLoss(top_dict[bottom_node_name_a][input_a[1]],
                                              top_dict[bottom_node_name_b][input_b[1]], ntop=1)
            top_dict[node['name']] = [softmax_loss]
            setattr(caffe_net, node['name'], softmax_loss)
        elif node['op'] == 'Concat':
            if len(node['inputs']) == 2:
                input_a = node['inputs'][0]
                while True:
                    if all_nodes[input_a[0]]['op'] not in supported_op_type:
                        input_a = all_nodes[input_a[0]]['inputs'][0]
                    else:
                        break
                input_b = node['inputs'][1]
                while True:
                    if all_nodes[input_b[0]]['op'] not in supported_op_type:
                        input_b = all_nodes[input_b[0]]['inputs'][0]
                    else:
                        break
                bottom_node_name_a = all_nodes[input_a[0]]['name']
                bottom_node_name_b = all_nodes[input_b[0]]['name']
                concat_top = CL.Concat(top_dict[bottom_node_name_a][input_a[1]], top_dict[bottom_node_name_b][input_b[1]], ntop=1)
                top_dict[node['name']] = [concat_top]
                setattr(caffe_net, node['name'], concat_top)
            elif len(node['inputs']) == 3:
                input_a = node['inputs'][0]
                while True:
                    if all_nodes[input_a[0]]['op'] not in supported_op_type:
                        input_a = all_nodes[input_a[0]]['inputs'][0]
                    else:
                        break
                input_b = node['inputs'][1]
                while True:
                    if all_nodes[input_b[0]]['op'] not in supported_op_type:
                        input_b = all_nodes[input_b[0]]['inputs'][0]
                    else:
                        break
                input_c = node['inputs'][2]
                while True:
                    if all_nodes[input_c[0]]['op'] not in supported_op_type:
                        input_c = all_nodes[input_c[0]]['inputs'][0]
                    else:
                        break
                bottom_node_name_a = all_nodes[input_a[0]]['name']
                bottom_node_name_b = all_nodes[input_b[0]]['name']
                bottom_node_name_c = all_nodes[input_c[0]]['name']
                concat_top = CL.Concat(top_dict[bottom_node_name_a][input_a[1]],
                                       top_dict[bottom_node_name_b][input_b[1]],
                                       top_dict[bottom_node_name_c][input_c[1]], ntop=1)
                top_dict[node['name']] = [concat_top]
                setattr(caffe_net, node['name'], concat_top)
        else:
            logging.warning('unknown op type = %s', node['op'])

    return caffe_net.to_proto()
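A hedged driver for convert_symbol2proto(): it assumes MXNet is installed, that the module-level CL alias (caffe.layers) and the NO_INPLACE flag used above are defined, and that 'model-symbol.json' is a placeholder path.

import mxnet as mx

sym = mx.sym.load('model-symbol.json')
with open('converted.prototxt', 'w') as f:
    f.write(str(convert_symbol2proto(sym)))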
Example #7
def ConvBNLayer(net,
                from_layer,
                out_layer,
                use_bn,
                use_relu,
                num_output,
                kernel_size,
                pad,
                stride,
                dilation=1,
                use_scale=True,
                eps=0.001,
                conv_prefix='',
                conv_postfix='',
                bn_prefix='',
                bn_postfix='_bn',
                scale_prefix='',
                scale_postfix='_scale',
                bias_prefix='',
                bias_postfix='_bias'):
    if use_bn:
        # parameters for convolution layer with batchnorm.
        kwargs = {
            'param': [dict(lr_mult=1, decay_mult=1)],
            'weight_filler': dict(type='gaussian', std=0.01),
            'bias_term': False,
        }
        # parameters for batchnorm layer.
        bn_kwargs = {
            'param': [
                dict(lr_mult=0, decay_mult=0),
                dict(lr_mult=0, decay_mult=0),
                dict(lr_mult=0, decay_mult=0)
            ],
            'eps':
            eps,
        }
        # parameters for scale bias layer after batchnorm.
        if use_scale:
            sb_kwargs = {
                'bias_term':
                True,
                'param':
                [dict(lr_mult=1, decay_mult=0),
                 dict(lr_mult=1, decay_mult=0)],
                'filler':
                dict(type='constant', value=1.0),
                'bias_filler':
                dict(type='constant', value=0.0),
            }
        else:
            bias_kwargs = {
                'param': [dict(lr_mult=1, decay_mult=0)],
                'filler': dict(type='constant', value=0.0),
            }
    else:
        kwargs = {
            'param':
            [dict(lr_mult=1, decay_mult=1),
             dict(lr_mult=2, decay_mult=0)],
            'weight_filler': dict(type='xavier'),
            'bias_filler': dict(type='constant', value=0)
        }

    conv_name = '{}{}{}'.format(conv_prefix, out_layer, conv_postfix)
    [kernel_h, kernel_w] = UnpackVariable(kernel_size, 2)
    [pad_h, pad_w] = UnpackVariable(pad, 2)
    [stride_h, stride_w] = UnpackVariable(stride, 2)
    if kernel_h == kernel_w:
        net[conv_name] = L.Convolution(net[from_layer],
                                       num_output=num_output,
                                       kernel_size=kernel_h,
                                       pad=pad_h,
                                       stride=stride_h,
                                       **kwargs)
    else:
        net[conv_name] = L.Convolution(net[from_layer],
                                       num_output=num_output,
                                       kernel_h=kernel_h,
                                       kernel_w=kernel_w,
                                       pad_h=pad_h,
                                       pad_w=pad_w,
                                       stride_h=stride_h,
                                       stride_w=stride_w,
                                       **kwargs)
    if dilation > 1:
        net.update(conv_name, {'dilation': dilation})
    if use_bn:
        bn_name = '{}{}{}'.format(bn_prefix, out_layer, bn_postfix)
        net[bn_name] = L.BatchNorm(net[conv_name], in_place=True, **bn_kwargs)
        if use_scale:
            sb_name = '{}{}{}'.format(scale_prefix, out_layer, scale_postfix)
            net[sb_name] = L.Scale(net[bn_name], in_place=True, **sb_kwargs)
        else:
            bias_name = '{}{}{}'.format(bias_prefix, out_layer, bias_postfix)
            net[bias_name] = L.Bias(net[bn_name], in_place=True, **bias_kwargs)
    if use_relu:
        relu_name = '{}_relu'.format(conv_name)
        net[relu_name] = L.ReLU(net[conv_name], in_place=True)
Example #8
def make_resnet(training_data='cifar10_train',
                test_data='cifar10_test',
                mean_file='mean.binaryproto',
                num_res_in_stage=3):
    num_feature_maps = np.array([16, 32, 64])  # feature map size: [32, 16, 8]

    n = caffe.NetSpec()
    # make training data layer
    n.data, n.label = L.Data(source=training_data,
                             backend=P.Data.LMDB,
                             batch_size=128,
                             ntop=2,
                             transform_param=dict(crop_size=32,
                                                  mean_file=mean_file,
                                                  mirror=True),
                             image_data_param=dict(shuffle=True),
                             include=dict(phase=0))
    # make test data layer
    n.test_data, n.test_label = L.Data(source=test_data,
                                       backend=P.Data.LMDB,
                                       batch_size=100,
                                       ntop=2,
                                       transform_param=dict(
                                           crop_size=32,
                                           mean_file=mean_file,
                                           mirror=False),
                                       include=dict(phase=1))
    # conv1 should accept both the training and test data layers, but that is inconvenient to code in
    # pycaffe: you would have to write two conv layers for them. To deal with this, I temporarily ignore
    # the test data layer and let conv1 accept the output of the training data layer. Then, after making
    # the whole prototxt, I post-process the top names of the two data layers, renaming them to the same name.
    n.conv_start = L.Convolution(
        n.data,
        kernel_size=3,
        stride=1,
        num_output=num_feature_maps[0],
        pad=1,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        weight_filler=weight_filler,
        bias_filler=bias_filler)

    # set up a checkpoint so we know where we left off.
    checkpoint = 'n.conv_start'

    # start making blocks.
    # num_feature_maps: the number of feature maps for each stage. Default is [16,32,64],
    #                   suggesting the network has three stages.
    # num_res_in_stage: a parameter from the original paper, telling us how many blocks there are in
    #                   each stage.
    # stride_proj: control the stride of project path; the first project path uses stride 1, and the rest
    #              use stride 2.
    stride_proj = 1
    for num_map in num_feature_maps:
        num_map = int(num_map)
        for res in list(range(num_res_in_stage)):
            # stage name
            stage = 'map' + str(num_map) + '_' + str(res + 1) + '_'
            # use the projecting block when downsampling the feature map
            if np.where(num_feature_maps == num_map)[0] >= 0 and res == 0:

                make_res = 'n.' + stage + 'bn_pre_train,' + \
                           'n.' + stage + 'bn_pre_test,' + \
                           'n.' + stage + 'pre_scale,' + \
                           'n.' + stage + 'pre_relu,' + \
                           'n.' + stage + 'conv1,' + \
                           'n.' + stage + 'bn1_train, ' + \
                           'n.' + stage + 'bn1_test, ' + \
                           'n.' + stage + 'scale1, ' + \
                           'n.' + stage + 'relu1, ' + \
                           'n.' + stage + 'conv2, ' + \
                           'n.' + stage + 'bn2_train, ' + \
                           'n.' + stage + 'bn2_test, ' + \
                           'n.' + stage + 'scale2, ' + \
                           'n.' + stage + 'relu2, ' + \
                           'n.' + stage + 'conv_end, ' + \
                           'n.' + stage + 'eltsum' + \
                           ' = project_block(' + checkpoint + ', base_channels=num_map, stride=' + str(stride_proj) +', pad=1)'
                exec(make_res)
                if stride_proj == 1:
                    stride_proj += 1
                checkpoint = 'n.' + stage + 'eltsum'  # where we are now
                continue

            # most blocks have this shape
            make_res = 'n.' + stage + 'bn_pre_train, ' + \
                       'n.' + stage + 'bn_pre_test, ' + \
                       'n.' + stage + 'pre_scale, ' + \
                       'n.' + stage + 'pre_relu, ' + \
                       'n.' + stage + 'conv1, ' + \
                       'n.' + stage + 'bn1_train, ' + \
                       'n.' + stage + 'bn1_test, ' + \
                       'n.' + stage + 'scale1, ' + \
                       'n.' + stage + 'relu1, ' + \
                       'n.' + stage + 'conv2, ' + \
                       'n.' + stage + 'bn2_train, ' + \
                       'n.' + stage + 'bn2_test, ' + \
                       'n.' + stage + 'scale2, ' + \
                       'n.' + stage + 'relu2, ' + \
                       'n.' + stage + 'conv_end, ' + \
                       'n.' + stage + 'eltsum, ' + \
                       ' = identity_block(' + checkpoint + ', base_channels=num_map, stride=1, pad=1)'
            exec(make_res)
            checkpoint = 'n.' + stage + 'eltsum'  # where we are now

    # add the rest layers
    exec(
        'n.BN_train_end = L.BatchNorm(' + checkpoint +
        ', param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), \
                            dict(lr_mult=0, decay_mult=0)], \
                            use_global_stats=False, in_place=False, include=dict(phase=0))'
    )

    exec(
        'n.BN_test_end = L.BatchNorm(' + checkpoint +
        ', param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), \
                                        dict(lr_mult=0, decay_mult=0)], \
                           use_global_stats=True, in_place=False, include=dict(phase=1))'
    )

    n.scale_end = L.Scale(n.BN_train_end,
                          scale_param=dict(bias_term=True),
                          in_place=True)

    n.relu_end = L.ReLU(n.scale_end, in_place=True)

    n.pool_global = L.Pooling(n.relu_end,
                              pool=P.Pooling.AVE,
                              global_pooling=True)
    n.score = L.InnerProduct(
        n.pool_global,
        num_output=10,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0))
    n.loss = L.SoftmaxWithLoss(n.score, n.label)
    n.acc = L.Accuracy(n.score, n.label)

    return n.to_proto()
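The comment before conv_start says the top names of the two data layers are unified by post-processing the generated prototxt; one plausible way to do that rewrite (a sketch, assuming project_block(), identity_block() and the filler globals used above are defined):

proto_str = str(make_resnet())
# Rename the test data layer's tops so every consumer sees the same bottom names.
proto_str = proto_str.replace('"test_data"', '"data"').replace('"test_label"', '"label"')
with open('resnet_train_test.prototxt', 'w') as f:
    f.write(proto_str)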
Example #9
    def conv_batch_relu(net,
                        bottom,
                        name,
                        output,
                        kernel,
                        stride,
                        pad,
                        phase,
                        with_relu=True):
        def conv_params(name):
            conv_kwargs = {
                'param': [{
                    'name': name + '_w',
                    'lr_mult': 1,
                    'decay_mult': 1
                }, {
                    'name': name + '_b',
                    'lr_mult': 2,
                    'decay_mult': 0
                }],
                'weight_filler':
                dict(type='msra'),
                'bias_filler':
                dict(type='constant', value=0)
            }
            return conv_kwargs

        def bn_params(name, phase):
            bn_kwargs = {
                'use_global_stats':
                phase == caffe.TEST,
                'in_place':
                True,
                'param': [{
                    "name": name + '_w',
                    "lr_mult": 0
                }, {
                    "name": name + '_b',
                    "lr_mult": 0
                }, {
                    "name": name + '_t',
                    "lr_mult": 0
                }]
            }
            return bn_kwargs

        def scale_params(name):
            scale_kwargs = {
                'in_place': True,
                'param': [{
                    'name': name + '_w'
                }, {
                    'name': name + '_b'
                }],
                'bias_term': True
            }
            return scale_kwargs

        conv_kwargs = conv_params(name + '_conv')
        bn_kwargs = bn_params(name + '_bn', phase)
        scale_kwargs = scale_params(name + '_scale')

        conv = netset(
            net, name + '_conv',
            L.Convolution(bottom,
                          kernel_size=kernel,
                          stride=stride,
                          num_output=output,
                          pad=pad,
                          **conv_kwargs))
        batch = netset(net, name + '_bn', L.BatchNorm(conv, **bn_kwargs))
        scale = netset(net, name + '_scale', L.Scale(batch, **scale_kwargs))
        if with_relu:
            relu = netset(net, name + '_relu', L.ReLU(scale, in_place=True))
            return relu
        else:
            return scale
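conv_batch_relu() relies on a netset() helper that is not shown; a minimal stand-in (an assumption, not the original implementation) would register each layer on the net under the given name and return it for chaining:

def netset(net, name, layer):
    # Attach `layer` to the NetSpec as `name` and hand it back so calls can be chained.
    net[name] = layer
    return layer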
Example #10
def densenet(data_file=None,
             mode='train_test',
             batch_size=64,
             depth=40,
             first_output=16,
             growth_rate=12,
             dropout=0.2):
    nchannels = first_output
    if mode == 'deploy':
        # deploy.prototxt doesn't need a data layer
        model = L.Convolution(bottom='data',
                              kernel_size=3,
                              stride=1,
                              num_output=nchannels,
                              pad=1,
                              bias_term=False,
                              weight_filler=dict(type='msra'),
                              bias_filler=dict(type='constant'))
    else:
        data, label = L.Data(source=data_file,
                             backend=P.Data.LMDB,
                             batch_size=batch_size,
                             ntop=2,
                             transform_param=dict(mirror=True,
                                                  crop_size=32,
                                                  mean_value=[129, 124, 112],
                                                  scale=1))
        model = L.Convolution(data,
                              kernel_size=3,
                              stride=1,
                              num_output=nchannels,
                              pad=1,
                              bias_term=False,
                              weight_filler=dict(type='msra'),
                              bias_filler=dict(type='constant'))

    N = (depth - 4) // 3  # integer division so range(N) below receives an int
    for i in range(N):
        model = dense_block(model, growth_rate, dropout)
        nchannels += growth_rate
    model = transition(model, nchannels, dropout)

    for i in range(N):
        model = dense_block(model, growth_rate, dropout)
        nchannels += growth_rate
    model = transition(model, nchannels, dropout)

    for i in range(N):
        model = dense_block(model, growth_rate, dropout)
        nchannels += growth_rate

    model = L.BatchNorm(model, in_place=False)
    model = L.Scale(model,
                    bias_term=True,
                    in_place=True,
                    filler=dict(value=1),
                    bias_filler=dict(value=0))
    model = L.ReLU(model, in_place=True)
    model = L.Pooling(model, pool=P.Pooling.AVE, global_pooling=True)
    model = L.InnerProduct(model,
                           num_output=100,
                           bias_term=True,
                           weight_filler=dict(type='xavier'),
                           bias_filler=dict(type='constant'))

    if mode == 'deploy':
        prob = L.Softmax(model)
        return to_proto(prob)
    else:
        loss = L.SoftmaxWithLoss(model, label)
        accuracy = L.Accuracy(model, label)
        return to_proto(loss, accuracy)
Example #11
def bn_scale_relu(bottom):
    bn = L.BatchNorm(bottom, use_global_stats=False)
    scale = L.Scale(bn, scale_param=dict(bias_term=True), in_place=True)
    relu = L.ReLU(bn, in_place=True)

    return bn, scale, relu
Example #12
def _conv_block(net,
                bottom,
                name,
                num_output,
                use_relu=True,
                kernel_size=3,
                stride=1,
                pad=1,
                bn_prefix='',
                bn_postfix='/bn',
                scale_prefix='',
                scale_postfix='/scale',
                direction=0):
    if direction == 0:
        conv = L.Convolution(bottom,
                             kernel_size=kernel_size,
                             stride=stride,
                             num_output=num_output,
                             pad=pad,
                             bias_term=False,
                             weight_filler=dict(type='xavier'),
                             bias_filler=dict(type='constant'))
    elif direction == 1:
        conv = L.Convolution(bottom,
                             kernel_w=kernel_size,
                             kernel_h=1,
                             stride=stride,
                             num_output=num_output,
                             pad_w=pad,
                             pad_h=0,
                             bias_term=False,
                             weight_filler=dict(type='xavier'),
                             bias_filler=dict(type='constant'))
    elif direction == 2:
        conv = L.Convolution(bottom,
                             kernel_w=1,
                             kernel_h=kernel_size,
                             stride=stride,
                             num_output=num_output,
                             pad_h=pad,
                             pad_w=0,
                             bias_term=False,
                             weight_filler=dict(type='xavier'),
                             bias_filler=dict(type='constant'))
    net[name] = conv

    bn_name = '{}{}{}'.format(bn_prefix, name, bn_postfix)
    bn_kwargs = {
        'param': [
            dict(lr_mult=0, decay_mult=0),
            dict(lr_mult=0, decay_mult=0),
            dict(lr_mult=0, decay_mult=0)
        ],
        'eps':
        0.001,
        'moving_average_fraction':
        0.999,
    }
    batch_norm = L.BatchNorm(conv, in_place=True, **bn_kwargs)
    net[bn_name] = batch_norm
    scale_kwargs = {
        'param': [
            dict(lr_mult=1, decay_mult=0),
            dict(lr_mult=2, decay_mult=0),
        ],
    }
    scale = L.Scale(batch_norm,
                    bias_term=True,
                    in_place=True,
                    filler=dict(value=1),
                    bias_filler=dict(value=0),
                    **scale_kwargs)
    sb_name = '{}{}{}'.format(scale_prefix, name, scale_postfix)
    net[sb_name] = scale

    if use_relu:
        out_layer = L.ReLU(scale, in_place=True)
        relu_name = '{}/relu'.format(name)
        net[relu_name] = out_layer
    else:
        out_layer = scale

    return out_layer
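# A hedged usage sketch for _conv_block (layer names and input shape are
# illustrative; the caffe/L imports follow the other examples). direction=0 uses
# a square kxk kernel, direction=1 a horizontal 1xk kernel, direction=2 a
# vertical kx1 kernel.
import caffe
from caffe import layers as L

net = caffe.NetSpec()
net.data = L.Input(input_param=dict(shape=[dict(dim=[1, 3, 64, 64])]))
out = _conv_block(net, net.data, 'conv1', num_output=32, direction=0)
out = _conv_block(net, out, 'conv2_h', num_output=32, direction=1)
out = _conv_block(net, out, 'conv2_v', num_output=32, direction=2)
print(net.to_proto())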
Exemplo n.º 13
0
    def compile_time_operation(self, learning_option, cluster):
        ksize = self.get_attr('ksize')
        stride = self.get_attr('stride')
        padding = self.get_attr('padding', self.padding)

        input_ = self.get_input('input')
        indim = self.get_dimension('input')
        # padding
        if padding == 'SAME':
            #print self.name + " : " + str(indim)
            outdim = [
                np.ceil(float(indim[i]) / float(stride)) for i in xrange(2)
            ]
            p = [
                int(((outdim[i] - 1) * stride + ksize - indim[i]) / 2)
                for i in xrange(2)
            ]
        else:
            outdim = [
                np.ceil(float(indim[i] - ksize + 1) / float(stride))
                for i in xrange(2)
            ]
            p = [0, 0]
        # pool=0: max pooling, pool=1: average pooling
        layer = L.Pooling(input_,
                          name=self.name,
                          pool=1,
                          kernel_size=ksize,
                          stride=stride,
                          pad_h=p[0],
                          pad_w=p[1])

        ### activation
        activation = self.get_attr('activation', self.activation)

        if len(activation) != 0:
            for act in activation:
                # relu
                if act == 'relu':
                    layer = L.ReLU(layer,
                                   name=self.name + '_relu',
                                   in_place=True)

                # batch normalization
                elif act == 'batchnorm':
                    use_global_stats = self.get_attr('use_global_stats',
                                                     self.use_global_stats)
                    moving_average_fraction = self.get_attr(
                        'moving_average_fraction',
                        self.moving_average_fraction)
                    epsilon = self.get_attr('epsilon', self.epsilon)
                    layer = L.BatchNorm(
                        layer,
                        name=self.name + '_batchnorm',
                        use_global_stats=use_global_stats,
                        moving_average_fraction=moving_average_fraction,
                        eps=epsilon,
                        in_place=True)

                    # scale
                    if self.get_attr('is_scale', self.is_scale):
                        bias_term = self.get_attr('bias_term', self.bias_term)
                        layer = L.Scale(layer,
                                        bias_term=bias_term,
                                        in_place=True)

        # TODO: decide whether to align the output name with DLMDL
        self.set_output('output', layer)
        self.set_dimension('output', outdim)
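# A worked example (with assumed shapes) of the 'SAME' padding arithmetic used
# above: for a 32x32 input, kernel 5 and stride 2, the output stays at
# ceil(32/2)=16 per side and one pixel of padding is added on each axis.
import numpy as np

indim, ksize, stride = [32, 32], 5, 2
outdim = [int(np.ceil(float(indim[i]) / float(stride))) for i in range(2)]      # [16, 16]
p = [int(((outdim[i] - 1) * stride + ksize - indim[i]) / 2) for i in range(2)]  # [1, 1]
print(outdim, p)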
Exemplo n.º 14
0
def convert(keras_model, keras_format, caffe_net_file, caffe_params_file):
    
    caffe_net = caffe.NetSpec()
    
    net_params = dict()
    
    outputs=dict()
    shape=()
    
    input_str = ''

    # tensorflow 2.0: explicit Input construction, currently disabled by the
    # trailing "and False"; the InputLayer branch below creates the input instead.
    if len(caffe_net.tops) == 0 and False:
        input_name = 'data'
        input_shape = [1, keras_model.input.shape[1], keras_model.input.shape[2], keras_model.input.shape[3]]
        input_param = {'shape': {'dim': list(input_shape)}}
        caffe_net[input_name] = L.Input(input_param=input_param)

        input_str = 'input: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}'.format(
            '"' + input_name + '"', 1, input_shape[1], input_shape[2], input_shape[3])
        top = keras_model.input.name
        outputs[top] = input_name

    for layer in keras_model.layers:
        name = layer.name
        layer_type = type(layer).__name__
        
        config = layer.get_config()

        blobs = layer.get_weights()
        blobs_num = len(blobs)
        
        if type(layer.output)==list:
            raise Exception('Layers with multiple outputs are not supported')
        else: 
            top=layer.output.name
        
        if type(layer.input)!=list:
            bottom = layer.input.name
        
        #first we need to create Input layer
        '''
        if layer_type=='InputLayer' or len(caffe_net.tops)==0:

            input_name = 'data'
            caffe_net[input_name] = L.Layer()
            input_shape = config['batch_input_shape']
            input_str = 'input: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}'.format('"' + input_name + '"',
                1, input_shape[3], input_shape[1], input_shape[2])
            outputs[layer.input.name] = input_name
            if layer_type=='InputLayer':
                continue
        '''
        if layer_type == 'InputLayer' and len(caffe_net.tops)==0:
            name = 'data'
            input_shape = config['batch_input_shape']
            if "first" in keras_format:
                input_shape = [1, input_shape[1], input_shape[2], input_shape[3]]
            else:
                input_shape = [1, input_shape[3], input_shape[1], input_shape[2]]
            input_param = {'shape': {'dim': list(input_shape)}}
            caffe_net[name] = L.Input(input_param=input_param)

        elif layer_type=='Conv2D' or layer_type=='Convolution2D':
            
            strides = config['strides']
            kernel_size = config['kernel_size']
            
            kwargs = { 'num_output': config['filters'] }
            
            if kernel_size[0]==kernel_size[1]:
                kwargs['kernel_size']=kernel_size[0]
            else:
                kwargs['kernel_h']=kernel_size[0]
                kwargs['kernel_w']=kernel_size[1]
            
            if strides[0]==strides[1]:
                kwargs['stride']=strides[0]
            else:
                kwargs['stride_h']=strides[0]
                kwargs['stride_w']=strides[1]
            
            if not config['use_bias']:
                kwargs['bias_term'] = False
                #kwargs['param']=[dict(lr_mult=0)]
            else:
                #kwargs['param']=[dict(lr_mult=0), dict(lr_mult=0)]
                pass
            
            set_padding(config, layer.input_shape, kwargs)
            
            caffe_net[name] = L.Convolution(caffe_net[outputs[bottom]], **kwargs)
            
            blobs[0] = np.array(blobs[0]).transpose(3,2,0,1)
            net_params[name] = blobs

            if config['activation'] == 'relu':
                name_s = name+'s'
                caffe_net[name_s] = L.ReLU(caffe_net[name], in_place=True)
            elif config['activation'] == 'sigmoid':
                name_s = name+'s'
                caffe_net[name_s] = L.Sigmoid(caffe_net[name], in_place=True)
            elif config['activation'] == 'linear':
                #do nothing
                pass
            else:
                raise Exception('Unsupported activation '+config['activation'])
        elif layer_type == 'Permute':
            # skip the layer
            name = outputs[bottom]

        elif layer_type=='DepthwiseConv2D':
            
            strides = config['strides']
            kernel_size = config['kernel_size']

            kwargs = {'num_output': layer.input_shape[3]}

            if kernel_size[0] == kernel_size[1]:
                kwargs['kernel_size'] = kernel_size[0]
            else:
                kwargs['kernel_h'] = kernel_size[0]
                kwargs['kernel_w'] = kernel_size[1]

            if strides[0] == strides[1]:
                kwargs['stride'] = strides[0]
            else:
                kwargs['stride_h'] = strides[0]
                kwargs['stride_w'] = strides[1]

            set_padding(config, layer.input_shape, kwargs)

            kwargs['group'] = layer.input_shape[3]

            kwargs['bias_term'] = False
            caffe_net[name] = L.Convolution(caffe_net[outputs[bottom]], **kwargs)
            blob = np.array(blobs[0]).transpose(2, 3, 0, 1)
            blob.shape = (1,) + blob.shape
            net_params[name] = blob
            
            if config['activation'] == 'relu':
                name_s = name+'s'
                caffe_net[name_s] = L.ReLU(caffe_net[name], in_place=True)
            elif config['activation'] == 'sigmoid':
                name_s = name+'s'
                caffe_net[name_s] = L.Sigmoid(caffe_net[name], in_place=True)
            elif config['activation'] == 'linear':
                #do nothing
                pass
            else:
                raise Exception('Unsupported activation '+config['activation'])

        elif layer_type == 'SeparableConv2D':

            strides = config['strides']
            kernel_size = config['kernel_size']

            kwargs = {'num_output': layer.input_shape[3]}

            if kernel_size[0] == kernel_size[1]:
                kwargs['kernel_size'] = kernel_size[0]
            else:
                kwargs['kernel_h'] = kernel_size[0]
                kwargs['kernel_w'] = kernel_size[1]

            if strides[0] == strides[1]:
                kwargs['stride'] = strides[0]
            else:
                kwargs['stride_h'] = strides[0]
                kwargs['stride_w'] = strides[1]

            set_padding(config, layer.input_shape, kwargs)

            kwargs['group'] = layer.input_shape[3]

            kwargs['bias_term'] = False
            caffe_net[name] = L.Convolution(caffe_net[outputs[bottom]], **kwargs)
            blob = np.array(blobs[0]).transpose(2, 3, 0, 1)
            blob.shape = (1,) + blob.shape
            net_params[name] = blob

            name2 = name + '_'
            kwargs = {'num_output': config['filters'], 'kernel_size': 1, 'bias_term': config['use_bias']}
            caffe_net[name2] = L.Convolution(caffe_net[name], **kwargs)

            if config['use_bias'] == True:
                blob2 = []
                blob2.append(np.array(blobs[1]).transpose(3, 2, 0, 1))
                blob2.append(np.array(blobs[2]))
                blob2[0].shape = (1,) + blob2[0].shape
            else:
                blob2 = np.array(blobs[1]).transpose(3, 2, 0, 1)
                blob2.shape = (1,) + blob2.shape

            net_params[name2] = blob2
            name = name2

        elif layer_type=='BatchNormalization':
            
            param = dict()
            
            variance = np.array(blobs[-1])
            mean = np.array(blobs[-2])
            
            if config['scale']:
                gamma = np.array(blobs[0])
                sparam=[dict(lr_mult=1), dict(lr_mult=1)]
            else:
                gamma = np.ones(mean.shape, dtype=np.float32)
                #sparam=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=1, decay_mult=1)]
                sparam=[dict(lr_mult=0), dict(lr_mult=1)]
                #sparam=[dict(lr_mult=0), dict(lr_mult=0)]
            
            if config['center']:
                beta = np.array(blobs[-3])
                param['bias_term']=True
            else:
                beta = np.zeros(mean.shape, dtype=np.float32)
                param['bias_term']=False
            
            caffe_net[name] = L.BatchNorm(caffe_net[outputs[bottom]], in_place=True)
            	#param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=1, decay_mult=1), dict(lr_mult=0, decay_mult=0)])
            	#param=[dict(lr_mult=1), dict(lr_mult=1), dict(lr_mult=0)])
                
            net_params[name] = (mean, variance, np.array(1.0)) 
            
            name_s = name+'s'
            
            caffe_net[name_s] = L.Scale(caffe_net[name], in_place=True, 
            	param=sparam, scale_param={'bias_term': config['center']})
            net_params[name_s] = (gamma, beta)
            
        elif layer_type=='Dense':
            caffe_net[name] = L.InnerProduct(caffe_net[outputs[bottom]], 
            	num_output=config['units'], weight_filler=dict(type='xavier'))
            
            if config['use_bias']:
                weight=np.array(blobs[0]).transpose(1, 0)
                if False and type(layer._inbound_nodes[0].inbound_layers[0]).__name__=='Flatten':
                    flatten_shape=layer._inbound_nodes[0].inbound_layers[0].input_shape
                    for i in range(weight.shape[0]):
                        weight[i]=np.array(weight[i].reshape(flatten_shape[1],flatten_shape[2],flatten_shape[3]).transpose(2,0,1).reshape(weight.shape[1]))
                net_params[name] = (weight, np.array(blobs[1]))
            else:
                weight=np.array(blobs[0]).transpose(1, 0)
                net_params[name] = (weight, np.zeros(weight.shape[0], dtype=weight.dtype))
                
            name_s = name+'s'
            if config['activation']=='softmax':
                caffe_net[name_s] = L.Softmax(caffe_net[name], in_place=True)
            elif config['activation']=='relu':
                caffe_net[name_s] = L.ReLU(caffe_net[name], in_place=True)
        
        elif layer_type=='Activation':
            if config['activation']=='relu':
                #caffe_net[name] = L.ReLU(caffe_net[outputs[bottom]], in_place=True)
                if len(layer.input.consumers())>1:
                    caffe_net[name] = L.ReLU(caffe_net[outputs[bottom]])
                else:
                    caffe_net[name] = L.ReLU(caffe_net[outputs[bottom]], in_place=True)
            elif config['activation']=='relu6':
                #TODO
                caffe_net[name] = L.ReLU(caffe_net[outputs[bottom]])
            elif config['activation']=='softmax':
                caffe_net[name] = L.Softmax(caffe_net[outputs[bottom]], in_place=True)
            elif config['activation'] == 'sigmoid':
                # name_s = name+'s'
                caffe_net[name] = L.Sigmoid(caffe_net[outputs[bottom]], in_place=True)
            # a 'linear' Activation is mapped to a constant Scale (~1/255 = 0.003921), used for image normalization
            elif config['activation'] == 'linear':
                name = name + '_linear'
                caffe_net[name] = L.Scale(caffe_net[outputs[bottom]], filler=dict(type="constant", value=0.003921))
            else:
                raise Exception('Unsupported activation '+config['activation'])
        
        elif layer_type=='Cropping2D':
            shape = layer.output_shape
            ddata = L.DummyData(shape=dict(dim=[1, shape[3],shape[1], shape[2]]))
            layers = []
            layers.append(caffe_net[outputs[bottom]])   
            layers.append(ddata)   #TODO
            caffe_net[name] = L.Crop(*layers)
        
        elif layer_type=='Concatenate' or layer_type=='Merge':
            layers = []
            for i in layer.input:
                layers.append(caffe_net[outputs[i.name]])
            caffe_net[name] = L.Concat(*layers, axis=1)
        
        elif layer_type=='Add':
            '''PROD = 0; SUM = 1; MAX = 2;'''
            layers = []
            for i in layer.input:
                layers.append(caffe_net[outputs[i.name]])
            caffe_net[name] = L.Eltwise(*layers, eltwise_param ={'operation': 1 })
        
        elif layer_type=='Flatten':
            caffe_net[name] = L.Flatten(caffe_net[outputs[bottom]])
        
        elif layer_type=='Reshape':
            shape = config['target_shape']
            if len(shape)==3:
                #shape = (layer.input_shape[0], shape[2], shape[0], shape[1])
                shape = (1, shape[2], shape[0], shape[1])
            elif len(shape)==1:
                #shape = (layer.input_shape[0], 1, 1, shape[0])
                shape = (1, 1, 1, shape[0])
            caffe_net[name] = L.Reshape(caffe_net[outputs[bottom]], 
                reshape_param={'shape':{'dim': list(shape)}})
        
        elif layer_type=='MaxPooling2D' or layer_type=='AveragePooling2D':
            
            kwargs={}
            
            if layer_type=='MaxPooling2D':
                kwargs['pool'] = P.Pooling.MAX
            else:
                kwargs['pool'] = P.Pooling.AVE
                
            pool_size = config['pool_size']
            strides  = config['strides']
            
            if pool_size[0]!=pool_size[1]:
                raise Exception('Unsupported pool_size')
                    
            if strides[0]!=strides[1]:
                raise Exception('Unsupported strides')
            
            set_padding(config, layer.input_shape, kwargs)
            
            caffe_net[name] = L.Pooling(caffe_net[outputs[bottom]], kernel_size=pool_size[0], 
                stride=strides[0], **kwargs)
        
        elif layer_type=='Dropout':
            caffe_net[name] = L.Dropout(caffe_net[outputs[bottom]], 
                dropout_param=dict(dropout_ratio=config['rate']))
        
        elif layer_type=='GlobalAveragePooling2D':
            caffe_net[name] = L.Pooling(caffe_net[outputs[bottom]], pool=P.Pooling.AVE, 
                pooling_param=dict(global_pooling=True))
        
        elif layer_type=='UpSampling2D':
            if config['size'][0]!=config['size'][1]:
                raise Exception('Unsupported upsampling factor')
            factor = config['size'][0]
            kernel_size = 2 * factor - factor % 2
            stride = factor
            pad = int(math.ceil((factor - 1) / 2.0))
            channels = layer.input_shape[-1]
            caffe_net[name] = L.Deconvolution(caffe_net[outputs[bottom]], convolution_param=dict(num_output=channels, 
                group=channels, kernel_size=kernel_size, stride=stride, pad=pad, weight_filler=dict(type='bilinear'), 
                bias_term=False), param=dict(lr_mult=0, decay_mult=0))
        
        elif layer_type=='LeakyReLU':
            caffe_net[name] = L.ReLU(caffe_net[outputs[bottom]], negative_slope=config['alpha'], in_place=True)

        #TODO
        elif layer_type=='ZeroPadding2D':
            padding=config['padding']
            #ch = layer.input_shape[3]
            #caffe_net[name] = L.Convolution(caffe_net[outputs[bottom]], num_output=ch, kernel_size=1, stride=1, group=ch,
            #    pad_h=padding[0][0], pad_w=padding[1][0], convolution_param=dict(bias_term = False))
            #params = np.ones((1,ch,1,1))
            
            #net_params[name] = np.ones((1,ch,1,1,1))
            #net_params[name] = np.ones(layer.output_shape)
            
            caffe_net[name] = L.Pooling(caffe_net[outputs[bottom]], kernel_size=1, 
                stride=1, pad_h=padding[0][0]+padding[0][1], pad_w=padding[1][0]+padding[1][1], pool=P.Pooling.AVE)
        
        else:
            raise Exception('Unsupported layer type: '+layer_type)
            
        outputs[top]=name

    #replace empty layer with input blob
    #net_proto = input_str + '\n' + 'layer {' + 'layer {'.join(str(caffe_net.to_proto()).split('layer {')[2:])
    net_proto = str(caffe_net.to_proto())
    
    f = open(caffe_net_file, 'w') 
    f.write(net_proto)
    f.close()
    
    caffe_model = caffe.Net(caffe_net_file, caffe.TEST)
    
    for layer in caffe_model.params.keys():
        print(layer)
        if 'up_sampling2d' in layer:
            continue
        if "activation_linear" in layer:
            continue
        for n in range(0, len(caffe_model.params[layer])):
            caffe_model.params[layer][n].data[...] = net_params[layer][n]

    caffe_model.save(caffe_params_file)
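# A hedged usage sketch for convert() (file names are placeholders, not from the
# original source). keras_format is only inspected for the substring "first", so
# a Keras data-format string such as 'channels_last' or 'channels_first' is
# assumed here.
from tensorflow import keras

keras_model = keras.models.load_model('model.h5')   # hypothetical trained Keras model
convert(keras_model,
        'channels_last',         # data format of the Keras model
        'model.prototxt',        # Caffe net definition to write
        'model.caffemodel')      # Caffe weights to write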
Exemplo n.º 15
0
def reduction_b(bottom):
    """
    input:1152x17x17
    output:2048x8x8
    :param bottom: bottom layer
    :return: layers
    """
    pool = L.Pooling(bottom, kernel_size=3, stride=2, pool=P.Pooling.MAX)  # 1152x8x8

    conv_3x3_reduce = L.Convolution(bottom, kernel_size=1, num_output=256, stride=1,
                                    param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                                    weight_filler=dict(type='xavier'),
                                    bias_filler=dict(type='constant', value=0))  # 256x17x17
    conv_3x3_reduce_bn = L.BatchNorm(conv_3x3_reduce, use_global_stats=False, in_place=True)
    conv_3x3_reduce_scale = L.Scale(conv_3x3_reduce, scale_param=dict(bias_term=True), in_place=True)
    conv_3x3_reduce_relu = L.ReLU(conv_3x3_reduce, in_place=True)

    conv_3x3 = L.Convolution(conv_3x3_reduce, kernel_size=3, num_output=384, stride=2,
                             param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                             weight_filler=dict(type='xavier'),
                             bias_filler=dict(type='constant', value=0))  # 384x8x8
    conv_3x3_bn = L.BatchNorm(conv_3x3, use_global_stats=False, in_place=True)
    conv_3x3_scale = L.Scale(conv_3x3, scale_param=dict(bias_term=True), in_place=True)
    conv_3x3_relu = L.ReLU(conv_3x3, in_place=True)

    conv_3x3_2_reduce = L.Convolution(bottom, kernel_size=1, num_output=256, stride=1,
                                      param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                                      weight_filler=dict(type='xavier'),
                                      bias_filler=dict(type='constant', value=0))  # 256x17x17
    conv_3x3_2_reduce_bn = L.BatchNorm(conv_3x3_2_reduce, use_global_stats=False, in_place=True)
    conv_3x3_2_reduce_scale = L.Scale(conv_3x3_2_reduce, scale_param=dict(bias_term=True), in_place=True)
    conv_3x3_2_reduce_relu = L.ReLU(conv_3x3_2_reduce, in_place=True)

    conv_3x3_2 = L.Convolution(conv_3x3_2_reduce, kernel_size=3, num_output=256, stride=2,
                               param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                               weight_filler=dict(type='xavier'),
                               bias_filler=dict(type='constant', value=0))  # 256x8x8
    conv_3x3_2_bn = L.BatchNorm(conv_3x3_2, use_global_stats=False, in_place=True)
    conv_3x3_2_scale = L.Scale(conv_3x3_2, scale_param=dict(bias_term=True), in_place=True)
    conv_3x3_2_relu = L.ReLU(conv_3x3_2, in_place=True)

    conv_3x3_3_reduce = L.Convolution(bottom, kernel_size=1, num_output=256, stride=1,
                                      param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                                      weight_filler=dict(type='xavier'),
                                      bias_filler=dict(type='constant', value=0))  # 256x17x17
    conv_3x3_3_reduce_bn = L.BatchNorm(conv_3x3_3_reduce, use_global_stats=False, in_place=True)
    conv_3x3_3_reduce_scale = L.Scale(conv_3x3_3_reduce, scale_param=dict(bias_term=True), in_place=True)
    conv_3x3_3_reduce_relu = L.ReLU(conv_3x3_3_reduce, in_place=True)

    conv_3x3_3 = L.Convolution(conv_3x3_3_reduce, kernel_size=3, num_output=256, stride=1, pad=1,
                               param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                               weight_filler=dict(type='xavier'),
                               bias_filler=dict(type='constant', value=0))  # 256x17x17
    conv_3x3_3_bn = L.BatchNorm(conv_3x3_3, use_global_stats=False, in_place=True)
    conv_3x3_3_scale = L.Scale(conv_3x3_3, scale_param=dict(bias_term=True), in_place=True)
    conv_3x3_3_relu = L.ReLU(conv_3x3_3, in_place=True)

    conv_3x3_4 = L.Convolution(conv_3x3_3, kernel_size=3, num_output=256, stride=2,
                               param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                               weight_filler=dict(type='xavier'),
                               bias_filler=dict(type='constant', value=0))  # 256x8x8
    conv_3x3_4_bn = L.BatchNorm(conv_3x3_4, use_global_stats=False, in_place=True)
    conv_3x3_4_scale = L.Scale(conv_3x3_4, scale_param=dict(bias_term=True), in_place=True)
    conv_3x3_4_relu = L.ReLU(conv_3x3_4, in_place=True)

    concat = L.Concat(pool, conv_3x3, conv_3x3_2, conv_3x3_4)  # 2048x8x8

    return pool, conv_3x3_reduce, conv_3x3_reduce_bn, conv_3x3_reduce_scale, conv_3x3_reduce_relu, conv_3x3, \
           conv_3x3_bn, conv_3x3_scale, conv_3x3_relu, conv_3x3_2_reduce, conv_3x3_2_reduce_bn, \
           conv_3x3_2_reduce_scale, conv_3x3_2_reduce_relu, conv_3x3_2, conv_3x3_2_bn, conv_3x3_2_scale, \
           conv_3x3_2_relu, conv_3x3_3_reduce, conv_3x3_3_reduce_bn, conv_3x3_3_reduce_scale, conv_3x3_3_reduce_relu, \
           conv_3x3_3, conv_3x3_3_bn, conv_3x3_3_scale, conv_3x3_3_relu, conv_3x3_4, conv_3x3_4_bn, conv_3x3_4_scale, \
           conv_3x3_4_relu, concat
Exemplo n.º 16
0
def AbdNet():
    growth_rate = 16
    dropout = 0.2
    vgg_nout = 64
    N = 5
    nchannels = 16
    imsize = 256
    msra = dict(type='msra')
    gs_1e_2 = dict(type='gaussian', std=0.01)
    # n = caffe.NetSpec()
    data, data2, albedo_diff_gt, albedo_gt = L.Python(ntop=4, \
        python_param=dict(\
            module='image_layer3_gradient',\
            layer='ImageLayer3',\
            param_str="{{'data_dir': '/home/albertxavier/dataset/sintel/images/', 'tops': ['data', 'data2', 'albedo_diff_gt', 'albedo_gt'],'seed': 1337,'split': 'train', 'list_file':'train_two_folds_split_scene.txt', 'mean_bgr': (104.00699, 116.66877, 122.67892), 'crop_size':({imsize},{imsize})}}".format(imsize=imsize)\
        )\
    )

    pool1, pool2, pool3, pool4, pool5 = make_VGG(data)

    # scale 2
    model = L.Convolution(data2, kernel_size=4, stride=2, 
                    num_output=96, pad=1, bias_term=True, weight_filler=msra, bias_filler=dict(type='constant', value=0))
    model = L.BatchNorm(model, in_place=False, param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)])
    model = L.Scale(model, bias_term=True, in_place=True, filler=dict(value=1), bias_filler=dict(value=0))
    model = L.ReLU(model, in_place=True)
    model = L.Pooling(model, pooling_param=dict(pool=P.Pooling.MAX, kernel_size=2, stride=2))
    model = L.Dropout(model, dropout_ratio=dropout)



    # concat VGG
    vgg1 = upsampleVGG(pool1, upsample = 2/4, dropout=dropout, nout=vgg_nout)
    vgg2 = upsampleVGG(pool2, upsample = 4/4, dropout=dropout, nout=vgg_nout)
    vgg3 = upsampleVGG(pool3, upsample = 8/4, dropout=dropout, nout=vgg_nout)
    vgg4 = upsampleVGG(pool4, upsample = 16/4, dropout=dropout, nout=vgg_nout)
    vgg5 = upsampleVGG(pool5, upsample = 32/4, dropout=dropout, nout=vgg_nout)

    model = L.Concat(model, vgg1, vgg2, vgg3, vgg4, vgg5, axis=1)


    # block 1: dense
    for i in range(N):
        model = add_layer(model, growth_rate, dropout)
        nchannels += growth_rate
    model = transition(model, nchannels, dropout, weight_filler=msra)

    # block 2: dense
    for i in range(N):
        model = add_layer(model, growth_rate, dropout)
        nchannels += growth_rate
    model = transition(model, nchannels, dropout, weight_filler=msra)

    # block 3: res
    # nchannels = int(nchannels * 0.6)
    # for i in range(N):
    #     if i == 0: project = True
    #     else: project = False
    #     model = add_layer(bottom, nchannels, dropout, project=project)

    # block 3: dense
    for i in range(N):
        model = add_layer(model, growth_rate, dropout)
        nchannels += growth_rate
    model = transition(model, nchannels, dropout, weight_filler=msra)

    # deep supervision
    model_deep = L.Convolution(model, kernel_size=1, stride=1, num_output=96, pad=0, bias_term=False, weight_filler=gs_1e_2, param=[dict(lr_mult=1, decay_mult=1)])
    model_deep = L.Deconvolution(model_deep,  convolution_param=dict(kernel_size=8, stride=4, num_output=3, pad=2, bias_term=True, weight_filler=dict(type='gaussian', std=0.001), bias_filler=dict(type='constant', value=0)), param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    loss_deep = L.Python(\
        model_deep, albedo_gt,\
        loss_weight=1.0, ntop=1,\
        python_param=dict(\
            module='l2loss',\
            layer='L2LossLayer',\
        )\
    )
    # model = L.Concat(model, model_deep, propagate_down=[True, False])

    # block 4
    for i in range(N):
        model = add_layer(model, growth_rate, dropout)
        nchannels += growth_rate
    model = transition(model, nchannels, dropout=0., weight_filler=msra)

    # fuse feature
    model = L.Convolution(model, kernel_size=1, stride=1, num_output=96, pad=0, bias_term=False, weight_filler=gs_1e_2, bias_filler=dict(type='constant'))
    # upsample
    model = L.Deconvolution(model,  convolution_param=dict(kernel_size=8, stride=4, num_output=6, pad=2, bias_term=True, weight_filler=dict(type='gaussian', std=0.001), bias_filler=dict(type='constant', value=0)), param=[dict(lr_mult=10, decay_mult=1), dict(lr_mult=20, decay_mult=0)])

    # loss
    loss = L.Python(\
        model, albedo_diff_gt,\
        loss_weight=1.0, ntop=1,\
        python_param=dict(\
            module='l2loss-gradient-hist',\
            layer='L2LossLayer',\
            param_str="{'display': True}"\
        )\
    )

    return to_proto(loss, loss_deep)
Exemplo n.º 17
0
def stem_299x299(bottom):
    """
    input:3x299x299
    output:384x35x35
    :param bottom: bottom layer
    :return: layers
    """
    conv1_3x3_s2 = L.Convolution(bottom, kernel_size=3, num_output=32, stride=2,
                                 param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                                 weight_filler=dict(type='xavier', std=0.01),
                                 bias_filler=dict(type='constant', value=0.2))  # 32x149x149
    conv1_3x3_s2_bn = L.BatchNorm(conv1_3x3_s2, use_global_stats=False, in_place=True)
    conv1_3x3_s2_scale = L.Scale(conv1_3x3_s2, scale_param=dict(bias_term=True), in_place=True)
    conv1_3x3_s2_relu = L.ReLU(conv1_3x3_s2, in_place=True)

    conv2_3x3_s1 = L.Convolution(conv1_3x3_s2, kernel_size=3, num_output=32, stride=1,
                                 param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                                 weight_filler=dict(type='xavier', std=0.01),
                                 bias_filler=dict(type='constant', value=0.2))  # 32x147x147
    conv2_3x3_s1_bn = L.BatchNorm(conv2_3x3_s1, use_global_stats=False, in_place=True)
    conv2_3x3_s1_scale = L.Scale(conv2_3x3_s1, scale_param=dict(bias_term=True), in_place=True)
    conv2_3x3_s1_relu = L.ReLU(conv2_3x3_s1, in_place=True)

    conv3_3x3_s1 = L.Convolution(conv2_3x3_s1, kernel_size=3, num_output=64, stride=1, pad=1,
                                 param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                                 weight_filler=dict(type='xavier', std=0.01),
                                 bias_filler=dict(type='constant', value=0.2))  # 64x147x147
    conv3_3x3_s1_bn = L.BatchNorm(conv3_3x3_s1, use_global_stats=False, in_place=True)
    conv3_3x3_s1_scale = L.Scale(conv3_3x3_s1, scale_param=dict(bias_term=True), in_place=True)
    conv3_3x3_s1_relu = L.ReLU(conv3_3x3_s1, in_place=True)

    inception_stem1_3x3_s2 = L.Convolution(conv3_3x3_s1, kernel_size=3, num_output=96, stride=2,
                                           param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                                           weight_filler=dict(type='xavier', std=0.01),
                                           bias_filler=dict(type='constant', value=0.2))  # 96x73x73
    inception_stem1_3x3_s2_bn = L.BatchNorm(inception_stem1_3x3_s2, use_global_stats=False, in_place=True)
    inception_stem1_3x3_s2_scale = L.Scale(inception_stem1_3x3_s2, scale_param=dict(bias_term=True),
                                           in_place=True)
    inception_stem1_3x3_s2_relu = L.ReLU(inception_stem1_3x3_s2, in_place=True)
    inception_stem1_pool = L.Pooling(conv3_3x3_s1, kernel_size=3, stride=2,
                                     pool=P.Pooling.MAX)  # 64x73x73
    inception_stem1 = L.Concat(inception_stem1_3x3_s2, inception_stem1_pool)  # 160x73x73

    inception_stem2_3x3_reduce = L.Convolution(inception_stem1, kernel_size=1, num_output=64,
                                               param=[dict(lr_mult=1, decay_mult=1),
                                                      dict(lr_mult=2, decay_mult=0)],
                                               weight_filler=dict(type='xavier', std=0.01),
                                               bias_filler=dict(type='constant', value=0.2))  # 64x73x73
    inception_stem2_3x3_reduce_bn = L.BatchNorm(inception_stem2_3x3_reduce, use_global_stats=False,
                                                in_place=True)
    inception_stem2_3x3_reduce_scale = L.Scale(inception_stem2_3x3_reduce,
                                               scale_param=dict(bias_term=True), in_place=True)
    inception_stem2_3x3_reduce_relu = L.ReLU(inception_stem2_3x3_reduce, in_place=True)
    inception_stem2_3x3 = L.Convolution(inception_stem2_3x3_reduce, kernel_size=3, num_output=96,
                                        param=[dict(lr_mult=1, decay_mult=1),
                                               dict(lr_mult=2, decay_mult=0)],
                                        weight_filler=dict(type='xavier', std=0.01),
                                        bias_filler=dict(type='constant', value=0.2))  # 96x71x71
    inception_stem2_3x3_bn = L.BatchNorm(inception_stem2_3x3, use_global_stats=False, in_place=True)
    inception_stem2_3x3_scale = L.Scale(inception_stem2_3x3, scale_param=dict(bias_term=True), in_place=True)
    inception_stem2_3x3_relu = L.ReLU(inception_stem2_3x3, in_place=True)

    inception_stem2_7x1_reduce = L.Convolution(inception_stem1, kernel_size=1, num_output=64,
                                               param=[dict(lr_mult=1, decay_mult=1),
                                                      dict(lr_mult=2, decay_mult=0)],
                                               weight_filler=dict(type='xavier', std=0.01),
                                               bias_filler=dict(type='constant', value=0.2))  # 64x73x73
    inception_stem2_7x1_reduce_bn = L.BatchNorm(inception_stem2_7x1_reduce, use_global_stats=False,
                                                in_place=True)
    inception_stem2_7x1_reduce_scale = L.Scale(inception_stem2_7x1_reduce,
                                               scale_param=dict(bias_term=True), in_place=True)
    inception_stem2_7x1_reduce_relu = L.ReLU(inception_stem2_7x1_reduce, in_place=True)
    inception_stem2_7x1 = L.Convolution(inception_stem2_7x1_reduce, kernel_h=7, kernel_w=1, num_output=64,
                                        pad_h=3, pad_w=0, stride=1,
                                        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                                        weight_filler=dict(type='xavier'),
                                        bias_filler=dict(type='constant', value=0))  # 64x73x73
    inception_stem2_7x1_bn = L.BatchNorm(inception_stem2_7x1, use_global_stats=False, in_place=True)
    inception_stem2_7x1_scale = L.Scale(inception_stem2_7x1, scale_param=dict(bias_term=True), in_place=True)
    inception_stem2_7x1_relu = L.ReLU(inception_stem2_7x1, in_place=True)
    inception_stem2_1x7 = L.Convolution(inception_stem2_7x1, kernel_h=1, kernel_w=7, num_output=64,
                                        pad_h=0, pad_w=3, stride=1,
                                        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                                        weight_filler=dict(type='xavier'),
                                        bias_filler=dict(type='constant', value=0))  # 64x73x73
    inception_stem2_1x7_bn = L.BatchNorm(inception_stem2_1x7, use_global_stats=False, in_place=True)
    inception_stem2_1x7_scale = L.Scale(inception_stem2_1x7, scale_param=dict(bias_term=True), in_place=True)
    inception_stem2_1x7_relu = L.ReLU(inception_stem2_1x7, in_place=True)
    inception_stem2_3x3_2 = L.Convolution(inception_stem2_1x7, kernel_size=3, num_output=96,
                                          param=[dict(lr_mult=1, decay_mult=1),
                                                 dict(lr_mult=2, decay_mult=0)],
                                          weight_filler=dict(type='xavier', std=0.01),
                                          bias_filler=dict(type='constant', value=0.2))  # 96x71x71
    inception_stem2_3x3_2_bn = L.BatchNorm(inception_stem2_3x3_2, use_global_stats=False, in_place=True)
    inception_stem2_3x3_2_scale = L.Scale(inception_stem2_3x3_2, scale_param=dict(bias_term=True),
                                          in_place=True)
    inception_stem2_3x3_2_relu = L.ReLU(inception_stem2_3x3_2, in_place=True)
    inception_stem2 = L.Concat(inception_stem2_3x3, inception_stem2_3x3_2)  # 192x71x71

    inception_stem3_3x3_s2 = L.Convolution(inception_stem2, kernel_size=3, num_output=192, stride=2,
                                           param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                                           weight_filler=dict(type='xavier', std=0.01),
                                           bias_filler=dict(type='constant', value=0.2))  # 192x35x35
    inception_stem3_3x3_s2_bn = L.BatchNorm(inception_stem3_3x3_s2, use_global_stats=False, in_place=True)
    inception_stem3_3x3_s2_scale = L.Scale(inception_stem3_3x3_s2, scale_param=dict(bias_term=True),
                                           in_place=True)
    inception_stem3_3x3_s2_relu = L.ReLU(inception_stem3_3x3_s2, in_place=True)
    inception_stem3_pool = L.Pooling(inception_stem2, kernel_size=3, stride=2,
                                     pool=P.Pooling.MAX)  # 192x35x35
    inception_stem3 = L.Concat(inception_stem3_3x3_s2, inception_stem3_pool)  # 384x35x35

    return conv1_3x3_s2, conv1_3x3_s2_bn, conv1_3x3_s2_scale, conv1_3x3_s2_relu, conv2_3x3_s1, conv2_3x3_s1_bn, \
           conv2_3x3_s1_scale, conv2_3x3_s1_relu, conv3_3x3_s1, conv3_3x3_s1_bn, conv3_3x3_s1_scale, conv3_3x3_s1_relu, \
           inception_stem1_3x3_s2, inception_stem1_3x3_s2_bn, inception_stem1_3x3_s2_scale, inception_stem1_3x3_s2_relu, \
           inception_stem1_pool, inception_stem1, inception_stem2_3x3_reduce, inception_stem2_3x3_reduce_bn, \
           inception_stem2_3x3_reduce_scale, inception_stem2_3x3_reduce_relu, inception_stem2_3x3, \
           inception_stem2_3x3_bn, inception_stem2_3x3_scale, inception_stem2_3x3_relu, inception_stem2_7x1_reduce, \
           inception_stem2_7x1_reduce_bn, inception_stem2_7x1_reduce_scale, inception_stem2_7x1_reduce_relu, \
           inception_stem2_7x1, inception_stem2_7x1_bn, inception_stem2_7x1_scale, inception_stem2_7x1_relu, \
           inception_stem2_1x7, inception_stem2_1x7_bn, inception_stem2_1x7_scale, inception_stem2_1x7_relu, \
           inception_stem2_3x3_2, inception_stem2_3x3_2_bn, inception_stem2_3x3_2_scale, inception_stem2_3x3_2_relu, \
           inception_stem2, inception_stem3_3x3_s2, inception_stem3_3x3_s2_bn, inception_stem3_3x3_s2_scale, \
           inception_stem3_3x3_s2_relu, inception_stem3_pool, inception_stem3
Exemplo n.º 18
0
def densenet(data_file,
             mode='train',
             batch_size=64,
             depth=40,
             first_output=16,
             growth_rate=12,
             dropout=0.2):
    data, label = L.Data(
        source=data_file,
        backend=P.Data.LMDB,
        batch_size=batch_size,
        ntop=2,
        transform_param=dict(
            mean_file="/home/sjxy/densenetcaffe/examples/cifar10/mean.binaryproto"))

    nchannels = first_output
    model = L.Convolution(data,
                          kernel_size=3,
                          stride=1,
                          num_output=nchannels,
                          pad=1,
                          bias_term=False,
                          weight_filler=dict(type='msra'),
                          bias_filler=dict(type='constant'))

    N = (depth - 4) // 3  # integer division so range(N) also works under Python 3
    for i in range(N):
        model = add_layer(model, growth_rate, dropout)
        nchannels += growth_rate
    model = transition(model, nchannels, dropout)

    for i in range(N):
        model = add_layer(model, growth_rate, dropout)
        nchannels += growth_rate
    model = transition(model, nchannels, dropout)

    for i in range(N):
        model = add_layer(model, growth_rate, dropout)
        nchannels += growth_rate

    model = L.BatchNorm(model,
                        in_place=False,
                        param=[
                            dict(lr_mult=0, decay_mult=0),
                            dict(lr_mult=0, decay_mult=0),
                            dict(lr_mult=0, decay_mult=0)
                        ])
    model = L.Scale(model,
                    bias_term=True,
                    in_place=True,
                    filler=dict(value=1),
                    bias_filler=dict(value=0))
    model = L.ReLU(model, in_place=True)
    model = L.Pooling(model, pool=P.Pooling.AVE, global_pooling=True)
    model = L.InnerProduct(model,
                           num_output=10,
                           bias_term=True,
                           weight_filler=dict(type='xavier'),
                           bias_filler=dict(type='constant'))
    loss = L.SoftmaxWithLoss(model, label)
    accuracy = L.Accuracy(model, label)
    return to_proto(loss, accuracy)
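# A minimal sketch (paths are illustrative) of writing the returned NetParameter
# to a train prototxt; str() of the to_proto() result is the prototxt text.
train_net = densenet('examples/cifar10/cifar10_train_lmdb',
                     mode='train', batch_size=64, depth=40)
with open('densenet_train.prototxt', 'w') as f:
    f.write(str(train_net))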
Exemplo n.º 19
0
def ConvBNLayer(net, from_layer, out_layer, use_bn, use_relu, num_output,
    kernel_size, pad, stride, dilation=1, use_scale=True, lr_mult=1,
    conv_prefix='', conv_postfix='', bn_prefix='', bn_postfix='_bn',
    scale_prefix='', scale_postfix='_scale', bias_prefix='', bias_postfix='_bias',
    **bn_params):
  if use_bn:
    # parameters for convolution layer with batchnorm.
    kwargs = {
        'param': [dict(lr_mult=lr_mult, decay_mult=1)],
        'weight_filler': dict(type='gaussian', std=0.01),
        'bias_term': False,
        }
    eps = bn_params.get('eps', 0.001)
    moving_average_fraction = bn_params.get('moving_average_fraction', 0.999)
    use_global_stats = bn_params.get('use_global_stats', False)
    # parameters for batchnorm layer.
    bn_kwargs = {
        'param': [
            dict(lr_mult=0, decay_mult=0),
            dict(lr_mult=0, decay_mult=0),
            dict(lr_mult=0, decay_mult=0)],
        'eps': eps,
        'moving_average_fraction': moving_average_fraction,
        }
    bn_lr_mult = lr_mult
    if use_global_stats:
      # only specify if use_global_stats is explicitly provided;
      # otherwise, use_global_stats_ = this->phase_ == TEST;
      bn_kwargs = {
          'param': [
              dict(lr_mult=0, decay_mult=0),
              dict(lr_mult=0, decay_mult=0),
              dict(lr_mult=0, decay_mult=0)],
          'eps': eps,
          'use_global_stats': use_global_stats,
          }
      # not updating scale/bias parameters
      bn_lr_mult = 0
    # parameters for scale bias layer after batchnorm.
    if use_scale:
      sb_kwargs = {
          'bias_term': True,
          'param': [
              dict(lr_mult=bn_lr_mult, decay_mult=0),
              dict(lr_mult=bn_lr_mult, decay_mult=0)],
          'filler': dict(type='constant', value=1.0),
          'bias_filler': dict(type='constant', value=0.0),
          }
    else:
      bias_kwargs = {
          'param': [dict(lr_mult=bn_lr_mult, decay_mult=0)],
          'filler': dict(type='constant', value=0.0),
          }
  else:
    kwargs = {
        'param': [
            dict(lr_mult=lr_mult, decay_mult=1),
            dict(lr_mult=2 * lr_mult, decay_mult=0)],
        'weight_filler': dict(type='xavier'),
        'bias_filler': dict(type='constant', value=0)
        }

  conv_name = '{}{}{}'.format(conv_prefix, out_layer, conv_postfix)
  [kernel_h, kernel_w] = UnpackVariable(kernel_size, 2)
  [pad_h, pad_w] = UnpackVariable(pad, 2)
  [stride_h, stride_w] = UnpackVariable(stride, 2)
  if kernel_h == kernel_w:
    net[conv_name] = L.Convolution(net[from_layer], num_output=num_output,
        kernel_size=kernel_h, pad=pad_h, stride=stride_h, **kwargs)
  else:
    net[conv_name] = L.Convolution(net[from_layer], num_output=num_output,
        kernel_h=kernel_h, kernel_w=kernel_w, pad_h=pad_h, pad_w=pad_w,
        stride_h=stride_h, stride_w=stride_w, **kwargs)
  if dilation > 1:
    net.update(conv_name, {'dilation': dilation})
  if use_bn:
    bn_name = '{}{}{}'.format(bn_prefix, out_layer, bn_postfix)
    net[bn_name] = L.BatchNorm(net[conv_name], in_place=True, **bn_kwargs)
    if use_scale:
      sb_name = '{}{}{}'.format(scale_prefix, out_layer, scale_postfix)
      net[sb_name] = L.Scale(net[bn_name], in_place=True, **sb_kwargs)
    else:
      bias_name = '{}{}{}'.format(bias_prefix, out_layer, bias_postfix)
      net[bias_name] = L.Bias(net[bn_name], in_place=True, **bias_kwargs)
  if use_relu:
    relu_name = '{}_relu'.format(conv_name)
    net[relu_name] = L.ReLU(net[conv_name], in_place=True)
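# A hedged usage sketch for ConvBNLayer (input shape and layer names are
# illustrative; UnpackVariable and the caffe/L imports from the same helper
# module are assumed to be available).
net = caffe.NetSpec()
net.data = L.Input(input_param=dict(shape=[dict(dim=[1, 3, 300, 300])]))
ConvBNLayer(net, 'data', 'conv1_1', use_bn=True, use_relu=True,
            num_output=64, kernel_size=3, pad=1, stride=1)
# adds conv1_1, conv1_1_bn, conv1_1_scale and conv1_1_relu to the net
print(net.to_proto())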
Exemplo n.º 20
0
def qlstm(mode, batchsize, T, question_vocab_size):
    n = caffe.NetSpec()
    mode_str = json.dumps({'mode':mode, 'batchsize':batchsize})
    # n.data, n.cont, n.img_feature, n.label, n.glove = L.Python(\
    #     module='vqa_data_provider_layer', layer='VQADataProviderLayer', param_str=mode_str, ntop=5 )
    n.data, n.cont, n.img_feature, n.label = L.Python(\
        module='vqa_data_provider_layer', layer='VQADataProviderLayer', param_str=mode_str, ntop=4 )
    
    # word embedding
    n.embed_ba = L.Embed(n.data, input_dim=question_vocab_size, num_output=300, \
        weight_filler=dict(type='uniform',min=-0.08,max=0.08))
    # n.embed = L.TanH(n.embed_ba)
    n.embed_scale = L.Scale(n.embed_ba, n.cont, scale_param=dict(axis=0))
    n.embed_scale_resh = L.Reshape(n.embed_scale,\
                          reshape_param=dict(\
                              shape=dict(dim=[batchsize,1,T,-1])))

    # convolution
    n.word_feature1_1 = L.Convolution(n.embed_scale_resh, kernel_h=1, kernel_w=300, stride=1, num_output=512, pad_h=0, pad_w=0, weight_filler=dict(type='xavier')) # N x C x T x 1
    n.word_feature1_3 = L.Convolution(n.embed_scale_resh, kernel_h=3, kernel_w=300, stride=1, num_output=512, pad_h=1, pad_w=0, weight_filler=dict(type='xavier'))
    n.word_feature1_5 = L.Convolution(n.embed_scale_resh, kernel_h=5, kernel_w=300, stride=1, num_output=512, pad_h=2, pad_w=0, weight_filler=dict(type='xavier'))
    n.word_feature1_7 = L.Convolution(n.embed_scale_resh, kernel_h=7, kernel_w=300, stride=1, num_output=512, pad_h=3, pad_w=0, weight_filler=dict(type='xavier'))
    n.word_relu1_1 = L.ReLU(n.word_feature1_1)
    n.word_relu1_3 = L.ReLU(n.word_feature1_3)
    n.word_relu1_5 = L.ReLU(n.word_feature1_5)
    n.word_relu1_7 = L.ReLU(n.word_feature1_7)
    word_vec1 = [n.word_relu1_1, n.word_relu1_3, n.word_relu1_5, n.word_relu1_7]
    n.concat_vec1 = L.Concat(*word_vec1, concat_param={'axis': 1}) # N x C' x T x 1

    n.word_feature2_1 = L.Convolution(n.concat_vec1, kernel_h=1, kernel_w=1, stride=1, num_output=512, pad_h=0, pad_w=0, weight_filler=dict(type='xavier')) # N x C x T x 1
    n.word_feature2_3 = L.Convolution(n.concat_vec1, kernel_h=3, kernel_w=1, stride=1, num_output=512, pad_h=1, pad_w=0, weight_filler=dict(type='xavier'))
    n.word_feature2_5 = L.Convolution(n.concat_vec1, kernel_h=5, kernel_w=1, stride=1, num_output=512, pad_h=2, pad_w=0, weight_filler=dict(type='xavier'))
    n.word_feature2_7 = L.Convolution(n.concat_vec1, kernel_h=7, kernel_w=1, stride=1, num_output=512, pad_h=3, pad_w=0, weight_filler=dict(type='xavier'))
    word_vec2 = [n.word_feature2_1, n.word_feature2_3, n.word_feature2_5, n.word_feature2_7]
    n.concat_vec2 = L.Concat(*word_vec2, concat_param={'axis': 1}) # N x 4C x T x 1
    n.res_1 = L.Eltwise(n.concat_vec1,n.concat_vec2)
    n.res_1_relu = L.ReLU(n.res_1)

    n.word_vec_p = L.Pooling(n.res_1_relu, kernel_h=T, kernel_w=1, stride=T, pool=P.Pooling.MAX) # N x C x 1 x 1
    n.sentence_vec = L.Dropout(n.word_vec_p, dropout_param={'dropout_ratio':0.5})
    
    n.q_emb_tanh_droped_resh_tiled_1 = L.Tile(n.sentence_vec, axis=2, tiles=14)
    n.q_emb_tanh_droped_resh_tiled = L.Tile(n.q_emb_tanh_droped_resh_tiled_1, axis=3, tiles=14)
    n.i_emb_tanh_droped_resh = L.Reshape(n.img_feature,reshape_param=dict(shape=dict(dim=[-1,2048,14,14])))
    n.blcf = L.CompactBilinear(n.q_emb_tanh_droped_resh_tiled, n.i_emb_tanh_droped_resh, compact_bilinear_param=dict(num_output=16000,sum_pool=False))
    n.blcf_sign_sqrt = L.SignedSqrt(n.blcf)
    n.blcf_sign_sqrt_l2 = L.L2Normalize(n.blcf_sign_sqrt)
    n.blcf_droped = L.Dropout(n.blcf_sign_sqrt_l2,dropout_param={'dropout_ratio':0.1})

    # multi-channel attention
    n.att_conv1 = L.Convolution(n.blcf_droped, kernel_size=1, stride=1, num_output=512, pad=0, weight_filler=dict(type='xavier'))
    n.att_conv1_relu = L.ReLU(n.att_conv1)
    n.att_conv2 = L.Convolution(n.att_conv1_relu, kernel_size=1, stride=1, num_output=2, pad=0, weight_filler=dict(type='xavier'))
    n.att_reshaped = L.Reshape(n.att_conv2,reshape_param=dict(shape=dict(dim=[-1,2,14*14])))
    n.att_softmax = L.Softmax(n.att_reshaped, axis=2)
    n.att = L.Reshape(n.att_softmax,reshape_param=dict(shape=dict(dim=[-1,2,14,14])))
    att_maps = L.Slice(n.att, ntop=2, slice_param={'axis':1})
    n.att_map0 = att_maps[0]
    n.att_map1 = att_maps[1]
    dummy = L.DummyData(shape=dict(dim=[batchsize, 1]), data_filler=dict(type='constant', value=1), ntop=1)
    n.att_feature0  = L.SoftAttention(n.i_emb_tanh_droped_resh, n.att_map0, dummy)
    n.att_feature1  = L.SoftAttention(n.i_emb_tanh_droped_resh, n.att_map1, dummy)
    n.att_feature0_resh = L.Reshape(n.att_feature0, reshape_param=dict(shape=dict(dim=[-1,2048])))
    n.att_feature1_resh = L.Reshape(n.att_feature1, reshape_param=dict(shape=dict(dim=[-1,2048])))
    n.att_feature = L.Concat(n.att_feature0_resh, n.att_feature1_resh)

    # merge attention and lstm with compact bilinear pooling
    n.att_feature_resh = L.Reshape(n.att_feature, reshape_param=dict(shape=dict(dim=[-1,4096,1,1])))
    #n.lstm_12_resh = L.Reshape(n.lstm_12, reshape_param=dict(shape=dict(dim=[-1,2048,1,1])))
    n.bc_att_lstm = L.CompactBilinear(n.att_feature_resh, n.sentence_vec, 
                                      compact_bilinear_param=dict(num_output=16000,sum_pool=False))
    n.bc_sign_sqrt = L.SignedSqrt(n.bc_att_lstm)
    n.bc_sign_sqrt_l2 = L.L2Normalize(n.bc_sign_sqrt)

    n.bc_dropped = L.Dropout(n.bc_sign_sqrt_l2, dropout_param={'dropout_ratio':0.1})
    n.bc_dropped_resh = L.Reshape(n.bc_dropped, reshape_param=dict(shape=dict(dim=[-1, 16000])))

    n.prediction = L.InnerProduct(n.bc_dropped_resh, num_output=3000, weight_filler=dict(type='xavier'))
    n.loss = L.SoftmaxWithLoss(n.prediction, n.label)
    return n.to_proto()
Exemplo n.º 21
0
def conv_bn_relu(bottom,
                 kernel_size,
                 num_output,
                 name,
                 deploy,
                 stride=1,
                 pad=0,
                 group=1,
                 conv_bias_term=True):
    if conv_bias_term:
        conv = L.Convolution(bottom,
                             kernel_size=kernel_size,
                             stride=stride,
                             num_output=num_output,
                             pad=pad,
                             group=group,
                             weight_filler=dict(type="xavier"),
                             bias_filler=dict(type="constant", value=0),
                             param=[
                                 dict(lr_mult=1, decay_mult=1),
                                 dict(lr_mult=2, decay_mult=0)
                             ],
                             name="{0}_conv".format(name))
    else:
        conv = L.Convolution(bottom,
                             kernel_size=kernel_size,
                             stride=stride,
                             num_output=num_output,
                             pad=pad,
                             group=group,
                             weight_filler=dict(type="xavier"),
                             bias_term=False,
                             param=[dict(lr_mult=1, decay_mult=1)],
                             name="{0}_conv".format(name))

    if deploy:
        # In our BN layers, the provided mean and variance are strictly computed using
        # average (not moving average) on a sufficiently large training batch after the training procedure.
        # The numerical results are very stable (variation of val error < 0.1%).
        # Using moving average might lead to different results.
        # from https://github.com/KaimingHe/deep-residual-networks
        # So set use_global_stats = true in deployment. See also the ResNet deployment prototxts.
        batch_norm = L.BatchNorm(conv,
                                 in_place=True,
                                 batch_norm_param=dict(use_global_stats=True),
                                 name="{0}_batch_norm".format(name))
    else:
        # By default, use_global_stats is set to false when the network is in the training
        # phase and true when the network is in the testing phase.
        # from caffe BatchNorm
        batch_norm = L.BatchNorm(conv,
                                 in_place=True,
                                 param=[
                                     dict(lr_mult=0, decay_mult=0),
                                     dict(lr_mult=0, decay_mult=0),
                                     dict(lr_mult=0, decay_mult=0)
                                 ],
                                 name="{0}_batch_norm".format(name))

    scale = L.Scale(batch_norm,
                    bias_term=True,
                    in_place=True,
                    name="{0}_scale".format(name))
    relu = L.ReLU(scale, in_place=True, name="{0}_relu".format(name))
    return relu
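# A small sketch (blob names illustrative, caffe/L imports as in the other
# examples) contrasting the deploy flag: deploy=True pins BatchNorm to the
# stored global statistics, deploy=False lets it accumulate batch statistics
# during training.
def make_block(deploy):
    n = caffe.NetSpec()
    n.data = L.Input(input_param=dict(shape=[dict(dim=[1, 3, 224, 224])]))
    n.block1 = conv_bn_relu(n.data, kernel_size=3, num_output=64,
                            name='block1', deploy=deploy, pad=1)
    return n.to_proto()

train_proto = make_block(deploy=False)
deploy_proto = make_block(deploy=True)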
Exemplo n.º 22
0
def lenet(lmdb, batch_size):

    n = caffe.NetSpec()
    # Input layer
    n.data, n.label = L.Data(batch_size=batch_size,
                             backend=P.Data.LMDB,
                             source=lmdb,
                             transform_param=dict(scale=1. / 255),
                             ntop=2)

    # Residual convolution
    n.convres = L.Convolution(n.data,
                              kernel_size=5,
                              num_output=12,
                              stride=1,
                              weight_filler=dict(type='xavier'))
    # No activation for this first layer

    # Two layers of convolution
    n.conv1 = L.Convolution(n.convres,
                            kernel_size=7,
                            num_output=64,
                            stride=2,
                            weight_filler=dict(type='xavier'))
    n.batch_norm1 = L.BatchNorm(n.conv1,
                                in_place=True,
                                param=[
                                    dict(lr_mult=0, decay_mult=0),
                                    dict(lr_mult=0, decay_mult=0),
                                    dict(lr_mult=0, decay_mult=0)
                                ])
    n.scale1 = L.Scale(n.batch_norm1, bias_term=True, in_place=True)
    n.relu2 = L.TanH(n.scale1, in_place=True)
    #n.relu2 = L.ReLU(n.scale1, in_place=True)
    n.pool1 = L.Pooling(n.relu2, kernel_size=3, stride=2, pool=P.Pooling.MAX)

    n.conv2 = L.Convolution(n.pool1,
                            kernel_size=5,
                            num_output=48,
                            stride=1,
                            weight_filler=dict(type='xavier'))
    n.batch_norm2 = L.BatchNorm(n.conv2,
                                in_place=True,
                                param=[
                                    dict(lr_mult=0, decay_mult=0),
                                    dict(lr_mult=0, decay_mult=0),
                                    dict(lr_mult=0, decay_mult=0)
                                ])
    n.scale2 = L.Scale(n.batch_norm2, bias_term=True, in_place=True)
    n.relu3 = L.TanH(n.scale2, in_place=True)
    #n.relu3 = L.ReLU(n.scale2, in_place=True)
    n.pool2 = L.Pooling(n.relu3, kernel_size=3, stride=2, pool=P.Pooling.MAX)

    # Dense classifier
    n.fc1 = L.InnerProduct(n.pool2,
                           num_output=4096,
                           weight_filler=dict(type='xavier'))
    n.relu4 = L.ReLU(n.fc1, in_place=True)
    n.drop1 = L.Dropout(n.relu4, in_place=True)

    n.fc2 = L.InnerProduct(n.drop1,
                           num_output=4096,
                           weight_filler=dict(type='xavier'))
    n.relu5 = L.ReLU(n.fc2, in_place=True)
    n.drop2 = L.Dropout(n.relu5, in_place=True)

    # Outputs
    n.score = L.InnerProduct(n.drop2,
                             num_output=2,
                             weight_filler=dict(type='xavier'))
    n.loss = L.SoftmaxWithLoss(n.score, n.label)

    return n.to_proto()
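# A minimal usage sketch (paths and batch sizes are assumptions): write train/test prototxts
# from lenet() above; 'train.lmdb' and 'test.lmdb' are hypothetical LMDB locations.
with open('lenet_train.prototxt', 'w') as f:
    f.write(str(lenet('train.lmdb', 64)))
with open('lenet_test.prototxt', 'w') as f:
    f.write(str(lenet('test.lmdb', 100)))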
Exemplo n.º 23
0
def conv_BN_scale_relu(bottom, nout, ks=3, stride=1, pad=1):
    conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
        num_output=nout, pad=pad, bias_term=False,
        weight_filler=dict(type="msra") )
    bn = L.BatchNorm(conv, in_place=True)
    scale = L.Scale(bn, in_place=True, bias_term=True)
    relu = L.ReLU(scale, in_place=True)
    return conv, bn, scale, relu
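# Note on the in_place chain above: BatchNorm, Scale and ReLU all read and write the
# convolution's output blob, so only one blob name appears in the generated prototxt;
# chaining each layer off the previous one simply makes the intended order explicit.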
Exemplo n.º 24
0
def project_residual(bottom,
                     kernel_size=3,
                     num_out=64,
                     stride=1,
                     pad=0,
                     first=None):

    # 'first' selects the pre-activation layout: 'both_act' applies BN/Scale/ReLU before both
    # the projection and the residual branch, 'pre_act' skips the pre-activation entirely
    # (the input is already activated), and any other value applies it to the residual branch only.

    # branch 1
    if (first == 'both_act'):
        pre_bn = L.BatchNorm(bottom, in_place=True)
        pre_scale = L.Scale(pre_bn,
                            scale_param=dict(bias_term=True),
                            in_place=True)
        pre_relu = L.ReLU(pre_scale, in_place=True)
        conv_proj = L.Convolution(pre_relu,
                                  kernel_size=1,
                                  num_output=num_out * 4,
                                  stride=stride,
                                  pad=0,
                                  param=[dict(lr_mult=1, decay_mult=1)],
                                  bias_term=False,
                                  weight_filler=weight_filler)
    elif (first != 'pre_act'):
        pre_bn = L.BatchNorm(bottom, in_place=True)
        pre_scale = L.Scale(pre_bn,
                            scale_param=dict(bias_term=True),
                            in_place=True)
        pre_relu = L.ReLU(pre_scale, in_place=True)
        conv_proj = L.Convolution(bottom,
                                  kernel_size=1,
                                  num_output=num_out * 4,
                                  stride=stride,
                                  pad=0,
                                  param=[dict(lr_mult=1, decay_mult=1)],
                                  bias_term=False,
                                  weight_filler=weight_filler)
    else:
        pre_bn = bottom
        pre_scale = bottom
        pre_relu = bottom
        conv_proj = L.Convolution(bottom,
                                  kernel_size=1,
                                  num_output=num_out * 4,
                                  stride=stride,
                                  pad=0,
                                  param=[dict(lr_mult=1, decay_mult=1)],
                                  bias_term=False,
                                  weight_filler=weight_filler)

    # branch 2
    conv1, bn1, scale1, relu1 = conv_bn_scale_relu(pre_relu,
                                                   kernel_size=1,
                                                   num_out=num_out,
                                                   stride=1,
                                                   pad=0)
    conv2, bn2, scale2, relu2 = conv_bn_scale_relu(conv1,
                                                   kernel_size=3,
                                                   num_out=num_out,
                                                   stride=stride,
                                                   pad=1)
    conv3 = L.Convolution(relu2,
                          kernel_size=1,
                          num_output=num_out * 4,
                          stride=1,
                          pad=0,
                          param=[dict(lr_mult=1, decay_mult=1)],
                          bias_term=False,
                          weight_filler=weight_filler)

    eltsum = eltsum_block(conv_proj, conv3)

    return pre_bn, pre_scale, pre_relu, \
           conv_proj, \
           conv1, bn1, scale1, relu1, \
           conv2, bn2, scale2, relu2, \
           conv3, eltsum
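# The helpers used above are not shown in this example. A minimal sketch of what they are
# assumed to look like (conv_bn_scale_relu, eltsum_block, and the module-level weight_filler /
# bias_filler referenced here and in a later example):
weight_filler = dict(type='msra')             # assumption: MSRA init, as in the other ResNet examples
bias_filler = dict(type='constant', value=0)  # assumption

def conv_bn_scale_relu(bottom, kernel_size=3, num_out=64, stride=1, pad=0):
    conv = L.Convolution(bottom, kernel_size=kernel_size, num_output=num_out,
                         stride=stride, pad=pad, bias_term=False,
                         param=[dict(lr_mult=1, decay_mult=1)],
                         weight_filler=weight_filler)
    bn = L.BatchNorm(conv, in_place=True)
    scale = L.Scale(bn, scale_param=dict(bias_term=True), in_place=True)
    relu = L.ReLU(scale, in_place=True)
    return conv, bn, scale, relu

def eltsum_block(bottom_a, bottom_b):
    # element-wise sum joining the projection branch and the residual branch
    return L.Eltwise(bottom_a, bottom_b, eltwise_param=dict(operation=P.Eltwise.SUM))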
Exemplo n.º 25
0
def qlstm(mode, batchsize, T, question_vocab_size, embed_size):
    n = caffe.NetSpec()
    mode_str = json.dumps({'mode': mode, 'batchsize': batchsize})
    n.data, n.cont, n.img_feature, n.label = L.Python(\
        module='vqa_data_provider_layer', layer='VQADataProviderLayer', param_str=mode_str, ntop=4 )

    # word embedding (static + dynamic)
    n.embed_ba = L.Embed(n.data, input_dim=question_vocab_size, num_output=embed_size, \
        weight_filler=dict(type='uniform',min=-0.08,max=0.08))
    n.embed_scale = L.Scale(n.embed_ba, n.cont,
                            scale_param=dict(axis=0))  # N x T x d_w
    n.embed_scale_resh = L.Reshape(
        n.embed_scale,
        reshape_param=dict(shape=dict(dim=[batchsize, T, embed_size, 1])))

    # avg of word embedding
    n.embed_avg = L.Convolution(n.embed_scale_resh,
                                convolution_param={
                                    'kernel_size': 1,
                                    'num_output': 1,
                                    'bias_term': False,
                                    'weight_filler': dict(type='constant',
                                                          value=1)
                                },
                                param=dict(lr_mult=0,
                                           decay_mult=0))  # N x 1 x d_w x 1
    n.embed_avg_resh = L.Reshape(
        n.embed_avg,
        reshape_param=dict(shape=dict(dim=[batchsize, embed_size, 1, 1])))

    n.q_emb_tanh_droped_resh_tiled_1 = L.Tile(n.embed_avg_resh,
                                              axis=2,
                                              tiles=14)
    n.q_emb_tanh_droped_resh_tiled = L.Tile(n.q_emb_tanh_droped_resh_tiled_1,
                                            axis=3,
                                            tiles=14)
    n.i_emb_tanh_droped_resh = L.Reshape(
        n.img_feature, reshape_param=dict(shape=dict(dim=[-1, 2048, 14, 14])))
    n.blcf = L.CompactBilinear(n.q_emb_tanh_droped_resh_tiled,
                               n.i_emb_tanh_droped_resh,
                               compact_bilinear_param=dict(num_output=16000,
                                                           sum_pool=False))
    n.blcf_sign_sqrt = L.SignedSqrt(n.blcf)
    n.blcf_sign_sqrt_l2 = L.L2Normalize(n.blcf_sign_sqrt)
    n.blcf_droped = L.Dropout(n.blcf_sign_sqrt_l2,
                              dropout_param={'dropout_ratio': 0.1})

    # multi-channel attention
    n.att_conv1 = L.Convolution(n.blcf_droped,
                                kernel_size=1,
                                stride=1,
                                num_output=512,
                                pad=0,
                                weight_filler=dict(type='xavier'))
    n.att_conv1_relu = L.ReLU(n.att_conv1)
    n.att_conv2 = L.Convolution(n.att_conv1_relu,
                                kernel_size=1,
                                stride=1,
                                num_output=2,
                                pad=0,
                                weight_filler=dict(type='xavier'))
    n.att_reshaped = L.Reshape(
        n.att_conv2, reshape_param=dict(shape=dict(dim=[-1, 2, 14 * 14])))
    n.att_softmax = L.Softmax(n.att_reshaped, axis=2)
    n.att = L.Reshape(n.att_softmax,
                      reshape_param=dict(shape=dict(dim=[-1, 2, 14, 14])))
    att_maps = L.Slice(n.att, ntop=2, slice_param={'axis': 1})
    n.att_map0 = att_maps[0]
    n.att_map1 = att_maps[1]
    dummy = L.DummyData(shape=dict(dim=[batchsize, 1]),
                        data_filler=dict(type='constant', value=1),
                        ntop=1)
    n.att_feature0 = L.SoftAttention(n.i_emb_tanh_droped_resh, n.att_map0,
                                     dummy)
    n.att_feature1 = L.SoftAttention(n.i_emb_tanh_droped_resh, n.att_map1,
                                     dummy)
    n.att_feature0_resh = L.Reshape(
        n.att_feature0, reshape_param=dict(shape=dict(dim=[-1, 2048])))
    n.att_feature1_resh = L.Reshape(
        n.att_feature1, reshape_param=dict(shape=dict(dim=[-1, 2048])))
    n.att_feature = L.Concat(n.att_feature0_resh, n.att_feature1_resh)

    # merge attention and lstm with compact bilinear pooling
    n.att_feature_resh = L.Reshape(
        n.att_feature, reshape_param=dict(shape=dict(dim=[-1, 4096, 1, 1])))
    #n.lstm_12_resh = L.Reshape(n.lstm_12, reshape_param=dict(shape=dict(dim=[-1,2048,1,1])))
    n.bc_att_lstm = L.CompactBilinear(n.att_feature_resh,
                                      n.embed_avg_resh,
                                      compact_bilinear_param=dict(
                                          num_output=16000, sum_pool=False))
    n.bc_sign_sqrt = L.SignedSqrt(n.bc_att_lstm)
    n.bc_sign_sqrt_l2 = L.L2Normalize(n.bc_sign_sqrt)

    n.bc_dropped = L.Dropout(n.bc_sign_sqrt_l2,
                             dropout_param={'dropout_ratio': 0.1})
    n.bc_dropped_resh = L.Reshape(
        n.bc_dropped, reshape_param=dict(shape=dict(dim=[-1, 16000])))

    n.prediction = L.InnerProduct(n.bc_dropped_resh,
                                  num_output=3000,
                                  weight_filler=dict(type='xavier'))
    n.loss = L.SoftmaxWithLoss(n.prediction, n.label)
    return n.to_proto()
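# Note: VQADataProviderLayer (a Python layer), CompactBilinear, SignedSqrt, L2Normalize and
# SoftAttention are not stock Caffe layers; they appear to come from the multimodal compact
# bilinear pooling (MCB) VQA code, so the prototxt produced here requires a Caffe build that
# includes those layers.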
Exemplo n.º 26
0
def make_resnet(training_data='train_data_path',
                test_data='test_data_path',
                mean_file='mean.binaryproto',
                depth=50):

    # num_feature_maps = np.array([16, 32, 64]) # feature map size: [32, 16, 8]
    configs = {
        50: [3, 4, 6, 3],
        101: [3, 4, 23, 3],
        152: [3, 8, 36, 3],
        200: [3, 24, 36, 3],
    }

    block_config = configs[depth]
    num_feature_maps = [64, 128, 256, 512]
    n_stage = len(num_feature_maps)

    n = caffe.NetSpec()
    # make training data layer
    n.data, n.label = L.Data(source=training_data,
                             backend=P.Data.LMDB,
                             batch_size=256,
                             ntop=2,
                             transform_param=dict(crop_size=224,
                                                  mean_file=mean_file,
                                                  mirror=True),
                             image_data_param=dict(shuffle=True),
                             include=dict(phase=0))
    # make test data layer
    n.test_data, n.test_label = L.Data(source=test_data,
                                       backend=P.Data.LMDB,
                                       batch_size=100,
                                       ntop=2,
                                       transform_param=dict(
                                           crop_size=224,
                                           mean_file=mean_file,
                                           mirror=False),
                                       include=dict(phase=1))
    # conv1 should accept both the training and the test data layers, which is awkward to express in
    # pycaffe: you would have to write two separate conv1 layers. As a workaround, conv1 is wired to the
    # training data layer only; after the whole prototxt has been generated, the top names of the two
    # data layers are post-processed and renamed to the same name.

    n.conv = L.Convolution(
        n.data,
        kernel_size=7,
        stride=2,
        num_output=64,
        pad=3,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        weight_filler=weight_filler,
        bias_filler=bias_filler)
    n.bn = L.BatchNorm(n.conv, in_place=True)
    n.scale = L.Scale(n.bn, scale_param=dict(bias_term=True), in_place=True)
    n.relu = L.ReLU(n.scale, in_place=True)

    n.max_pooling = L.Pooling(n.relu,
                              pool=P.Pooling.MAX,
                              kernel_size=3,
                              stride=2,
                              pad=0)
    # keep a string handle to the most recent top; it is spliced into the exec() strings below.
    checkpoint = 'n.max_pooling'

    # start making blocks.
    # num_feature_maps: the number of feature maps for each stage; four entries,
    #                   so the network has four stages.
    # nblocks: from the original paper, the number of residual blocks in each stage.
    # depth: selects the block configuration above (50, 101, 152 or 200 layers).
    for i in range(n_stage):
        num_map = num_feature_maps[i]
        nblocks = block_config[i]
        first = None
        if (i == 0):
            stride = 1
        else:
            stride = 2
        for res in range(nblocks):
            # stage name
            stage = 'block' + str(res + 1) + '_stage' + str(i + 1)
            # use the projection block when downsampling the feature map
            if res == 0:
                if (i == 0):
                    first = 'pre_act'
                else:
                    first = 'both_act'
                make_res = 'n.' + 'bn_' + stage + '_pre,' + \
                           'n.' + 'scale_' + stage + '_pre,' + \
                           'n.' + 'relu_' + stage + '_pre,' + \
                           'n.' + 'conv_proj_' + stage + '_proj,' + \
                           'n.' + 'conv_' + stage + '_a, ' + \
                           'n.' + 'bn_' + stage + '_a, ' + \
                           'n.' + 'scale_' + stage + '_a, ' + \
                           'n.' + 'relu_' + stage + '_a, ' + \
                           'n.' + 'conv_' + stage + '_b, ' + \
                           'n.' + 'bn_' + stage + '_b, ' + \
                           'n.' + 'scale_' + stage + '_b, ' + \
                           'n.' + 'relu_' + stage + '_b, ' + \
                           'n.' + 'conv_' + stage + '_c, ' + \
                           'n.' + 'eltsum_' + stage + \
                           ' = project_residual(' + checkpoint + ', num_out=num_map, stride=' + str(stride) + ', first=\'' + first + '\')'
                exec(make_res)
                checkpoint = 'n.' + 'eltsum_' + stage  # where we get
                continue

            # most blocks have this shape
            make_res = 'n.' + 'bn_' + stage + '_pre, ' + \
                       'n.' + 'scale_' + stage + '_pre, ' + \
                       'n.' + 'relu_' + stage + '_pre, ' + \
                       'n.' + 'conv_' + stage + '_a, ' + \
                       'n.' + 'bn_' + stage + '_a, ' + \
                       'n.' + 'scale_' + stage + '_a, ' + \
                       'n.' + 'relu_' + stage + '_a, ' + \
                       'n.' + 'conv_' + stage + '_b, ' + \
                       'n.' + 'bn_' + stage + '_b, ' + \
                       'n.' + 'scale_' + stage + '_b, ' + \
                       'n.' + 'relu_' + stage + '_b, ' + \
                       'n.' + 'conv_' + stage + '_c, ' + \
                       'n.' + 'eltsum_' + stage + \
                       ' = identity_residual(' + checkpoint + ', num_out=num_map, stride=1)'
            exec(make_res)
            checkpoint = 'n.' + 'eltsum_' + stage  # where we get

    # add the bn, relu, ave-pooling layers
    exec('n.bn_end = L.BatchNorm(' + checkpoint + ', in_place=False)')
    n.scale_end = L.Scale(n.bn_end,
                          scale_param=dict(bias_term=True),
                          in_place=True)
    n.relu_end = L.ReLU(n.scale_end, in_place=True)
    n.pool_global = L.Pooling(n.relu_end,
                              pool=P.Pooling.AVE,
                              global_pooling=True)
    n.score = L.InnerProduct(
        n.pool_global,
        num_output=1000,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0))
    n.loss = L.SoftmaxWithLoss(n.score, n.label)
    n.acc = L.Accuracy(n.score, n.label)

    return n.to_proto()
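# A minimal sketch (an assumption, not a drop-in replacement) of an alternative to the exec()
# strings above: NetSpec also supports item assignment with computed names, so each block's tops
# can be attached directly. `stage`, `num_map`, `stride` and `first` are the loop variables above;
# `checkpoint_name` plays the role of `checkpoint` but holds a plain blob name (initially 'max_pooling').
tops = project_residual(n[checkpoint_name], num_out=num_map, stride=stride, first=first)
suffixes = ['bn_%s_pre', 'scale_%s_pre', 'relu_%s_pre', 'conv_proj_%s_proj',
            'conv_%s_a', 'bn_%s_a', 'scale_%s_a', 'relu_%s_a',
            'conv_%s_b', 'bn_%s_b', 'scale_%s_b', 'relu_%s_b',
            'conv_%s_c', 'eltsum_%s']
for fmt, top in zip(suffixes, tops):
    n[fmt % stage] = top                  # e.g. n['eltsum_block1_stage2'] = ...
checkpoint_name = 'eltsum_%s' % stage     # remember the latest top by name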
Exemplo n.º 27
0
def fcn(split):
    n = caffe.NetSpec()
    pydata_params = dict(split=split, mean=(104.00699, 116.66877, 122.67892),
            seed=1337)
    if split == 'train':
        pydata_params['sbdd_dir'] = '../../data/sbdd/dataset'
        pylayer = 'SBDDSegDataLayer'
    else:
        pydata_params['voc_dir'] = '../../data/pascal/VOC2011'
        pylayer = 'VOCSegDataLayer'
    n.data, n.label = L.Python(module='voc_layers', layer=pylayer,
            ntop=2, param_str=str(pydata_params))

    # the base net
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
    n.pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
    n.pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
    n.pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
    n.pool5 = max_pool(n.relu5_3)

    # fully conv
    n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
    n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
    n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
    n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)

    n.score_fr = L.Convolution(n.drop7, num_output=21, kernel_size=1, pad=0,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    n.upscore2 = L.Deconvolution(n.score_fr,
        convolution_param=dict(num_output=21, kernel_size=4, stride=2,
            bias_term=False),
        param=[dict(lr_mult=0)])

    # scale pool4 skip for compatibility
    n.scale_pool4 = L.Scale(n.pool4, filler=dict(type='constant',
        value=0.01), param=[dict(lr_mult=0)])
    n.score_pool4 = L.Convolution(n.scale_pool4, num_output=21, kernel_size=1, pad=0,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    n.score_pool4c = crop(n.score_pool4, n.upscore2)
    n.fuse_pool4 = L.Eltwise(n.upscore2, n.score_pool4c,
            operation=P.Eltwise.SUM)
    n.upscore_pool4 = L.Deconvolution(n.fuse_pool4,
        convolution_param=dict(num_output=21, kernel_size=4, stride=2,
            bias_term=False),
        param=[dict(lr_mult=0)])

    # scale pool3 skip for compatibility
    n.scale_pool3 = L.Scale(n.pool3, filler=dict(type='constant',
        value=0.0001), param=[dict(lr_mult=0)])
    n.score_pool3 = L.Convolution(n.scale_pool3, num_output=21, kernel_size=1, pad=0,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    n.score_pool3c = crop(n.score_pool3, n.upscore_pool4)
    n.fuse_pool3 = L.Eltwise(n.upscore_pool4, n.score_pool3c,
            operation=P.Eltwise.SUM)
    n.upscore8 = L.Deconvolution(n.fuse_pool3,
        convolution_param=dict(num_output=21, kernel_size=16, stride=8,
            bias_term=False),
        param=[dict(lr_mult=0)])

    n.score = crop(n.upscore8, n.data)
    n.loss = L.SoftmaxWithLoss(n.score, n.label,
            loss_param=dict(normalize=False, ignore_label=255))

    return n.to_proto()
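# The conv_relu, max_pool and crop helpers are not shown in this example. In the FCN reference
# code they look roughly like the sketch below (crop comes from caffe.coord_map); the exact
# parameters here are assumptions.
from caffe.coord_map import crop

def conv_relu(bottom, nout, ks=3, stride=1, pad=1):
    conv = L.Convolution(bottom, kernel_size=ks, stride=stride, num_output=nout, pad=pad,
                         param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    return conv, L.ReLU(conv, in_place=True)

def max_pool(bottom, ks=2, stride=2):
    return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)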
Exemplo n.º 28
0
def factorization_inception_resnet_a(bottom):
    """
    input:384x35x35
    output:384x35x35
    :param bottom: bottom layer
    :return: layers
    """
    conv_1x1 = L.Convolution(bottom, kernel_size=1, num_output=32, stride=1,
                             param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                             weight_filler=dict(type='xavier'),
                             bias_filler=dict(type='constant', value=0))  # 32x35x35
    conv_1x1_bn = L.BatchNorm(conv_1x1, use_global_stats=False, in_place=True)
    conv_1x1_scale = L.Scale(conv_1x1, scale_param=dict(bias_term=True), in_place=True)
    conv_1x1_relu = L.ReLU(conv_1x1, in_place=True)

    conv_3x3_reduce = L.Convolution(bottom, kernel_size=1, num_output=32, stride=1,
                                    param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                                    weight_filler=dict(type='xavier'),
                                    bias_filler=dict(type='constant', value=0))  # 32x35x35
    conv_3x3_reduce_bn = L.BatchNorm(conv_3x3_reduce, use_global_stats=False, in_place=True)
    conv_3x3_reduce_scale = L.Scale(conv_3x3_reduce, scale_param=dict(bias_term=True), in_place=True)
    conv_3x3_reduce_relu = L.ReLU(conv_3x3_reduce, in_place=True)

    conv_3x3 = L.Convolution(conv_3x3_reduce, kernel_size=3, num_output=32, stride=1, pad=1,
                             param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                             weight_filler=dict(type='xavier'),
                             bias_filler=dict(type='constant', value=0))  # 32x35x35
    conv_3x3_bn = L.BatchNorm(conv_3x3, use_global_stats=False, in_place=True)
    conv_3x3_scale = L.Scale(conv_3x3, scale_param=dict(bias_term=True), in_place=True)
    conv_3x3_relu = L.ReLU(conv_3x3, in_place=True)

    conv_3x3_2_reduce = L.Convolution(bottom, kernel_size=1, num_output=32, stride=1,
                                      param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                                      weight_filler=dict(type='xavier'),
                                      bias_filler=dict(type='constant', value=0))  # 32x35x35
    conv_3x3_2_reduce_bn = L.BatchNorm(conv_3x3_2_reduce, use_global_stats=False, in_place=True)
    conv_3x3_2_reduce_scale = L.Scale(conv_3x3_2_reduce, scale_param=dict(bias_term=True), in_place=True)
    conv_3x3_2_reduce_relu = L.ReLU(conv_3x3_2_reduce, in_place=True)

    conv_3x3_2 = L.Convolution(conv_3x3_2_reduce, kernel_size=3, num_output=48, stride=1, pad=1,
                               param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                               weight_filler=dict(type='xavier'),
                               bias_filler=dict(type='constant', value=0))  # 48x35x35
    conv_3x3_2_bn = L.BatchNorm(conv_3x3_2, use_global_stats=False, in_place=True)
    conv_3x3_2_scale = L.Scale(conv_3x3_2, scale_param=dict(bias_term=True), in_place=True)
    conv_3x3_2_relu = L.ReLU(conv_3x3_2, in_place=True)

    conv_3x3_3 = L.Convolution(conv_3x3_2, kernel_size=3, num_output=64, stride=1, pad=1,
                               param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                               weight_filler=dict(type='xavier'),
                               bias_filler=dict(type='constant', value=0))  # 64x35x35
    conv_3x3_3_bn = L.BatchNorm(conv_3x3_3, use_global_stats=False, in_place=True)
    conv_3x3_3_scale = L.Scale(conv_3x3_3, scale_param=dict(bias_term=True), in_place=True)
    conv_3x3_3_relu = L.ReLU(conv_3x3_3, in_place=True)

    concat = L.Concat(conv_1x1, conv_3x3, conv_3x3_3)  # 128x35x35

    conv_1x1_2 = L.Convolution(concat, kernel_size=1, num_output=384, stride=1,
                               param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                               weight_filler=dict(type='xavier'),
                               bias_filler=dict(type='constant', value=0))  # 384x35x35
    conv_1x1_2_bn = L.BatchNorm(conv_1x1_2, use_global_stats=False, in_place=True)
    conv_1x1_2_scale = L.Scale(conv_1x1_2, scale_param=dict(bias_term=True), in_place=True)
    # conv_1x1_2_relu = L.ReLU(conv_1x1_2_scale, in_place=True) # linear activation

    residual_eltwise = L.Eltwise(bottom, conv_1x1_2,
                                 eltwise_param=dict(operation=1))

    return conv_1x1, conv_1x1_bn, conv_1x1_scale, conv_1x1_relu, conv_3x3_reduce, conv_3x3_reduce_bn, \
           conv_3x3_reduce_scale, conv_3x3_reduce_relu, conv_3x3, conv_3x3_bn, conv_3x3_scale, conv_3x3_relu, \
           conv_3x3_2_reduce, conv_3x3_2_reduce_bn, conv_3x3_2_reduce_scale, conv_3x3_2_reduce_relu, \
           conv_3x3_2, conv_3x3_2_bn, conv_3x3_2_scale, conv_3x3_2_relu, conv_3x3_3, conv_3x3_3_bn, conv_3x3_3_scale, \
           conv_3x3_3_relu, concat, conv_1x1_2, conv_1x1_2_bn, conv_1x1_2_scale, residual_eltwise
Exemplo n.º 29
0
def Inception_v2(data, cin, co, relu=True, norm=True, is_train=True):
    '''
    An Inception block whose output keeps the same spatial size as the input.
    @data: input blob to be convolved
    @cin: number of input channels (accepted but not used in the body)
    @co: number of output channels (must be divisible by 4)
    @relu: whether to apply ReLU to the output
    @norm: whether to apply BatchNorm to the output
    '''
    assert (co % 4 == 0)
    cos = [co / 4] * 4
    # branch 1: 1x1 convolution, stride 1
    if is_train:
        kwargs = {'engine': 3}
    else:
        kwargs = {'engine': 3, 'use_global_stats': True}
    branch1 = L.Convolution(data,
                            kernel_size=1,
                            stride=1,
                            pad=0,
                            num_output=cos[0],
                            weight_filler=dict(type='xavier'))
    # branch 2: Conv + BN + ReLU + Conv
    branch2_conv1 = L.Convolution(data,
                                  kernel_size=1,
                                  stride=1,
                                  pad=0,
                                  num_output=2 * cos[1],
                                  weight_filler=dict(type='xavier'))
    branch2_norm1 = L.BatchNorm(branch2_conv1, **kwargs)
    branch2_scale1 = L.Scale(branch2_norm1, bias_term=True)
    branch2_relu1 = L.ReLU(branch2_scale1, engine=3)
    branch2 = L.Convolution(branch2_relu1,
                            kernel_size=3,
                            stride=1,
                            pad=1,
                            num_output=cos[1],
                            weight_filler=dict(type='xavier'))
    # branch 3: Conv(1,1,0) + BN + ReLU + Conv(5,1,2)
    branch3_conv1 = L.Convolution(data,
                                  kernel_size=1,
                                  stride=1,
                                  pad=0,
                                  num_output=2 * cos[2],
                                  weight_filler=dict(type='xavier'))
    branch3_norm1 = L.BatchNorm(branch3_conv1, **kwargs)
    branch3_scale1 = L.Scale(branch3_norm1, bias_term=True)
    branch3_relu1 = L.ReLU(branch3_scale1, engine=3)
    branch3 = L.Convolution(branch3_relu1,
                            kernel_size=5,
                            stride=1,
                            pad=2,
                            num_output=cos[2],
                            weight_filler=dict(type='xavier'))
    # branch 4: MaxPool + Conv
    branch4_pool1 = L.Pooling(data,
                              kernel_size=3,
                              stride=1,
                              pad=1,
                              pool=P.Pooling.MAX)
    branch4 = L.Convolution(branch4_pool1,
                            kernel_size=1,
                            stride=1,
                            pad=0,
                            num_output=cos[3],
                            weight_filler=dict(type='xavier'))

    #concat branch1,branch2,branch3,branch4
    bottom_layers = [branch1, branch2, branch3, branch4]
    result = L.Concat(*bottom_layers)
    if norm:
        result = L.BatchNorm(result, **kwargs)
        result = L.Scale(result, bias_term=True)
    if relu:
        result = L.ReLU(result, engine=3)
    return result
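# A minimal usage sketch for Inception_v2 (shapes, channel counts and the output path are
# assumptions; Python 2 integer division is assumed by co / 4 inside the block):
n = caffe.NetSpec()
n.data = L.Input(input_param=dict(shape=[dict(dim=[1, 64, 56, 56])]))
n.incep1 = Inception_v2(n.data, 64, 128, is_train=True)
n.incep2 = Inception_v2(n.incep1, 128, 128, is_train=True)
with open('inception_v2.prototxt', 'w') as f:
    f.write(str(n.to_proto()))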
def conv_factory(bottom, ks, nout, stride=1, pad=0):
    conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
                                num_output=nout, pad=pad, bias_term=True, weight_filler=dict(type='msra'), bias_filler=dict(type='constant'))
    batch_norm = L.BatchNorm(conv, in_place=True, param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)])
    scale = L.Scale(batch_norm, bias_term=True, in_place=True)
    return scale