def add_rnn(n,
            data,
            act,
            clip,
            batch_size,
            T,
            K,
            num_step,
            lstm_dim=2048,
            mode='train'):
    add_lstm_init(n, batch_size, lstm_dim)
    n.clip_reshape = L.Reshape(clip, shape=dict(dim=[1, T, batch_size]))
    if mode in ('train', 'test_encode'):
        clip_slice = L.Slice(n.clip_reshape, ntop=T, axis=1)
        if mode == 'train':
            act_slice = L.Slice(act, ntop=T - 1, axis=0)
            x = L.Slice(data, axis=0, ntop=T)
            x_set = ()
            label_set = ()
            silence_set = ()
        for i in range(T):
            t = tag(i + 1)
            n.tops['clip' + t] = clip_slice[i]
            if mode == 'train':
                n.tops['x' + t] = x[i]
                if i < T - 1:
                    n.tops['act' + t] = act_slice[i]
                if i < T - num_step:
                    x_set = x_set + (x[i], )
                if i < K - 1:
                    silence_set += (act_slice[i], )
                if i >= K:
                    label_set = label_set + (x[i], )
        if mode == 'train':
            n.x = L.Concat(*x_set, axis=0)
            n.label = L.Concat(*label_set, axis=0)
            add_lstm_encoder(n, n.x, batch_size, lstm_dim)
        else:
            add_lstm_encoder(n, data, batch_size, lstm_dim)
    if T > num_step:
        x_gate = L.Slice(n.x_gate, ntop=T - num_step, axis=0)
        # Slice with ntop == 1 returns a single Top rather than a tuple.
        if isinstance(x_gate, caffe.net_spec.Top):
            x_gate = (x_gate, )
    else:
        x_gate = ()

    for i in range(0, T):
        t_1 = tag(i)
        t = tag(i + 1)

        clip_t = (n.tops['clip' + t]
                  if mode in ('train', 'test_encode') else n.clip_reshape)
        n.tops['h_conted' + t_1] = eltwise(n.tops['h' + t_1], clip_t,
                                           P.Eltwise.SUM, True)
        # Decoding
        if i == T - num_step:
            if mode == 'train':
                h_set = ()
                act_set = ()
                for j in range(K, T - num_step + 1):
                    t_j = tag(j)
                    h_set = h_set + (n.tops['h_conted' + t_j], )
                    act_set = act_set + (n.tops['act' + t_j], )
                n.h = L.Concat(*h_set, axis=0)
                n.act_concat = L.Concat(*act_set, axis=0)
                top = add_decoder(n, n.h, n.act_concat)
            else:
                top = add_decoder(n, n.tops['h_conted' + t_1], act)
            x_outs = L.Slice(top, axis=0, ntop=T - num_step - K + 1)
            if isinstance(x_outs, caffe.net_spec.Top):
                x_outs = [x_outs]
            for j in range(K, T - num_step + 1):
                n.tops['x_hat' + tag(j + 1)] = x_outs[j - K]
            dec_tag = tag(2) if mode == 'train' else ''
            if mode == 'test_decode':
                add_lstm_encoder(n,
                                 n.tops['x_hat' + t],
                                 batch_size,
                                 lstm_dim=lstm_dim,
                                 flatten=False)
                x_gate = x_gate + (n.tops['x_gate'], )
            elif num_step > 1:
                add_lstm_encoder(n,
                                 n.tops['x_hat' + t],
                                 batch_size,
                                 lstm_dim=lstm_dim,
                                 t=t,
                                 tag=dec_tag,
                                 flatten=False)
                x_gate = x_gate + (n.tops['x_gate' + t], )

        if i > T - num_step:
            dec_t = tag(i - T + num_step + 1)
            dec_tp = tag(i - T + num_step + 2)
            top = add_decoder(n,
                              n.tops['h_conted' + t_1],
                              n.tops['act' + t_1],
                              tag=dec_t)
            n.tops['x_hat' + t] = top
            if i < T - 1:
                add_lstm_encoder(n,
                                 n.tops['x_hat' + t],
                                 batch_size,
                                 lstm_dim=lstm_dim,
                                 t=t,
                                 tag=dec_tp,
                                 flatten=False)
                x_gate = x_gate + (n.tops['x_gate' + t], )

        if i < T - 1 or mode != 'train':
            # h(t-1) -> h(t)
            if mode != 'test_decode':
                n.tops['x_gate' + t] = x_gate[i]
            n.tops['h_gate' + t] = fc(n.tops['h_conted' + t_1],
                                      4 * lstm_dim,
                                      weight_filler=dict(type='uniform',
                                                         min=-0.08,
                                                         max=0.08),
                                      param_name='Wh',
                                      axis=2,
                                      bias=False)
            n.tops['gate' + t] = eltwise(x_gate[i], n.tops['h_gate' + t],
                                         P.Eltwise.SUM)
            n.tops['c' + t], n.tops['h' + t] = L.LSTMUnit(
                n.tops['c' + t_1],
                n.tops['gate' + t],
                clip_t,
                ntop=2,
                clip_gradients=[0, 0.1, 0])

    # Define Loss functions
    if mode == 'train':
        x_hat = ()
        for i in range(K, T):
            t = tag(i + 1)
            x_hat = x_hat + (n.tops['x_hat' + t], )
        silence_set += (n.tops['c' + tag(T - 1)], )
        n.silence = L.Silence(*silence_set, ntop=0)
        n.x_hat = L.Concat(*x_hat, axis=0)
        n.label_flat = L.Flatten(n.label, axis=0, end_axis=1)
        n.l2_loss = L.EuclideanLoss(n.x_hat, n.label_flat)
    return n
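
# Usage sketch (an assumption, not part of the original source): add_rnn
# relies on helpers defined elsewhere in this module (tag, add_lstm_init,
# add_lstm_encoder, add_decoder, fc, eltwise), so this only illustrates the
# calling convention. DummyData stands in for a real data layer, and the
# frame/action dimensions below are made up.
def make_rnn_train_proto(batch_size=32, T=20, K=10, num_step=10,
                         input_dim=8192, act_dim=13):
    n = caffe.NetSpec()
    n.data = L.DummyData(shape=[dict(dim=[T * batch_size, input_dim])])
    n.act = L.DummyData(shape=[dict(dim=[(T - 1) * batch_size, act_dim])])
    n.clip = L.DummyData(shape=[dict(dim=[T * batch_size])])
    n = add_rnn(n, n.data, n.act, n.clip, batch_size, T, K, num_step,
                mode='train')
    return n.to_proto()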
    def create_architecture(self, mode, hdf5_data):
        """Returns the architecture (i.e., caffe prototxt) of the model.

        Jer: One day this should probably be written to be more general.
        """

        arch = self.arch
        pars = self.pars
        n = caffe.NetSpec()

        if mode == 'deploy':
            n.data = L.DummyData(shape=[dict(dim=pars['deploy_dims'])])
        elif mode == 'train':
            n.data, n.label = L.HDF5Data(batch_size=pars['train_batch_size'],
                                         source=hdf5_data,
                                         ntop=pars['ntop'])
        else:  # Test.
            n.data, n.label = L.HDF5Data(batch_size=pars['test_batch_size'],
                                         source=hdf5_data,
                                         ntop=pars['ntop'])

        # print(n.to_proto())
        in_layer = n.data

        for layer in arch:
            layer_type, vals = layer

            if layer_type == 'e2e':
                in_layer = n.e2e = e2e_conv(in_layer, vals['n_filters'],
                                            vals['kernel_h'], vals['kernel_w'])
            elif layer_type == 'e2n':
                in_layer = n.e2n = e2n_conv(in_layer, vals['n_filters'],
                                            vals['kernel_h'], vals['kernel_w'])
            elif layer_type == 'fc':
                in_layer = n.fc = full_connect(in_layer, vals['n_filters'])
            elif layer_type == 'out':
                n.out = full_connect(in_layer, vals['n_filters'])
                # Rename to user specified unique layer name.
                # n.__setattr__('out', n.new_layer)

            elif layer_type == 'dropout':
                in_layer = n.dropout = L.Dropout(
                    in_layer,
                    in_place=True,
                    dropout_param=dict(dropout_ratio=vals['dropout_ratio']))
            elif layer_type == 'relu':
                in_layer = n.relu = L.ReLU(
                    in_layer,
                    in_place=True,
                    relu_param=dict(negative_slope=vals['negative_slope']))
            else:
                raise ValueError('Unknown layer type: ' + str(layer_type))

        # ~ end for.

        if mode != 'deploy':
            if self.pars['loss'] == 'EuclideanLoss':
                n.loss = L.EuclideanLoss(n.out, n.label)
            else:
                raise ValueError(
                    "Only 'EuclideanLoss' is currently implemented for pars['loss']!"
                )
        return n
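
# Usage sketch (assumed names and dimensions, not from the original repo):
# create_architecture reads self.arch, a list of (layer_type, vals) pairs
# using the opcodes handled above, plus self.pars for the data layers and
# loss. Note that each opcode writes a fixed top name (n.e2e, n.fc, ...), so
# repeating an opcode overwrites the earlier top.
demo_arch = [('e2e', dict(n_filters=32, kernel_h=90, kernel_w=90)),
             ('relu', dict(negative_slope=0.33)),
             ('e2n', dict(n_filters=64, kernel_h=90, kernel_w=90)),
             ('dropout', dict(dropout_ratio=0.5)),
             ('fc', dict(n_filters=30)),
             ('out', dict(n_filters=1))]
demo_pars = dict(train_batch_size=14, test_batch_size=14, ntop=2,
                 deploy_dims=[1, 1, 90, 90], loss='EuclideanLoss')
# model.arch, model.pars = demo_arch, demo_pars  # model is hypothetical
# train_net = model.create_architecture('train', 'train_data.txt')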
def euclidean_loss(bottom, label):
    return L.EuclideanLoss(bottom, label)
def caffenet(netmode):
    # Start Caffe proto net
    net = caffe.NetSpec()
    # Specify input data structures

    if netmode == caffe_pb2.TEST:
        if netconf.loss_function == 'malis':
            fmaps_end = 11

        if netconf.loss_function == 'euclid':
            fmaps_end = 11

        if netconf.loss_function == 'softmax':
            fmaps_end = 2

        net.data, net.datai = data_layer([1, 1, 572, 572])
        net.silence = L.Silence(net.datai, ntop=0)

        # Shape specs:
        # 0.    Convolution buffer size
        # 1.    Weight memory size
        # 2.    Num. channels
        # 3.    [d] parameter running value
        # 4.    [w] parameter running value
        run_shape_in = [[0, 0, 1, [1, 1], [572, 572]]]
        run_shape_out = run_shape_in

        last_blob = implement_usknet(net, run_shape_out, 64, fmaps_end)

        # Implement the prediction layer
        if netconf.loss_function == 'malis':
            net.prob = L.Sigmoid(last_blob, ntop=1)

        if netconf.loss_function == 'euclid':
            net.prob = L.Sigmoid(last_blob, ntop=1)

        if netconf.loss_function == 'softmax':
            net.prob = L.Softmax(last_blob, ntop=1)

        for i in range(0, len(run_shape_out)):
            print(run_shape_out[i])

        print("Max. memory requirements: %s B" %
              (compute_memory_buffers(run_shape_out) +
               compute_memory_weights(run_shape_out) +
               compute_memory_blobs(run_shape_out)))
        print("Weight memory: %s B" % compute_memory_weights(run_shape_out))
        print("Max. conv buffer: %s B" % compute_memory_buffers(run_shape_out))

    else:
        if netconf.loss_function == 'malis':
            net.data, net.datai = data_layer([1, 1, 572, 572])
            net.label, net.labeli = data_layer([1, 1, 388, 388])
            net.label_affinity, net.label_affinityi = data_layer(
                [1, 11, 16, 388, 388])
            net.affinity_edges, net.affinity_edgesi = data_layer([1, 1, 11, 3])
            net.silence = L.Silence(net.datai,
                                    net.labeli,
                                    net.label_affinityi,
                                    net.affinity_edgesi,
                                    ntop=0)
            fmaps_end = 11

        if netconf.loss_function == 'euclid':
            net.data, net.datai = data_layer([1, 1, 572, 572])
            net.label, net.labeli = data_layer([1, 3, 388, 388])
            net.scale, net.scalei = data_layer([1, 3, 388, 388])
            net.silence = L.Silence(net.datai, net.labeli, net.scalei, ntop=0)
            fmaps_end = 11

        if netconf.loss_function == 'softmax':
            net.data, net.datai = data_layer([1, 1, 572, 572])
            # Currently only supports binary classification
            net.label, net.labeli = data_layer([1, 1, 388, 388])
            net.silence = L.Silence(net.datai, net.labeli, ntop=0)
            fmaps_end = 2

        run_shape_in = [[0, 1, 1, [1, 1], [572, 572]]]
        run_shape_out = run_shape_in

        # Start the actual network
        last_blob = implement_usknet(net, run_shape_out, 64, fmaps_end)

        for i in range(0, len(run_shape_out)):
            print(run_shape_out[i])

        print("Max. memory requirements: %s B" %
              (compute_memory_buffers(run_shape_out) +
               compute_memory_weights(run_shape_out) +
               2 * compute_memory_blobs(run_shape_out)))
        print("Weight memory: %s B" % compute_memory_weights(run_shape_out))
        print("Max. conv buffer: %s B" % compute_memory_buffers(run_shape_out))

        # Implement the loss
        if netconf.loss_function == 'malis':
            last_blob = L.Sigmoid(last_blob, in_place=True)
            net.loss = L.MalisLoss(last_blob,
                                   net.label_affinity,
                                   net.label,
                                   net.affinity_edges,
                                   ntop=0)

        if netconf.loss_function == 'euclid':
            last_blob = L.Sigmoid(last_blob, in_place=True)
            net.loss = L.EuclideanLoss(last_blob, net.label, net.scale, ntop=0)

        if netconf.loss_function == 'softmax':
            net.loss = L.SoftmaxWithLoss(last_blob, net.label, ntop=0)

    # Return the protocol buffer of the generated network
    return net.to_proto()
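
# Usage sketch (assumed): netconf is the module-level configuration object the
# function reads, and caffe_pb2 comes from caffe.proto.caffe_pb2. This simply
# serializes both phases of the generated net to prototxt files.
def write_usknet_prototxts(prefix='usknet'):
    for phase, name in [(caffe_pb2.TRAIN, 'train'), (caffe_pb2.TEST, 'test')]:
        with open('%s_%s.prototxt' % (prefix, name), 'w') as f:
            f.write(str(caffenet(phase)))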
def setLayers_twoBranches(data_source,
                          batch_size,
                          layername,
                          kernel,
                          stride,
                          outCH,
                          label_name,
                          transform_param_in,
                          deploy=False,
                          batchnorm=0,
                          lr_mult_distro=[1, 1, 1]):
    # It is tricky to produce the deploy prototxt file, as the data input does not come from a layer, so we have to create a workaround.
    # Producing the training and testing prototxt files is pretty straightforward.
    n = caffe.NetSpec()
    assert len(layername) == len(kernel)
    assert len(layername) == len(stride)
    assert len(layername) == len(outCH)
    num_parts = transform_param_in['num_parts']

    if deploy == False and "lmdb" not in data_source:
        if (len(label_name) == 1):
            n.data, n.tops[label_name[0]] = L.HDF5Data(hdf5_data_param=dict(
                batch_size=batch_size, source=data_source),
                                                       ntop=2)
        elif (len(label_name) == 2):
            n.data, n.tops[label_name[0]], n.tops[label_name[1]] = L.HDF5Data(
                hdf5_data_param=dict(batch_size=batch_size,
                                     source=data_source),
                ntop=3)
    # produce data definition for the CPMData (LMDB) training net
    elif deploy == False:
        n.data, n.tops['label'] = L.CPMData(
            data_param=dict(backend=1,
                            source=data_source,
                            batch_size=batch_size),
            cpm_transform_param=transform_param_in,
            ntop=2)
        n.tops[label_name[2]], n.tops[label_name[3]], n.tops[
            label_name[4]], n.tops[label_name[5]] = L.Slice(
                n.label,
                slice_param=dict(
                    axis=1, slice_point=[38, num_parts + 1, num_parts + 39]),
                ntop=4)
        n.tops[label_name[0]] = L.Eltwise(n.tops[label_name[2]],
                                          n.tops[label_name[4]],
                                          operation=P.Eltwise.PROD)
        n.tops[label_name[1]] = L.Eltwise(n.tops[label_name[3]],
                                          n.tops[label_name[5]],
                                          operation=P.Eltwise.PROD)

    else:
        input = "data"
        dim1 = 1
        dim2 = 4
        dim3 = 368
        dim4 = 368
        # make an empty "data" layer so the next layer accepting input will be able to take the correct blob name "data",
        # we will later have to remove this layer from the serialization string, since this is just a placeholder
        n.data = L.Layer()

    # something special before everything
    n.image, n.center_map = L.Slice(n.data,
                                    slice_param=dict(axis=1, slice_point=3),
                                    ntop=2)
    n.silence2 = L.Silence(n.center_map, ntop=0)
    #n.pool_center_lower = L.Pooling(n.center_map, kernel_size=9, stride=8, pool=P.Pooling.AVE)

    # just follow arrays..CPCPCPCPCCCC....
    last_layer = ['image', 'image']
    stage = 1
    conv_counter = 1
    pool_counter = 1
    drop_counter = 1
    local_counter = 1
    state = 'image'  # can be image or fuse
    share_point = 0

    for l in range(0, len(layername)):
        if layername[l] == 'V':  #pretrained VGG layers
            conv_name = 'conv%d_%d' % (pool_counter, local_counter)
            lr_m = lr_mult_distro[0]
            n.tops[conv_name] = L.Convolution(
                n.tops[last_layer[0]],
                kernel_size=kernel[l],
                num_output=outCH[l],
                pad=int(math.floor(kernel[l] / 2)),
                param=[
                    dict(lr_mult=lr_m, decay_mult=1),
                    dict(lr_mult=lr_m * 2, decay_mult=0)
                ],
                weight_filler=dict(type='gaussian', std=0.01),
                bias_filler=dict(type='constant'))
            last_layer[0] = conv_name
            last_layer[1] = conv_name
            print('%s\tch=%d\t%.1f' % (last_layer[0], outCH[l], lr_m))
            ReLUname = 'relu%d_%d' % (pool_counter, local_counter)
            n.tops[ReLUname] = L.ReLU(n.tops[last_layer[0]], in_place=True)
            local_counter += 1
            print(ReLUname)
        if layername[l] == 'B':
            pool_counter += 1
            local_counter = 1
        if layername[l] == 'C':
            if state == 'image':
                #conv_name = 'conv%d_stage%d' % (conv_counter, stage)
                conv_name = 'conv%d_%d_CPM' % (
                    pool_counter, local_counter
                )  # no image state in subsequent stages
                lr_m = lr_mult_distro[1]
            else:  # fuse
                conv_name = 'Mconv%d_stage%d' % (conv_counter, stage)
                lr_m = lr_mult_distro[2]
                conv_counter += 1
            #if stage == 1:
            #    lr_m = 1
            #else:
            #    lr_m = lr_sub
            n.tops[conv_name] = L.Convolution(
                n.tops[last_layer[0]],
                kernel_size=kernel[l],
                num_output=outCH[l],
                pad=int(math.floor(kernel[l] / 2)),
                param=[
                    dict(lr_mult=lr_m, decay_mult=1),
                    dict(lr_mult=lr_m * 2, decay_mult=0)
                ],
                weight_filler=dict(type='gaussian', std=0.01),
                bias_filler=dict(type='constant'))
            last_layer[0] = conv_name
            last_layer[1] = conv_name
            print('%s\tch=%d\t%.1f' % (last_layer[0], outCH[l], lr_m))

            if layername[l + 1] != 'L':
                if (state == 'image'):
                    if (batchnorm == 1):
                        batchnorm_name = 'bn%d_stage%d' % (conv_counter, stage)
                        n.tops[batchnorm_name] = L.BatchNorm(
                            n.tops[last_layer[0]],
                            param=[
                                dict(lr_mult=0),
                                dict(lr_mult=0),
                                dict(lr_mult=0)
                            ])
                        #scale_filler=dict(type='constant', value=1), shift_filler=dict(type='constant', value=0.001))
                        last_layer[0] = batchnorm_name
                    #ReLUname = 'relu%d_stage%d' % (conv_counter, stage)
                    ReLUname = 'relu%d_%d_CPM' % (pool_counter, local_counter)
                    n.tops[ReLUname] = L.ReLU(n.tops[last_layer[0]],
                                              in_place=True)
                else:
                    if (batchnorm == 1):
                        batchnorm_name = 'Mbn%d_stage%d' % (conv_counter,
                                                            stage)
                        n.tops[batchnorm_name] = L.BatchNorm(
                            n.tops[last_layer[0]],
                            param=[
                                dict(lr_mult=0),
                                dict(lr_mult=0),
                                dict(lr_mult=0)
                            ])
                        #scale_filler=dict(type='constant', value=1), shift_filler=dict(type='constant', value=0.001))
                        last_layer[0] = batchnorm_name
                    ReLUname = 'Mrelu%d_stage%d' % (conv_counter, stage)
                    n.tops[ReLUname] = L.ReLU(n.tops[last_layer[0]],
                                              in_place=True)
                #last_layer = ReLUname
                print(ReLUname)

            #conv_counter += 1
            local_counter += 1

        elif layername[l] == 'C2':
            for level in range(0, 2):
                if state == 'image':
                    #conv_name = 'conv%d_stage%d' % (conv_counter, stage)
                    conv_name = 'conv%d_%d_CPM_L%d' % (
                        pool_counter, local_counter, level + 1
                    )  # no image state in subsequent stages
                    lr_m = lr_mult_distro[1]
                else:  # fuse
                    conv_name = 'Mconv%d_stage%d_L%d' % (conv_counter, stage,
                                                         level + 1)
                    lr_m = lr_mult_distro[2]
                    #conv_counter += 1
                #if stage == 1:
                #    lr_m = 1
                #else:
                #    lr_m = lr_sub
                if layername[l + 1] in ('L2', 'L3'):
                    if level == 0:
                        outCH[l] = 26  # 2 * 13 (or 38); each limb is composed of two points
                    else:
                        outCH[l] = 15  # 14 parts + background (or 19)

                n.tops[conv_name] = L.Convolution(
                    n.tops[last_layer[level]],
                    kernel_size=kernel[l],
                    num_output=outCH[l],
                    pad=int(math.floor(kernel[l] / 2)),
                    param=[
                        dict(lr_mult=lr_m, decay_mult=1),
                        dict(lr_mult=lr_m * 2, decay_mult=0)
                    ],
                    weight_filler=dict(type='gaussian', std=0.01),
                    bias_filler=dict(type='constant'))
                last_layer[level] = conv_name
                print('%s\tch=%d\t%.1f' % (last_layer[level], outCH[l], lr_m))

                if layername[l + 1] not in ('L2', 'L3'):
                    if (state == 'image'):
                        if (batchnorm == 1):
                            batchnorm_name = 'bn%d_stage%d_L%d' % (
                                conv_counter, stage, level + 1)
                            n.tops[batchnorm_name] = L.BatchNorm(
                                n.tops[last_layer[level]],
                                param=[
                                    dict(lr_mult=0),
                                    dict(lr_mult=0),
                                    dict(lr_mult=0)
                                ])
                            #scale_filler=dict(type='constant', value=1), shift_filler=dict(type='constant', value=0.001))
                            last_layer[level] = batchnorm_name
                        #ReLUname = 'relu%d_stage%d' % (conv_counter, stage)
                        ReLUname = 'relu%d_%d_CPM_L%d' % (
                            pool_counter, local_counter, level + 1)
                        n.tops[ReLUname] = L.ReLU(n.tops[last_layer[level]],
                                                  in_place=True)
                    else:
                        if (batchnorm == 1):
                            batchnorm_name = 'Mbn%d_stage%d_L%d' % (
                                conv_counter, stage, level + 1)
                            n.tops[batchnorm_name] = L.BatchNorm(
                                n.tops[last_layer[level]],
                                param=[
                                    dict(lr_mult=0),
                                    dict(lr_mult=0),
                                    dict(lr_mult=0)
                                ])
                            #scale_filler=dict(type='constant', value=1), shift_filler=dict(type='constant', value=0.001))
                            last_layer[level] = batchnorm_name
                        ReLUname = 'Mrelu%d_stage%d_L%d' % (conv_counter,
                                                            stage, level + 1)
                        n.tops[ReLUname] = L.ReLU(n.tops[last_layer[level]],
                                                  in_place=True)
                    print(ReLUname)

            conv_counter += 1
            local_counter += 1

        elif layername[l] == 'P':  # Pooling
            n.tops['pool%d_stage%d' % (pool_counter, stage)] = L.Pooling(
                n.tops[last_layer[0]],
                kernel_size=kernel[l],
                stride=stride[l],
                pool=P.Pooling.MAX)
            last_layer[0] = 'pool%d_stage%d' % (pool_counter, stage)
            pool_counter += 1
            local_counter = 1
            conv_counter += 1
            print(last_layer[0])

        elif layername[l] == 'L':
            # Loss: n.loss layer is only in training and testing nets, but not in deploy net.
            if deploy == False and "lmdb" not in data_source:
                n.tops['map_vec_stage%d' % stage] = L.Flatten(
                    n.tops[last_layer[0]])
                n.tops['loss_stage%d' % stage] = L.EuclideanLoss(
                    n.tops['map_vec_stage%d' % stage], n.tops[label_name[1]])
            elif deploy == False:
                level = 1
                name = 'weight_stage%d' % stage
                n.tops[name] = L.Eltwise(n.tops[last_layer[level]],
                                         n.tops[label_name[(level + 2)]],
                                         operation=P.Eltwise.PROD)
                n.tops['loss_stage%d' % stage] = L.EuclideanLoss(
                    n.tops[name], n.tops[label_name[level]])

            print('loss %d' % stage)
            stage += 1
            conv_counter = 1
            pool_counter = 1
            drop_counter = 1
            local_counter = 1
            state = 'image'

        elif layername[l] == 'L2':
            # Loss: n.loss layer is only in training and testing nets, but not in deploy net.
            weight = [lr_mult_distro[3], 1]
            # print lr_mult_distro[3]
            for level in range(0, 2):
                if deploy == False and "lmdb" not in data_source:
                    n.tops['map_vec_stage%d_L%d' %
                           (stage, level + 1)] = L.Flatten(
                               n.tops[last_layer[level]])
                    n.tops['loss_stage%d_L%d' %
                           (stage, level + 1)] = L.EuclideanLoss(
                               n.tops['map_vec_stage%d' % stage],
                               n.tops[label_name[level]],
                               loss_weight=weight[level])
                elif deploy == False:
                    name = 'weight_stage%d_L%d' % (stage, level + 1)
                    n.tops[name] = L.Eltwise(n.tops[last_layer[level]],
                                             n.tops[label_name[(level + 2)]],
                                             operation=P.Eltwise.PROD)
                    n.tops['loss_stage%d_L%d' %
                           (stage, level + 1)] = L.EuclideanLoss(
                               n.tops[name],
                               n.tops[label_name[level]],
                               loss_weight=weight[level])

                print('loss %d level %d' % (stage, level + 1))

            stage += 1
            #last_connect = last_layer
            #last_layer = 'image'
            conv_counter = 1
            pool_counter = 1
            drop_counter = 1
            local_counter = 1
            state = 'image'

        elif layername[l] == 'L3':
            # Loss: n.loss layer is only in training and testing nets, but not in deploy net.
            weight = [lr_mult_distro[3], 1]
            # print lr_mult_distro[3]
            if deploy == False:
                level = 0
                n.tops['loss_stage%d_L%d' %
                       (stage, level + 1)] = L.Euclidean2Loss(
                           n.tops[last_layer[level]],
                           n.tops[label_name[level]],
                           n.tops[label_name[2]],
                           loss_weight=weight[level])
                print('loss %d level %d' % (stage, level + 1))
                level = 1
                n.tops['loss_stage%d_L%d' %
                       (stage, level + 1)] = L.EuclideanLoss(
                           n.tops[last_layer[level]],
                           n.tops[label_name[level]],
                           loss_weight=weight[level])
                print('loss %d level %d' % (stage, level + 1))

            stage += 1
            #last_connect = last_layer
            #last_layer = 'image'
            conv_counter = 1
            pool_counter = 1
            drop_counter = 1
            local_counter = 1
            state = 'image'

        elif layername[l] == 'D':
            if deploy == False:
                n.tops['drop%d_stage%d' % (drop_counter, stage)] = L.Dropout(
                    n.tops[last_layer[0]],
                    in_place=True,
                    dropout_param=dict(dropout_ratio=0.5))
                drop_counter += 1
        elif layername[l] == '@':
            #if not share_point:
            #    share_point = last_layer
            n.tops['concat_stage%d' % stage] = L.Concat(
                n.tops[last_layer[0]],
                n.tops[last_layer[1]],
                n.tops[share_point],
                concat_param=dict(axis=1))

            local_counter = 1
            state = 'fuse'
            last_layer[0] = 'concat_stage%d' % stage
            last_layer[1] = 'concat_stage%d' % stage
            print(last_layer)
        elif layername[l] == '$':
            share_point = last_layer[0]
            pool_counter += 1
            local_counter = 1
            print('share')

    # final process
    stage -= 1
    #if stage == 1:
    #    n.silence = L.Silence(n.pool_center_lower, ntop=0)

    if deploy == False:
        return str(n.to_proto())
    else:  # for generating the deploy net
        # generate the input information header string
        deploy_str = 'input: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}'.format(
            '"' + input + '"', dim1, dim2, dim3, dim4)
        # assemble the input header with the net layers string.  remove the first placeholder layer from the net string.
        return deploy_str + '\n' + 'layer {' + 'layer {'.join(
            str(n.to_proto()).split('layer {')[2:])
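
# Usage sketch (assumed, illustrative values only): each layername entry is an
# opcode consumed by the loop above ('V' VGG conv, 'B' block advance, 'P' pool,
# 'C'/'C2' CPM convs, 'L'/'L2'/'L3' losses, 'D' dropout, '@' concat, '$' share
# point); kernel/stride/outCH must be index-aligned with layername (the 'C2'
# entry of outCH is overwritten to 26/15 inside the loop), and lr_mult_distro
# needs a fourth entry whenever an 'L2' stage is present.
demo_layers = ['V', 'P', 'V', 'P', 'C', 'C2', 'L2']
demo_kernel = [3, 2, 3, 2, 3, 1, 0]
demo_stride = [1, 2, 1, 2, 1, 1, 0]
demo_outCH = [64, 64, 128, 128, 128, 57, 0]
demo_labels = ['vec_weight', 'heat_weight', 'vec_mask', 'heat_mask',
               'vec_label', 'heat_label']
deploy_txt = setLayers_twoBranches('', 1, demo_layers, demo_kernel,
                                   demo_stride, demo_outCH, demo_labels,
                                   dict(num_parts=56), deploy=True,
                                   lr_mult_distro=[1, 1, 1, 1])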
def RemPoseStage_Train(net, from_layer="concat_stage1", out_layer="concat_stage2",
                       stage=1, mask_vec="vec_mask", mask_heat="heat_mask",
                       label_vec="vec_label", label_heat="heat_label",
                       short_cut=True, base_layer="convf", lr=1, decay=1):
    kwargs = {
        'param': [
            dict(lr_mult=lr, decay_mult=decay),
            dict(lr_mult=2 * lr, decay_mult=0)
        ],
        'weight_filler':
        dict(type='gaussian', std=0.01),
        'bias_filler':
        dict(type='constant', value=0)
    }
    assert from_layer in net.keys()
    # The vec_*/heat_* channel, kernel, and pad lists (and use_layers below)
    # are read from module-level configuration.
    assert len(vec_channels) == len(heat_channels)
    assert len(vec_channels) == len(vec_kernels)
    assert len(vec_channels) == len(vec_pads)
    assert len(heat_channels) == len(heat_pads)
    assert len(heat_channels) == len(heat_kernels)
    from1_layer = from_layer
    from2_layer = from_layer
    for layer in range(1, use_layers):
        # vec
        conv_vec = "stage{}_conv{}_vec".format(stage, layer)
        net[conv_vec] = L.Convolution(net[from1_layer],
                                      num_output=vec_channels[layer - 1],
                                      pad=vec_pads[layer - 1],
                                      kernel_size=vec_kernels[layer - 1],
                                      **kwargs)
        relu_vec = "stage{}_relu{}_vec".format(stage, layer)
        net[relu_vec] = L.ReLU(net[conv_vec], in_place=True)
        from1_layer = relu_vec
        # heat
        conv_heat = "stage{}_conv{}_heat".format(stage, layer)
        net[conv_heat] = L.Convolution(net[from2_layer],
                                       num_output=heat_channels[layer - 1],
                                       pad=heat_pads[layer - 1],
                                       kernel_size=heat_kernels[layer - 1],
                                       **kwargs)
        relu_heat = "stage{}_relu{}_heat".format(stage, layer)
        net[relu_heat] = L.ReLU(net[conv_heat], in_place=True)
        from2_layer = relu_heat
    # output
    conv_vec = "stage{}_conv{}_vec".format(stage, use_layers)
    net[conv_vec] = L.Convolution(net[from1_layer],
                                  num_output=vec_channels[use_layers - 1],
                                  pad=vec_pads[use_layers - 1],
                                  kernel_size=vec_kernels[use_layers - 1],
                                  **kwargs)
    conv_heat = "stage{}_conv{}_heat".format(stage, use_layers)
    net[conv_heat] = L.Convolution(net[from2_layer],
                                   num_output=heat_channels[use_layers - 1],
                                   pad=heat_pads[use_layers - 1],
                                   kernel_size=heat_kernels[use_layers - 1],
                                   **kwargs)
    weight_vec = "weight_stage{}_vec".format(stage)
    weight_heat = "weight_stage{}_heat".format(stage)
    loss_vec = "loss_stage{}_vec".format(stage)
    loss_heat = "loss_stage{}_heat".format(stage)
    net[weight_vec] = L.Eltwise(net[conv_vec],
                                net[mask_vec],
                                eltwise_param=dict(operation=P.Eltwise.PROD))
    net[loss_vec] = L.EuclideanLoss(net[weight_vec],
                                    net[label_vec],
                                    loss_weight=1)
    net[weight_heat] = L.Eltwise(net[conv_heat],
                                 net[mask_heat],
                                 eltwise_param=dict(operation=P.Eltwise.PROD))
    net[loss_heat] = L.EuclideanLoss(net[weight_heat],
                                     net[label_heat],
                                     loss_weight=1)
    # feature concatenation for the next stage
    if short_cut:
        fea_layers = []
        fea_layers.append(net[conv_vec])
        fea_layers.append(net[conv_heat])
        assert base_layer in net.keys()
        fea_layers.append(net[base_layer])
        net[out_layer] = L.Concat(*fea_layers, axis=1)
    return net
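
# Usage sketch (assumed, illustrative shapes): RemPoseStage_Train reads the
# module-level lists vec_channels/heat_channels, vec_kernels/heat_kernels,
# vec_pads/heat_pads and the stage depth use_layers, and it expects the
# mask/label/base tops to already exist; DummyData stands in for the real
# front-end here.
vec_channels, heat_channels = [128, 128, 38], [128, 128, 19]
vec_kernels, heat_kernels = [3, 3, 1], [3, 3, 1]
vec_pads, heat_pads = [1, 1, 0], [1, 1, 0]
use_layers = 3
demo_net = caffe.NetSpec()
demo_net.concat_stage1 = L.DummyData(shape=[dict(dim=[1, 185, 46, 46])])
demo_net.convf = L.DummyData(shape=[dict(dim=[1, 128, 46, 46])])
demo_net.vec_mask = L.DummyData(shape=[dict(dim=[1, 38, 46, 46])])
demo_net.heat_mask = L.DummyData(shape=[dict(dim=[1, 19, 46, 46])])
demo_net.vec_label = L.DummyData(shape=[dict(dim=[1, 38, 46, 46])])
demo_net.heat_label = L.DummyData(shape=[dict(dim=[1, 19, 46, 46])])
demo_net = RemPoseStage_Train(demo_net, from_layer="concat_stage1",
                              out_layer="concat_stage2", stage=2)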
def setLayers(data_source,
              batch_size,
              layername,
              kernel,
              stride,
              outCH,
              label_name,
              transform_param_in,
              deploy=False):
    # It is tricky to produce the deploy prototxt file, as the data input does not come from a layer, so we have to create a workaround.
    # Producing the training and testing prototxt files is pretty straightforward.
    n = caffe.NetSpec()
    assert len(layername) == len(kernel)
    assert len(layername) == len(stride)
    assert len(layername) == len(outCH)

    # produce data definition for the training/testing net
    if deploy == False:
        # here we will return the new structure for loading h36m dataset
        n.data, n.tops['label'] = L.CPMData(cpmdata_param=dict(
            backend=1, source=data_source, batch_size=batch_size),
                                            transform_param=transform_param_in,
                                            ntop=2)
        n.tops[label_name[1]], n.tops[label_name[0]], n.tops[
            label_name[2]] = L.Slice(n.label,
                                     slice_param=dict(axis=1,
                                                      slice_point=[18, 36]),
                                     ntop=3)
        n.image, n.center_map = L.Slice(n.data,
                                        slice_param=dict(axis=1,
                                                         slice_point=3),
                                        ntop=2)
    else:
        input = "data"
        dim1 = 18
        dim2 = 5
        dim3 = 368
        dim4 = 368
        # make an empty "data" layer so the next layer accepting input will be able to take the correct blob name "data",
        # we will later have to remove this layer from the serialization string, since this is just a placeholder
        n.data = L.Layer()
        # The Slice layer slices the input blob into multiple outputs along a given dimension.
        # axis: 1 defines in which dimension to slice.
        # slice_point: 3 defines the index in the selected dimension (the number of
        # indices must be equal to the number of top blobs minus one).
        # For an input of Nx3x1x1 with slice_point = 2:
        # top1 : Nx2x1x1
        # top2 : Nx1x1x1
        n.image, n.center_map, n.tops[label_name[2]] = L.Slice(
            n.data, slice_param=dict(axis=1, slice_point=[3, 4]), ntop=3)

    n.pool_center_lower = L.Pooling(n.center_map,
                                    kernel_size=9,
                                    stride=8,
                                    pool=P.Pooling.AVE)

    # just follow arrays..CPCPCPCPCCCC....
    last_layer = 'image'
    stage = 1
    conv_counter = 1
    last_manifold = 'NONE'
    last_merg = 'NONE'
    pool_counter = 1
    drop_counter = 1
    state = 'image'  # can be image or fuse
    share_point = 0
    manifold_current_stage = False
    merge_init_avg = False

    for l in range(0, len(layername)):
        decay_mult = 1

        if layername[l] == 'C':
            if state == 'image':
                conv_name = 'conv%d_stage%d' % (conv_counter, stage)
            else:
                conv_name = 'Mconv%d_stage%d' % (conv_counter, stage)
            #if stage == 1:
            #    lr_m = 5
            #else:
            #    lr_m = 1
            lr_m = 1e-3  # 1e-3 (best res so far)
            if ((stage == 1 and conv_counter == 7) or
                (stage > 1 and state != 'image' and (conv_counter in [1, 5]))):
                conv_name = '%s_new' % conv_name
                lr_m = 1  #1e-2
                decay_mult = 1

            # if (stage <= 4):
            #     lr_m = 0
            #     decay_mult = 0
            # additional for the python layer:
            # if (stage > 1 and state != 'image' and (conv_counter == 1)):
            #     conv_name = '%s_mf' % conv_name
            #     lr_m = 1  # 1e-1
            n.tops[conv_name] = L.Convolution(
                n.tops[last_layer],
                kernel_size=kernel[l],
                num_output=outCH[l],
                pad=int(math.floor(kernel[l] / 2)),
                param=[
                    dict(lr_mult=lr_m, decay_mult=decay_mult),
                    dict(lr_mult=lr_m * 2, decay_mult=0)
                ],
                weight_filler=dict(type='gaussian', std=0.01),
                bias_filler=dict(type='constant'))
            last_layer = conv_name
            if layername[l + 1] not in ('L', 'M'):
                if (state == 'image'):
                    ReLUname = 'relu%d_stage%d' % (conv_counter, stage)
                    n.tops[ReLUname] = L.ReLU(n.tops[last_layer],
                                              in_place=True)
                else:
                    ReLUname = 'Mrelu%d_stage%d' % (conv_counter, stage)
                    n.tops[ReLUname] = L.ReLU(n.tops[last_layer],
                                              in_place=True)
                last_layer = ReLUname
            conv_counter += 1
        elif layername[l] == 'P':  # Pooling
            n.tops['pool%d_stage%d' % (pool_counter, stage)] = L.Pooling(
                n.tops[last_layer],
                kernel_size=kernel[l],
                stride=stride[l],
                pool=P.Pooling.MAX)
            last_layer = 'pool%d_stage%d' % (pool_counter, stage)
            pool_counter += 1
        elif layername[l] == 'M':
            last_manifold = 'manifolds_stage%d' % stage
            last_merg = 'merge_hm_stage%d' % stage
            debug_mode = 0
            # if (stage == 5):
            #     debug_mode = 1
            manifold_current_stage = True
            if (stage >= 4):
                merge_init_avg = True
            # TODO: change it back
            # if stage == 4:
            #     debug_mode = 4
            parameters = '{"njoints": 17,"sigma": 1, "debug_mode": %r, "max_area": 100, "percentage_max": 3, "train": %u, "Lambda": %.3f }' % (
                debug_mode, not deploy, 0.05)
            # DONE: change it back
            # if manifold_current_stage:
            n.tops[last_manifold] = L.Python(
                n.tops[last_layer],
                n.tops[label_name[2]],
                python_param=dict(module='newheatmaps',
                                  layer='MyCustomLayer',
                                  param_str=parameters))  #,loss_weight=1)
            # n.tops[last_manifold] = L.Python(n.tops[label_name[1]], n.tops[label_name[2]], python_param=dict(module='newheatmaps', layer='MyCustomLayer', param_str=parameters))  # ,loss_weight=1)
            init_str = 'zero'
            if merge_init_avg:
                init_str = 'avg'
            merge_lr = 5e-2
            parameters = '{"init": %r, "learning_rate": %r}' % (init_str,
                                                                merge_lr)
            n.tops[last_merg] = L.Python(n.tops[last_layer],
                                         n.tops[last_manifold],
                                         python_param=dict(
                                             module='processheatmaps',
                                             layer='MergeHeatMaps',
                                             param_str=parameters))
        elif layername[l] == 'L':
            # Loss: n.loss layer is only in training and testing nets, but not in deploy net.
            if deploy == False:
                if stage == 1:
                    n.tops['loss_stage%d' % stage] = L.EuclideanLoss(
                        n.tops[last_layer], n.tops[label_name[0]])
                else:
                    n.tops['loss_stage%d' % stage] = L.EuclideanLoss(
                        n.tops[last_layer], n.tops[label_name[1]])

            stage += 1
            last_connect = last_layer
            last_layer = 'image'
            conv_counter = 1
            pool_counter = 1
            drop_counter = 1
            state = 'image'
        elif layername[l] == 'D':
            if deploy == False:
                n.tops['drop%d_stage%d' % (drop_counter, stage)] = L.Dropout(
                    n.tops[last_layer],
                    in_place=True,
                    dropout_param=dict(dropout_ratio=0.5))
                drop_counter += 1
        elif layername[l] == '@':
            # DONE: change it back
            # not this: n.tops['concat_stage%d' % stage] = L.Concat(n.tops[last_layer], n.tops[last_connect], n.pool_center_lower, n.tops[label_name[1]], concat_param=dict(axis=1))
            # n.tops['concat_stage%d' % stage] = L.Concat(n.tops[last_layer], n.tops[last_connect], n.pool_center_lower, n.tops[last_manifold], concat_param=dict(axis=1))
            if manifold_current_stage:
                n.tops['concat_stage%d' % stage] = L.Concat(
                    n.tops[last_layer],
                    n.tops[last_merg],
                    n.pool_center_lower,
                    concat_param=dict(axis=1))
            else:
                n.tops['concat_stage%d' % stage] = L.Concat(
                    n.tops[last_layer],
                    n.tops[last_connect],
                    n.pool_center_lower,
                    concat_param=dict(axis=1))

            conv_counter = 1
            state = 'fuse'
            last_layer = 'concat_stage%d' % stage
        elif layername[l] == '$':
            if not share_point:
                share_point = last_layer
            else:
                last_layer = share_point
    # final process
    stage -= 1
    if stage == 1:
        n.silence = L.Silence(n.pool_center_lower, ntop=0)

    if deploy == False:
        return str(n.to_proto())
    else:  # for generating the deploy net
        # generate the input information header string
        deploy_str = 'input: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}'.format(
            '"' + input + '"', dim1, dim2, dim3, dim4)
        # assemble the input header with the net layers string.  remove the first placeholder layer from the net string.
        return deploy_str + '\n' + 'layer {' + 'layer {'.join(
            str(n.to_proto()).split('layer {')[2:])
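
# A minimal sketch of the deploy workaround used by both setLayers variants
# (assumption: the placeholder L.Layer() "data" layer is always the first
# serialized layer). It prepends the legacy input header and drops the
# placeholder from the serialized net, exactly as the return statements above
# do inline.
def strip_placeholder_and_prepend_input(proto_str, input_name, dims):
    header = 'input: "%s"\n' % input_name
    header += ''.join('input_dim: %d\n' % d for d in dims)
    # split(...)[0] is the text before the first layer and split(...)[1] is
    # the placeholder body; keep everything from the second layer onwards.
    return header + 'layer {' + 'layer {'.join(proto_str.split('layer {')[2:])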