Example #1
def centroid_pos_color_loss(trans_features, computed_spixel_feat,
                            new_spix_indices, num_spixels, l_weight_pos,
                            l_weight_color):

    new_spixel_features = L.SpixelFeature(
        trans_features, new_spix_indices,
        spixel_feature_param=dict(type=P.SpixelFeature.AVGRGB,
                                  rgb_scale=1.0,
                                  ignore_idx_value=-10,
                                  ignore_feature_value=255,
                                  max_spixels=int(num_spixels)),
        propagate_down=[True, False])

    pos_recon_feat, color_recon_feat = L.Slice(computed_spixel_feat,
                                               slice_param=dict(axis=1,
                                                                slice_point=2),
                                               ntop=2)

    pos_pix_feat, color_pix_feat = L.Slice(new_spixel_features,
                                           slice_param=dict(axis=1,
                                                            slice_point=2),
                                           ntop=2)

    pos_loss = L.EuclideanLoss(pos_recon_feat,
                               pos_pix_feat,
                               loss_weight=l_weight_pos)
    color_loss = L.EuclideanLoss(color_recon_feat,
                                 color_pix_feat,
                                 loss_weight=l_weight_color)

    return pos_loss, color_loss
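
A note on the Slice semantics used throughout these examples: a single slice_point=2 on axis 1 produces exactly two tops, covering channels [0, 2) and [2, C). A minimal standalone sketch (dummy 5-channel input; all names hypothetical):

import caffe
from caffe import layers as L

ns = caffe.NetSpec()
ns.feat = L.DummyData(dummy_data_param=dict(
    shape=dict(dim=[1, 5, 8, 8]), data_filler=dict(type='gaussian')))
# channels 0-1 become 'pos', channels 2-4 become 'color'
ns.pos, ns.color = L.Slice(ns.feat, slice_param=dict(axis=1, slice_point=2), ntop=2)
print(ns.to_proto())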
Example #2
    def gru(self,
            prefix,
            x,
            cont,
            static=None,
            h=None,
            batch_size=100,
            T=0,
            gru_hidden=1000,
            weight_lr_mult=1,
            bias_lr_mult=2,
            weight_decay_mult=1,
            bias_decay_mult=0,
            weight_filler=None,
            bias_filler=None):

        if not weight_filler:
            weight_filler = self.uniform_weight_filler(-0.08, 0.08)
        if not bias_filler:
            bias_filler = self.constant_filler(0)
        if not h:
            h = self.dummy_data_layer([1, batch_size, gru_hidden], 1)

        gate_dim = gru_hidden * 3
        if static:  # assume static is an N x F blob
            static_transform = L.InnerProduct(static,
                                              num_output=gate_dim,
                                              axis=2,
                                              weight_filler=weight_filler,
                                              bias_filler=bias_filler)
            static_transform = L.Reshape(static_transform,
                                         shape=dict(dim=[1, -1, gate_dim]))
            self.rename_tops(static_transform, '%s_x_static' % prefix)

        x_in = L.Slice(x, ntop=T, axis=0)
        cont_in = L.Slice(cont, ntop=T, axis=0)

        for t in range(T):
            h = self.gru_unit(prefix,
                              x_in[t],
                              cont_in[t],
                              static,
                              h,
                              batch_size=batch_size,
                              timestep=t,
                              gru_hidden=gru_hidden,
                              weight_lr_mult=weight_lr_mult,
                              bias_lr_mult=bias_lr_mult,
                              weight_decay_mult=weight_decay_mult,
                              bias_decay_mult=bias_decay_mult,
                              weight_filler=weight_filler,
                              bias_filler=bias_filler)

        return h
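
When slice_point is omitted, as in the x_in and cont_in slices above, Caffe divides the axis into ntop equal parts, so the unroll assumes the time axis has length exactly T. The behavior in isolation (shapes hypothetical):

import caffe
from caffe import layers as L

T, B, F = 4, 2, 16
ns = caffe.NetSpec()
ns.x = L.DummyData(dummy_data_param=dict(shape=dict(dim=[T, B, F])))
# no slice_point: axis 0 is cut into T equal slices of shape (1, B, F)
steps = L.Slice(ns.x, ntop=T, axis=0)
for t in range(T):
    setattr(ns, 'x_t%d' % t, steps[t])
print(ns.to_proto())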
Example #3
    def _semantic_regularization(self, xSemPr, xSemLb, semReg):
        ns = self.netspec

        if self.semantics == ATTRIBUTES:
            name = 'SCoRe/semLoss'
            ns[name] = L.SigmoidCrossEntropyLoss(
                *[xSemPr, xSemLb],
                name=name,
                loss_weight=semReg / (len(self.constrains) * np.sqrt(2.)) *
                10.,
                include=dict(phase=caffe.TRAIN))
        else:
            c_keys = [key for key in self.constrains.keys()]
            losses = ['SCoRe/semLoss/%s' % key for key in c_keys]
            scores = ['SCoRe/semLoss/%s/scores' % key for key in c_keys]
            labels = ['SCoRe/semLoss/%s/labels' % key for key in c_keys]

            # Slice semantic scores
            xSemPr_name = [k for k, v in ns.tops.iteritems() if v == xSemPr][0]
            slice_scores = L.Slice(name='SCoRe/semLoss/slice_scores',
                                   bottom=[xSemPr_name],
                                   ntop=len(scores),
                                   top=scores,
                                   in_place=True,
                                   slice_point=np.cumsum(
                                       self.num_states)[:-1].tolist(),
                                   include=dict(phase=caffe.TRAIN))

            # Slice semantic labels
            xSemLb_name = [k for k, v in ns.tops.iteritems() if v == xSemLb][0]
            slice_labels = L.Slice(name='SCoRe/semLoss/slice_labels',
                                   bottom=[xSemLb_name],
                                   ntop=len(labels),
                                   top=labels,
                                   in_place=True,
                                   slice_point=range(1, len(self.constrains)),
                                   include=dict(phase=caffe.TRAIN))

            # Add supervision to each slice
            for i, xLoss in enumerate(losses):
                ns[xLoss] = L.SoftmaxWithLoss(
                    *[slice_scores[i], slice_labels[i]],
                    name=xLoss,
                    loss_weight=semReg / len(self.constrains),
                    include=dict(phase=caffe.TRAIN))

            # Summarize supervisions for display
            ns['SCoRe/semLoss'] = L.Eltwise(
                *[ns[l] for l in losses],
                name='SCoRe/semLoss',
                operation=P.Eltwise.SUM,
                coeff=[semReg / len(self.constrains)] * len(losses),
                include=dict(phase=caffe.TRAIN))
Example #4
def add_cnn(n, data, act, batch_size, T, K, num_step, mode='train'):
    n.x_flat = L.Flatten(data, axis=1, end_axis=2)
    n.act_flat = L.Flatten(act, axis=1, end_axis=2)
    if mode == 'train':
        x = L.Slice(n.x_flat, axis=1, ntop=T)
        act_slice = L.Slice(n.act_flat, axis=1, ntop=T - 1)
        x_set = ()
        label_set = ()
        x_hat_set = ()
        silence_set = ()
        for i in range(T):
            t = tag(i + 1)
            n.tops['x' + t] = x[i]
            if i < K:
                x_set += (x[i], )
            if i < T - 1:
                n.tops['act' + t] = act_slice[i]
            if i < K - 1:
                silence_set += (n.tops['act' + t], )
            if i >= K:
                label_set += (x[i], )
        n.label = L.Concat(*label_set, axis=0)
        input_list = list(x_set)
        for step in range(0, num_step):
            step_tag = tag(step + 1) if step > 0 else ''
            t = tag(step + K)
            tp = tag(step + K + 1)
            input_tuple = tuple(input_list)
            n.tops['input' + step_tag] = L.Concat(*input_tuple, axis=1)
            top = add_conv_enc(n, n.tops['input' + step_tag], tag=step_tag)
            n.tops['x_hat' + tp] = add_decoder(n,
                                               top,
                                               n.tops['act' + t],
                                               flatten=False,
                                               tag=step_tag)
            input_list.pop(0)
            input_list.append(n.tops['x_hat' + tp])
    else:
        top = add_conv_enc(n, n.x_flat)
        n.tops['x_hat' + tag(K + 1)] = add_decoder(n,
                                                   top,
                                                   n.act_flat,
                                                   flatten=False)
    if mode == 'train':
        x_hat = ()
        for i in range(K, T):
            t = tag(i + 1)
            x_hat += (n.tops['x_hat' + t], )
        n.x_hat = L.Concat(*x_hat, axis=0)
        n.silence = L.Silence(*silence_set, ntop=0)
        n.l2_loss = L.EuclideanLoss(n.x_hat, n.label)
    return n
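
The multi-step rollout above keeps a sliding window of K frames, dropping the oldest real frame and appending the newest prediction at every step. The list mechanics in isolation (names hypothetical):

window = ['x1', 'x2', 'x3', 'x4']             # K = 4 seed frames
for step in range(3):
    pred = 'x_hat%d' % (5 + step)             # decoder output for the next frame
    window.pop(0)                             # drop the oldest frame
    window.append(pred)                       # feed the prediction back in
print(window)                                 # ['x4', 'x_hat5', 'x_hat6', 'x_hat7']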
Example #5
def data_layer_trimese(net,
                       inputdb,
                       mean_file,
                       batch_size,
                       net_type,
                       height,
                       width,
                       nchannels,
                       slice_points,
                       crop_size=-1):
    data, label = data_layer_stacked(net,
                                     inputdb,
                                     mean_file,
                                     batch_size,
                                     net_type,
                                     height,
                                     width,
                                     nchannels,
                                     crop_size=crop_size)
    slices = L.Slice(data[0],
                     ntop=3,
                     name="data_trimese",
                     slice_param=dict(axis=1, slice_point=slice_points))
    #for n,slice in enumerate(slices):
    #    net.__setattr__( slice, "data_plane%d"%(n) )

    return slices, label
Example #6
    def context_supervision_loss(self, distance, lw=1, ind_loss=None):
        """
    Distance is positive; want gt distance to be SMALLER than other distances.
    Loss used for context supervision is also ranking loss:
        Look at rank loss between all possible pairs of moments; want gt distance to be smaller.
        Take average.
    """

        slices = L.Slice(distance, ntop=21, axis=1)
        gt = slices[0]
        setattr(self.n, 'gt_slice', gt)
        ranking_losses = []
        for i in range(1, 21):
            setattr(self.n, 'context_slice_%d' % i, slices[i])
            negate_distance = L.Power(slices[i], scale=-1)
            # Eltwise operation=1 is SUM: computes d_gt - d_other
            max_sum = L.Eltwise(gt, negate_distance, operation=1)
            max_sum_margin = L.Power(max_sum, shift=self.margin)
            max_sum_margin_relu = L.ReLU(max_sum_margin, in_place=False)
            if ind_loss:
                max_sum_margin_relu = L.Reshape(
                    max_sum_margin_relu, shape=dict(dim=[self.batch_size, 1]))
                # Eltwise operation=0 is PROD: mask each sample's loss by its indicator
                max_sum_margin_relu = L.Eltwise(max_sum_margin_relu,
                                                ind_loss,
                                                operation=0)
            setattr(self.n, 'max_sum_margin_relu_%d' % i, max_sum_margin_relu)
            # Reduction operation=4 is MEAN over the batch
            ranking_loss = L.Reduction(max_sum_margin_relu, operation=4)
            ranking_losses.append(ranking_loss)
        # Eltwise operation=1 is SUM: accumulate the 20 pairwise ranking losses
        sum_ranking_losses = L.Eltwise(*ranking_losses, operation=1)
        loss = L.Power(sum_ranking_losses, scale=1 / 21., loss_weight=[lw])
        return loss
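
Each term above is a standard margin ranking loss, max(0, margin + d_gt - d_other), averaged over the batch. Numerically (margin of 0.1 assumed for illustration):

margin = 0.1
print(max(0.0, margin + 0.3 - 0.5))           # 0.0: gt beats the other distance by more than the margin
print(max(0.0, margin + 0.45 - 0.5))          # 0.05: inside the margin, so still penalized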
Example #7
def example_use_of_lstm():
  T = 3 # number of time steps
  B = 10 # batch size
  lstm_output = 500 # dimension of LSTM unit

  # use net spec
  ns = caffe.NetSpec()

  # we need initial values for h and c
  ns.h0 = L.DummyData(name='h0', dummy_data_param={'shape':{'dim':[1,B,lstm_output]},
                               'data_filler':{'type':'constant','value':0}})

  ns.c0 = L.DummyData(name='c0', dummy_data_param={'shape':{'dim':[1,B,lstm_output]},
                                   'data_filler':{'type':'constant','value':0}})

  # simulate input X over T time steps and B sequences (batch size)
  ns.X = L.DummyData(name='X', dummy_data_param={'shape': {'dim':[T,B,128,10,10]}} )
  # slice X for T time steps
  xt = L.Slice(ns.X, name='slice_X',ntop=T,slice_param={'axis':0,'slice_point':range(1,T)})
  # unroling
  h = ns.h0
  c = ns.c0
  lstm_weights = None
  tops = []
  for t in xrange(T):
    c, h, lstm_weights = single_time_step_lstm( ns, h, c, xt[t], 't'+str(t)+'/', lstm_output, lstm_weights)
    tops.append(h)
    ns.__setattr__('c'+str(t),c)
    ns.__setattr__('h'+str(t),h)
  # concat all LSTM tops (h[t]) to a single layer
  ns.H = L.Concat( *tops, name='concat_h',concat_param={'axis':0} )
  return ns
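
single_time_step_lstm is defined elsewhere; the weight reuse it performs through lstm_weights rests on Caffe's named-parameter sharing, where layers that declare the same param name share one blob. The mechanism in miniature (names hypothetical):

import caffe
from caffe import layers as L

ns = caffe.NetSpec()
ns.x0 = L.DummyData(dummy_data_param=dict(shape=dict(dim=[1, 8])))
ns.x1 = L.DummyData(dummy_data_param=dict(shape=dict(dim=[1, 8])))
# both layers name their weights 'shared_w'/'shared_b', so Caffe allocates them once
ns.y0 = L.InnerProduct(ns.x0, num_output=4,
                       param=[dict(name='shared_w'), dict(name='shared_b')])
ns.y1 = L.InnerProduct(ns.x1, num_output=4,
                       param=[dict(name='shared_w'), dict(name='shared_b')])
print(ns.to_proto())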
Example #8
def concat_slice_net():
    n = caffe.NetSpec()
    n.data = L.DummyData(dummy_data_param=dict(num=20, channels=50, height=64,
                                               width=64,
                                               data_filler=dict(type="gaussian")))
    # slice the 50 channels into 20 + 10 + 20 (axis=1 is the channel axis)
    n.a, n.b, n.c = L.Slice(n.data, ntop=3, slice_point=[20, 30], axis=1)
    n.d = L.Concat(n.a, n.b, axis=1)
    n.e = L.Eltwise(n.a, n.c)

    return n.to_proto()
Example #9
 def test_slice(self):
     n = caffe.NetSpec()
     n.input1 = L.Input(shape=make_shape([6, 4, 64, 64]))
     n.output1, n.output2, n.output3 = L.Slice(n.input1,
                                               ntop=3,
                                               axis=1,
                                               slice_point=[1, 3])
     self._test_model(*self._netspec_to_model(n, 'slice'))
Example #10
def slice_layer(net, layername, inputlayer, axis, slice_points):
    slices = L.Slice(inputlayer,
                     ntop=3,
                     name=layername,
                     slice_param=dict(axis=axis, slice_point=slice_points))
    for n, slic in enumerate(slices):
        net.__setattr__(layername + "_%d" % (n), slic)
    return slices
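
Note the hard-coded ntop=3 silently assumes len(slice_points) == 2; Caffe requires ntop == len(slice_point) + 1. A generalized sketch:

from caffe import layers as L

def slice_layer_n(net, layername, inputlayer, axis, slice_points):
    ntop = len(slice_points) + 1              # always one more top than cut points
    slices = L.Slice(inputlayer,
                     ntop=ntop,
                     name=layername,
                     slice_param=dict(axis=axis, slice_point=slice_points))
    for n, slic in enumerate(slices):
        net.__setattr__(layername + "_%d" % (n), slic)
    return slices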
Example #11
def mPoseNet_Decomp_3S_Train(net,
                             data_layer="data",
                             label_layer="label",
                             train=True,
                             **decomp_kwargs):
    # input
    if train:
        net.vec_mask, net.heat_mask, net.vec_temp, net.heat_temp = \
            L.Slice(net[label_layer], ntop=4, slice_param=dict(slice_point=[34,52,86], axis=1))
    else:
        net.vec_mask, net.heat_mask, net.vec_temp, net.heat_temp, net.gt = \
            L.Slice(net[label_layer], ntop=5, slice_param=dict(slice_point=[34,52,86,104], axis=1))
    # label
    net.vec_label = L.Eltwise(net.vec_mask,
                              net.vec_temp,
                              eltwise_param=dict(operation=P.Eltwise.PROD))
    net.heat_label = L.Eltwise(net.heat_mask,
                               net.heat_temp,
                               eltwise_param=dict(operation=P.Eltwise.PROD))
    # Darknet19
    net = YoloNetPart_Decomp(net, from_layer=data_layer, use_bn=True, use_layers=5, use_sub_layers=5, \
            final_pool=False, lr=1, decay=1, **decomp_kwargs)
    # concat conv4_3 & conv5_5
    net = UnifiedMultiScaleLayers(net, layers=["conv4_3_c","conv5_5_c"], tags=["Ref","Up"], \
                                  unifiedlayer="convf", upsampleMethod="Reorg")
    # Stages
    baselayer = "convf"
    use_3_layers = 5
    use_1_layers = 0
    net = mPose_StageX_decomp_Train(net, from_layer=baselayer, out_layer="concat_stage1", stage=1, \
                           mask_vec="vec_mask", mask_heat="heat_mask", \
                           label_vec="vec_label", label_heat="heat_label", \
                           use_3_layers=use_3_layers, use_1_layers=use_1_layers, short_cut=True, \
                           base_layer=baselayer, lr=4, decay=1, **decomp_kwargs)
    net = mPose_StageX_decomp_Train(net, from_layer="concat_stage1", out_layer="concat_stage2", stage=2, \
                           mask_vec="vec_mask", mask_heat="heat_mask", \
                           label_vec="vec_label", label_heat="heat_label", \
                           use_3_layers=use_3_layers, use_1_layers=use_1_layers, short_cut=True, \
                           base_layer=baselayer, lr=4, decay=1, **decomp_kwargs)
    net = mPose_StageX_decomp_Train(net, from_layer="concat_stage2", out_layer="concat_stage3", stage=3, \
                           mask_vec="vec_mask", mask_heat="heat_mask", \
                           label_vec="vec_label", label_heat="heat_label", \
                           use_3_layers=use_3_layers, use_1_layers=use_1_layers, short_cut=False, \
                           lr=4, decay=1, **decomp_kwargs)
    return net
Example #12
 def net():
     n = caffe.NetSpec()
     n.data = L.Input(input_param=dict(shape=dict(dim=data_shape)))
     # slice_point: the points at which the blob is split
     n.dout1, n.dout2, n.dout3 = L.Slice(n.data,
                                         slice_param={'axis': 0},
                                         ntop=3,
                                         slice_point=[10, 15])
     # n.lr, n.lg, n.lb = L.Slice(n.data, slice_param={'slice_dim':0},ntop=3,slice_point=[10, 15])
     return n.to_proto()
Example #13
def position_color_loss(recon_feat, pixel_features, pos_weight, col_weight):

    pos_recon_feat, color_recon_feat = L.Slice(recon_feat,
                                               slice_param=dict(axis=1,
                                                                slice_point=2),
                                               ntop=2)

    pos_pix_feat, color_pix_feat = L.Slice(pixel_features,
                                           slice_param=dict(axis=1,
                                                            slice_point=2),
                                           ntop=2)

    pos_loss = L.EuclideanLoss(pos_recon_feat,
                               pos_pix_feat,
                               loss_weight=pos_weight)
    color_loss = L.EuclideanLoss(color_recon_feat,
                                 color_pix_feat,
                                 loss_weight=col_weight)

    return pos_loss, color_loss
Example #14
def centroid_pos_color_loss2(trans_features, new_spixel_features, num_spixels,
                             l_weight_pos, l_weight_color):

    pos_recon_feat, color_recon_feat = L.Slice(new_spixel_features,
                                               slice_param=dict(axis=1,
                                                                slice_point=2),
                                               ntop=2)

    pos_pix_feat, color_pix_feat = L.Slice(trans_features,
                                           slice_param=dict(axis=1,
                                                            slice_point=2),
                                           ntop=2)

    pos_loss = L.EuclideanLoss(pos_recon_feat,
                               pos_pix_feat,
                               loss_weight=l_weight_pos)
    color_loss = L.EuclideanLoss(color_recon_feat,
                                 color_pix_feat,
                                 loss_weight=l_weight_color)

    return pos_loss, color_loss
Example #15
 def generate_caffe_prototxt(self, caffe_net, layer):
     if self.stride == 1:
         layer_x1, layer_x2 = L.Slice(layer, ntop=2, axis=1, slice_point=[self.in_channels//2])
         caffe_net[self.g_name + '/slice1'] = layer_x1
         caffe_net[self.g_name + '/slice2'] = layer_x2
         layer_x2 = slim.generate_caffe_prototxt(self.conv, caffe_net, layer_x2)
     else:
         layer_x1 = slim.generate_caffe_prototxt(self.conv0, caffe_net, layer)
         layer_x2 = slim.generate_caffe_prototxt(self.conv, caffe_net, layer)
     layer = L.Concat(layer_x1, layer_x2, axis=1)
     caffe_net[self.g_name + '/concat'] = layer
     layer = slim.generate_caffe_prototxt(self.shuffle, caffe_net, layer)
     return layer
Example #16
def RemPoseNet_Train(net, data_layer="data", label_layer="label"):
    # input
    net.vec_mask, net.heat_mask, net.vec_temp, net.heat_temp = \
        L.Slice(net[label_layer], ntop=4, slice_param=dict(slice_point=[34,52,86], axis=1))
    # label
    net.vec_label = L.Eltwise(net.vec_mask,
                              net.vec_temp,
                              eltwise_param=dict(operation=P.Eltwise.PROD))
    net.heat_label = L.Eltwise(net.heat_mask,
                               net.heat_temp,
                               eltwise_param=dict(operation=P.Eltwise.PROD))
    # BaseNet
    net = RemBaseNet(net,
                     from_layer=data_layer,
                     use_bn=base_use_bn,
                     use_conv6=False,
                     lr=1,
                     decay=1)
    # Stage-5
    stage_5 = "{}_{}".format(conv_stage_name[4], len(stage5_layers))
    if use_stride_conv[4]:
        stage_5 = "{}_{}".format(conv_stage_name[4], len(stage5_layers) - 1)

    # Stage-4
    stage_4 = "{}_{}".format(conv_stage_name[3], len(stage4_layers))
    if use_stride_conv[3]:
        stage_4 = "{}_{}".format(conv_stage_name[3], len(stage4_layers) - 1)
    net = UnifiedMultiScaleLayers(net,
                                  layers=[stage_4, stage_5],
                                  tags=["Ref", "Up"],
                                  unifiedlayer="convf",
                                  upsampleMethod="Reorg")
    # Stages
    baselayer = "convf"
    stage_lr = 1
    # STG#1
    net = RemPoseStage_Train(net, from_layer=baselayer, out_layer="concat_stage1", stage=1, \
                    mask_vec="vec_mask", mask_heat="heat_mask", \
                    label_vec="vec_label", label_heat="heat_label", \
                    short_cut=True, base_layer=baselayer, lr=stage_lr, decay=1)
    # STG#2
    net = RemPoseStage_Train(net, from_layer="concat_stage1", out_layer="concat_stage2", stage=2, \
                    mask_vec="vec_mask", mask_heat="heat_mask", \
                    label_vec="vec_label", label_heat="heat_label", \
                    short_cut=True, base_layer=baselayer, lr=stage_lr, decay=1)
    # STG#3
    net = RemPoseStage_Train(net, from_layer="concat_stage2", out_layer="concat_stage3", stage=3, \
                    mask_vec="vec_mask", mask_heat="heat_mask", \
                    label_vec="vec_label", label_heat="heat_label", \
                    short_cut=False, base_layer=baselayer, lr=stage_lr, decay=1)
    return net
Example #17
def generate_scores(split, config):

    n = caffe.NetSpec()
    batch_size = config.N
    mode_str = str(dict(split=split, batch_size=batch_size))
    n.language, n.cont, n.img_feature, n.spatial, n.label = L.Python(module=config.data_provider,
                                                                     layer='TossLayer',
                                                                     param_str=mode_str,
                                                                     ntop=5)
    # embedding
    n.embed = L.Embed(n.language, input_dim=config.vocab_size,
                      num_output=config.embed_dim,
                      weight_filler=dict(type='uniform', min=-0.08, max=0.08))

    # LSTM
    n.lstm = L.LSTM(n.embed, n.cont,
                    recurrent_param=dict(num_output=config.lstm_dim,
                                         weight_filler=dict(type='uniform', min=-0.08, max=0.08),
                                         bias_filler=dict(type='constant', value=0)))
    tops = L.Slice(n.lstm, ntop=config.T, slice_param=dict(axis=0))
    for i in range(config.T - 1):
        n.__setattr__('slice'+str(i), tops[i])
        n.__setattr__('silence'+str(i), L.Silence(tops[i], ntop=0))
    n.lstm_out = tops[-1]
    n.lstm_feat = L.Reshape(n.lstm_out, reshape_param=dict(shape=dict(dim=[-1, config.lstm_dim])))

    # L2 Normalize image and language features
    n.img_l2norm = L.L2Normalize(n.img_feature)
    n.lstm_l2norm = L.L2Normalize(n.lstm_feat)
    n.img_l2norm_resh = L.Reshape(n.img_l2norm,
                                  reshape_param=dict(shape=dict(dim=[-1, config.D_im])))
    n.lstm_l2norm_resh = L.Reshape(n.lstm_l2norm,
                                   reshape_param=dict(shape=dict(dim=[-1, config.D_text])))

    # Concatenate
    n.feat_all = L.Concat(n.lstm_l2norm_resh, n.img_l2norm_resh, n.spatial, concat_param=dict(axis=1))

    # MLP Classifier over concatenated feature
    n.mlp_l1, n.mlp_relu1 = fc_relu(n.feat_all, config.mlp_hidden_dims)
    if config.mlp_dropout:
        n.mlp_drop1 = L.Dropout(n.mlp_relu1, dropout_ratio=0.5, in_place=True)
        n.scores = fc(n.mlp_drop1, 1)
    else:
        n.scores = fc(n.mlp_relu1, 1)

    # Loss Layer
    n.loss = L.SigmoidCrossEntropyLoss(n.scores, n.label)

    return n.to_proto()
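
Only the last timestep's slice feeds the classifier; every earlier slice must still be consumed somewhere, hence the zero-top Silence layers, which exist purely to absorb unused blobs. In miniature (T hypothetical):

import caffe
from caffe import layers as L

T = 3
ns = caffe.NetSpec()
ns.seq = L.DummyData(dummy_data_param=dict(shape=dict(dim=[T, 2, 8])))
tops = L.Slice(ns.seq, ntop=T, slice_param=dict(axis=0))
for i in range(T - 1):
    setattr(ns, 'silence%d' % i, L.Silence(tops[i], ntop=0))  # discard steps 0..T-2
ns.last = tops[-1]                                            # keep only the final step
print(ns.to_proto())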
Example #18
    def compile_time_operation(self, learning_option, cluster):
        """
        define split operation for input blobs
        """
        # get input
        input_ = self.get_input('input')
        indim = self.get_dimension('input')

        # get attr
        # required field
        # WARNING: size_split is only required in Caffe, not TF or MXNet
        size_split = self.get_attr('size_split', default=None)
        if size_split is None:
            raise Exception(
                '[DLMDL ERROR]: {0} in {1} layer must be declared.'.format(
                    'size_split', self.name))
        slice_point = []
        total = 0
        # slice points are the running sum of the split sizes; Caffe expects
        # len(slice_point) == ntop - 1, so the last chunk needs no cut point
        for val in size_split[:-1]:
            total += val
            slice_point.append(total)

        # optional field
        axis = self.get_attr('axis', default=0)
        slice_param = {'axis': axis, 'slice_point': slice_point}

        # get output dimension
        outdim = []
        ntop = len(size_split)
        for i in range(ntop):
            outdim.insert(i, [])
            for j in range(len(indim)):
                if j != axis:
                    outdim[i].insert(j, indim[j])
                else:
                    outdim[i].insert(j, size_split[i])

        slice = L.Slice(input_,
                        name=self.name,
                        slice_param=slice_param,
                        ntop=ntop)

        # set output
        for idx, val in enumerate(slice):
            self.set_output('output{0}'.format(idx), val)
            self.set_dimension('output{0}'.format(idx), outdim[idx])
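
The size_split-to-slice_point conversion above is a running sum over all but the last chunk, equivalent to the np.cumsum idiom from Example #3:

import numpy as np
print(np.cumsum([2, 3, 5])[:-1].tolist())     # [2, 5]: tops of width 2, 3 and 5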
Example #19
def res50_train(mean_value, list_file, is_train, batch_size):
    # setup the python data layer
    net = caffe.NetSpec()
    net.data, net.label = L.ReidData(
        transform_param=dict(mirror=True, crop_size=224, mean_value=mean_value),
        reid_data_param=dict(source=list_file, batch_size=batch_size,
                             new_height=256, new_width=256,
                             pos_fraction=1, neg_fraction=1,
                             pos_limit=1, neg_limit=4,
                             pos_factor=1, neg_factor=1.01),
        ntop=2)

    net, final = res50_body(net, 'data', '', is_train)

    param = [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]
    net['score'] = fc_relu(net[final],
                           nout=751,
                           is_train=is_train,
                           has_relu=False,
                           param=param)
    #net['euclidean'], net['label_dif'] = L.PairEuclidean(net[final], net['label'], ntop = 2)
    net['label_dif'] = L.PairReidLabel(net['label'],
                                       propagate_down=[0],
                                       ntop=1)

    net['feature_a'], net['feature_b'] = L.Slice(net[final],
                                                 slice_param=dict(
                                                     axis=0,
                                                     slice_point=batch_size),
                                                 ntop=2)
    net['euclidean'] = L.Eltwise(net['feature_a'],
                                 net['feature_b'],
                                 operation=P.Eltwise.PROD)
    net['score_dif'] = fc_relu(net['euclidean'],
                               nout=2,
                               is_train=is_train,
                               has_relu=False,
                               param=param)

    net['loss'] = L.SoftmaxWithLoss(net['score'],
                                    net['label'],
                                    propagate_down=[1, 0],
                                    loss_weight=0.5)
    net['loss_dif'] = L.SoftmaxWithLoss(net['score_dif'],
                                        net['label_dif'],
                                        propagate_down=[1, 0],
                                        loss_weight=1)
    return str(net.to_proto())
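
Slicing axis 0 at batch_size splits the stacked pair batch of 2 * batch_size samples back into its two halves before the elementwise comparison. Index-wise (sizes hypothetical):

batch_size = 4
stacked = ['img%d' % i for i in range(2 * batch_size)]
feature_a = stacked[:batch_size]              # rows [0, batch_size)
feature_b = stacked[batch_size:]              # rows [batch_size, 2*batch_size)
print(feature_a)                              # ['img0', 'img1', 'img2', 'img3']
print(feature_b)                              # ['img4', 'img5', 'img6', 'img7']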
Example #20
def bilinear_interpolation_fixed(net, input, level, num_output=1, pad=1, kernel_size=4, stride=2):

    slice_namex = 'slice_x_{}'.format(level)
    slice_namey = 'slice_y_{}'.format(level)
    slice_conv_namex = 'slice_conv_x_{}'.format(level)
    slice_conv_namey = 'slice_conv_y_{}'.format(level)

    input_x, input_y = L.Slice(input, slice_param=dict(axis=1, slice_point=1),  ntop=2)
    setattr(net, slice_namex, input_x)
    setattr(net, slice_namey, input_y)    

    output_x, output_y = L.Deconvolution(
        input_x, input_y,
        param=dict(lr_mult=0, decay_mult=0),
        convolution_param=dict(num_output=num_output, pad=pad,
                               kernel_size=kernel_size, stride=stride,
                               weight_filler=dict(type='bilinear'),
                               bias_term=False, engine=2),
        ntop=2)
    setattr(net, slice_conv_namex, output_x)
    setattr(net, slice_conv_namey, output_y)
    output = L.Concat(output_x, output_y, concat_param=dict(axis=1))
    return net, output
Example #21
def _make_module(model_path, n, i_channels, i_size, axis, slice_point):
    ns = caffe.NetSpec()
    ns.data = L.Input(
        name="data",
        input_param={"shape": {
            "dim": [n, i_channels, i_size[0], i_size[1]]
        }})
    # when ntop > 2, it seems that there is a bug for slice in caffe
    ns.s1, ns.s2 = L.Slice(ns.data,
                           name='slice',
                           ntop=2,
                           slice_point=slice_point,
                           axis=axis)

    with open(os.path.join(model_path, 'test.prototxt'), 'w') as f:
        f.write(str(ns.to_proto()))

    net = caffe.Net(f.name, caffe.TEST)

    net.save(os.path.join(model_path, 'test.caffemodel'))
Example #22
def concat_slice_net():
    n = caffe.NetSpec()
    n.data = L.DummyData(dummy_data_param=dict(num=20,
                                               channels=50,
                                               height=64,
                                               width=64,
                                               data_filler=dict(
                                                   type="gaussian")))
    # Split the input data layer into three outputs a, b, c; slice_point has one
    # entry fewer than the number of outputs.
    # Here there are three tops, so there are two slice_points: the first
    # (slice_point=20) is the channel count of top "a"; the second
    # (slice_point=30) is the channel count of "a" plus "b".
    # Top "c" gets the remainder: channels - 30 = 50 - 30 = 20.
    # So a, b and c have 20, 10 and 20 channels respectively.
    n.a, n.b, n.c = L.Slice(n.data, ntop=3, slice_point=[20, 30], axis=1)
    n.d = L.Concat(n.a, n.b, axis=1)

    # Eltwise supports three operations: product (elementwise multiply),
    # sum (add/subtract) and max; sum is the default.
    n.e = L.Eltwise(n.a, n.c)

    return n.to_proto()
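
The channel arithmetic in the comments can be checked directly: the slice boundaries are 0, the slice points, and the channel count, and each top's width is the gap between consecutive boundaries:

channels, slice_point = 50, [20, 30]
bounds = [0] + slice_point + [channels]
print([b - a for a, b in zip(bounds[:-1], bounds[1:])])  # [20, 10, 20] -> widths of a, b, c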
Example #23
 def modeltrain(hdf5s,hdf5t, batch_size):
     #logistic regression: data, matrix multiplication, and 2-class softmax loss
     n = caffe.NetSpec()
     n.source_data, n.lp_label= L.HDF5Data(batch_size=batch_size, source=hdf5s, ntop=2, shuffle=False)
     n.source_domain_labels= L.DummyData(data_filler=dict(type='constant', value=0), num=batch_size, channels=1, height=1, width=1)
     #n.target_data, n.lp_target_label, n.bag_target_label = L.HDF5Data(batch_size=batch_size, source=hdf5t, ntop=3, shuffle=False)
     #n.target_data, n.lp_target_label = L.HDF5Data(batch_size=batch_size, source=hdf5t, ntop=2, shuffle=False)
     #n.target_data, n.lp_target_label, n.instance_target_label = L.HDF5Data(batch_size=batch_size, source=hdf5t, ntop=3, shuffle=False)
     n.target_data, n.lp_target_label, n.bag_target_label, n.instance_target_label = L.HDF5Data(batch_size=batch_size, source=hdf5t, ntop=4, shuffle=False)
     n.target_domain_labels=L.DummyData(data_filler=dict(type='constant', value=1), num=batch_size, channels=1, height=1, width=1)
     bottom_layers_data=[n.source_data, n.target_data]
     n.data=L.Concat(*bottom_layers_data, concat_dim=0)
     bottom_layers_domain=[n.source_domain_labels, n.target_domain_labels]
     n.dc_label=L.Concat(*bottom_layers_domain, concat_dim=0)
     n.ip1= L.InnerProduct(n.data, num_output=neuronL1, weight_filler=dict(type='xavier'))
     n.relu1 = L.Sigmoid(n.ip1, in_place=True)
     #n.dropout1 = L.Dropout(n.relu1, dropout_ratio=0.5)
     n.ip2= L.InnerProduct(n.relu1, num_output=neuronL1-400, weight_filler=dict(type='xavier'))
     n.source_feature, n.target_feature = L.Slice(n.ip2, slice_dim=0, ntop=2)
     #L.Silence(n.target_feature);
     #clfe.fit(n.source_feature, n.lp_label)	
     #n.real, n.ip3 = L.Python(n.target_feature, n.lp_target_label, n.bag_label, module= 'missSVM', layer='missSVMLayer', ntop=2)
     n.ip3 = L.InnerProduct(n.source_feature, num_output=1, weight_filler=dict(type='xavier'))
     #n.ip3=L.Sigmoid(n.ip33, in_place=True)
     n.ip4= L.InnerProduct(n.target_feature, num_output=1, weight_filler=dict(type='xavier'))
     #n.ip5=L.Sigmoid(n.ip4, in_place=True)
     #n.ll=clfe.predict(n.source_feature)
     #n.accuracy = L.Accuracy(n.ip4, n.lp_target_label)
     #n.losslp = L.Python(n.ip4, n.lp_target_label, n.bag_target_label, module = 'GMloss', layer='MultipleInstanceLossLayer')
     #n.P, n.Y = L.Python(n.ip4, n.lp_target_label, n.bag_target_label, module = 'MIloss', layer='MultipleInstanceLossLayer', ntop=2) 
     #n.losslp = L.SigmoidCrossEntropyLoss(n.P, n.Y)
     n.losslp = L.SigmoidCrossEntropyLoss(n.ip4, n.lp_target_label)
     n.losslps = L.SigmoidCrossEntropyLoss(n.ip3, n.lp_label)
     n.grl= L.GradientScaler(n.ip2, lower_bound=0.0)
     n.ip11= L.InnerProduct(n.grl, num_output=300, weight_filler=dict(type='xavier'))
     n.relu11 = L.Sigmoid(n.ip11, in_place=True)
     n.dropout11 = L.Dropout(n.relu11, dropout_ratio=0.5)
     n.ip12 = L.InnerProduct(n.dropout11, num_output=1, weight_filler=dict(type='xavier'))
     #n.final = L.Sigmoid(n.ip12, in_place=True)
     n.lossdc = L.SigmoidCrossEntropyLoss(n.ip12, n.dc_label, loss_weight=0.1)
     return n.to_proto()
Example #24
    def language_model_lstm_no_embed(self,
                                     sent_bottom,
                                     cont_bottom,
                                     text_name='embedding_text',
                                     tag=''):

        lstm_lr = self.args.lstm_lr
        embedding_lr = self.args.language_embedding_lr

        lstm = L.LSTM(
            sent_bottom,
            cont_bottom,
            recurrent_param=dict(num_output=self.language_embedding_dim[0],
                                 weight_filler=self.uniform_weight_filler(
                                     -0.08, 0.08),
                                 bias_filler=self.constant_filler(0)),
            param=self.learning_params(
                [[lstm_lr, lstm_lr], [lstm_lr, lstm_lr], [lstm_lr, lstm_lr]],
                ['lstm1' + tag, 'lstm2' + tag, 'lstm3' + tag]))
        lstm_slices = L.Slice(lstm,
                              slice_point=self.params['sentence_length'] - 1,
                              axis=0,
                              ntop=2)
        self.n.tops['silence_cell_' + str(self.silence_count)] = L.Silence(
            lstm_slices[0], ntop=0)
        self.silence_count += 1
        top_lstm = L.Reshape(
            lstm_slices[1],
            shape=dict(dim=[-1, self.language_embedding_dim[0]]))
        top_text = L.InnerProduct(
            top_lstm,
            num_output=self.language_embedding_dim[1],
            weight_filler=self.uniform_weight_filler(-0.08, .08),
            bias_filler=self.constant_filler(0),
            param=self.learning_params(
                [[embedding_lr, embedding_lr], [embedding_lr * 2, 0]],
                ['lstm_embed1' + tag, 'lstm_embed_1b' + tag]))

        setattr(self.n, text_name, top_text)
        return top_text
Example #25
def data_layer_trimese(net,
                       inputdb,
                       mean_file,
                       batch_size,
                       net_type,
                       height,
                       width,
                       nchannels,
                       crop_size=-1):
    data, label = data_layer_stacked(net,
                                     inputdb,
                                     mean_file,
                                     batch_size,
                                     net_type,
                                     height,
                                     width,
                                     nchannels,
                                     crop_size=crop_size)
    slices = L.Slice(data[0],
                     ntop=3,
                     name="data_trimese",
                     slice_param=dict(axis=1, slice_point=[1, 2]))
    return slices, label
Example #26
def setLayers_twoBranches(data_source,
                          batch_size,
                          layername,
                          kernel,
                          stride,
                          outCH,
                          label_name,
                          transform_param_in,
                          deploy=False,
                          batchnorm=0,
                          lr_mult_distro=[1, 1, 1]):
    # It is tricky to produce the deploy prototxt file: the data input does not come from
    # a layer, so we have to create a workaround. Producing the training and testing
    # prototxt files is pretty straightforward.
    n = caffe.NetSpec()
    assert len(layername) == len(kernel)
    assert len(layername) == len(stride)
    assert len(layername) == len(outCH)
    num_parts = transform_param_in['num_parts']

    if deploy == False and "lmdb" not in data_source:
        if (len(label_name) == 1):
            n.data, n.tops[label_name[0]] = L.HDF5Data(hdf5_data_param=dict(
                batch_size=batch_size, source=data_source),
                                                       ntop=2)
        elif (len(label_name) == 2):
            n.data, n.tops[label_name[0]], n.tops[label_name[1]] = L.HDF5Data(
                hdf5_data_param=dict(batch_size=batch_size,
                                     source=data_source),
                ntop=3)
    # produce data definition for deploy net
    elif deploy == False:
        n.data, n.tops['label'] = L.CPMData(
            data_param=dict(backend=1,
                            source=data_source,
                            batch_size=batch_size),
            cpm_transform_param=transform_param_in,
            ntop=2)
        n.tops[label_name[2]], n.tops[label_name[3]], n.tops[
            label_name[4]], n.tops[label_name[5]] = L.Slice(
                n.label,
                slice_param=dict(
                    axis=1, slice_point=[38, num_parts + 1, num_parts + 39]),
                ntop=4)
        n.tops[label_name[0]] = L.Eltwise(n.tops[label_name[2]],
                                          n.tops[label_name[4]],
                                          operation=P.Eltwise.PROD)
        n.tops[label_name[1]] = L.Eltwise(n.tops[label_name[3]],
                                          n.tops[label_name[5]],
                                          operation=P.Eltwise.PROD)

    else:
        input = "data"
        dim1 = 1
        dim2 = 4
        dim3 = 368
        dim4 = 368
        # make an empty "data" layer so the next layer accepting input will be able to take the correct blob name "data",
        # we will later have to remove this layer from the serialization string, since this is just a placeholder
        n.data = L.Layer()

    # something special before everything
    n.image, n.center_map = L.Slice(n.data,
                                    slice_param=dict(axis=1, slice_point=3),
                                    ntop=2)
    n.silence2 = L.Silence(n.center_map, ntop=0)
    #n.pool_center_lower = L.Pooling(n.center_map, kernel_size=9, stride=8, pool=P.Pooling.AVE)

    # just follow arrays..CPCPCPCPCCCC....
    last_layer = ['image', 'image']
    stage = 1
    conv_counter = 1
    pool_counter = 1
    drop_counter = 1
    local_counter = 1
    state = 'image'  # can be image or fuse
    share_point = 0

    for l in range(0, len(layername)):
        if layername[l] == 'V':  #pretrained VGG layers
            conv_name = 'conv%d_%d' % (pool_counter, local_counter)
            lr_m = lr_mult_distro[0]
            n.tops[conv_name] = L.Convolution(
                n.tops[last_layer[0]],
                kernel_size=kernel[l],
                num_output=outCH[l],
                pad=int(math.floor(kernel[l] / 2)),
                param=[
                    dict(lr_mult=lr_m, decay_mult=1),
                    dict(lr_mult=lr_m * 2, decay_mult=0)
                ],
                weight_filler=dict(type='gaussian', std=0.01),
                bias_filler=dict(type='constant'))
            last_layer[0] = conv_name
            last_layer[1] = conv_name
            print '%s\tch=%d\t%.1f' % (last_layer[0], outCH[l], lr_m)
            ReLUname = 'relu%d_%d' % (pool_counter, local_counter)
            n.tops[ReLUname] = L.ReLU(n.tops[last_layer[0]], in_place=True)
            local_counter += 1
            print ReLUname
        if layername[l] == 'B':
            pool_counter += 1
            local_counter = 1
        if layername[l] == 'C':
            if state == 'image':
                #conv_name = 'conv%d_stage%d' % (conv_counter, stage)
                conv_name = 'conv%d_%d_CPM' % (
                    pool_counter, local_counter
                )  # no image state in subsequent stages
                lr_m = lr_mult_distro[1]  # same multiplier for stage 1 and later stages
            else:  # fuse
                conv_name = 'Mconv%d_stage%d' % (conv_counter, stage)
                lr_m = lr_mult_distro[2]
                conv_counter += 1
            #if stage == 1:
            #    lr_m = 1
            #else:
            #    lr_m = lr_sub
            n.tops[conv_name] = L.Convolution(
                n.tops[last_layer[0]],
                kernel_size=kernel[l],
                num_output=outCH[l],
                pad=int(math.floor(kernel[l] / 2)),
                param=[
                    dict(lr_mult=lr_m, decay_mult=1),
                    dict(lr_mult=lr_m * 2, decay_mult=0)
                ],
                weight_filler=dict(type='gaussian', std=0.01),
                bias_filler=dict(type='constant'))
            last_layer[0] = conv_name
            last_layer[1] = conv_name
            print '%s\tch=%d\t%.1f' % (last_layer[0], outCH[l], lr_m)

            if layername[l + 1] != 'L':
                if (state == 'image'):
                    if (batchnorm == 1):
                        batchnorm_name = 'bn%d_stage%d' % (conv_counter, stage)
                        n.tops[batchnorm_name] = L.BatchNorm(
                            n.tops[last_layer[0]],
                            param=[
                                dict(lr_mult=0),
                                dict(lr_mult=0),
                                dict(lr_mult=0)
                            ])
                        #scale_filler=dict(type='constant', value=1), shift_filler=dict(type='constant', value=0.001))
                        last_layer[0] = batchnorm_name
                    #ReLUname = 'relu%d_stage%d' % (conv_counter, stage)
                    ReLUname = 'relu%d_%d_CPM' % (pool_counter, local_counter)
                    n.tops[ReLUname] = L.ReLU(n.tops[last_layer[0]],
                                              in_place=True)
                else:
                    if (batchnorm == 1):
                        batchnorm_name = 'Mbn%d_stage%d' % (conv_counter,
                                                            stage)
                        n.tops[batchnorm_name] = L.BatchNorm(
                            n.tops[last_layer[0]],
                            param=[
                                dict(lr_mult=0),
                                dict(lr_mult=0),
                                dict(lr_mult=0)
                            ])
                        #scale_filler=dict(type='constant', value=1), shift_filler=dict(type='constant', value=0.001))
                        last_layer[0] = batchnorm_name
                    ReLUname = 'Mrelu%d_stage%d' % (conv_counter, stage)
                    n.tops[ReLUname] = L.ReLU(n.tops[last_layer[0]],
                                              in_place=True)
                #last_layer = ReLUname
                print ReLUname

            #conv_counter += 1
            local_counter += 1

        elif layername[l] == 'C2':
            for level in range(0, 2):
                if state == 'image':
                    #conv_name = 'conv%d_stage%d' % (conv_counter, stage)
                    conv_name = 'conv%d_%d_CPM_L%d' % (
                        pool_counter, local_counter, level + 1
                    )  # no image state in subsequent stages
                    lr_m = lr_mult_distro[1]  # same multiplier for stage 1 and later stages
                else:  # fuse
                    conv_name = 'Mconv%d_stage%d_L%d' % (conv_counter, stage,
                                                         level + 1)
                    lr_m = lr_mult_distro[2]
                    #conv_counter += 1
                #if stage == 1:
                #    lr_m = 1
                #else:
                #    lr_m = lr_sub
                if layername[l + 1] == 'L2' or layername[l + 1] == 'L3':
                    if level == 0:
                        outCH[l] = 38
                    else:
                        outCH[l] = 19

                n.tops[conv_name] = L.Convolution(
                    n.tops[last_layer[level]],
                    kernel_size=kernel[l],
                    num_output=outCH[l],
                    pad=int(math.floor(kernel[l] / 2)),
                    param=[
                        dict(lr_mult=lr_m, decay_mult=1),
                        dict(lr_mult=lr_m * 2, decay_mult=0)
                    ],
                    weight_filler=dict(type='gaussian', std=0.01),
                    bias_filler=dict(type='constant'))
                last_layer[level] = conv_name
                print '%s\tch=%d\t%.1f' % (last_layer[level], outCH[l], lr_m)

                if layername[l + 1] != 'L2' and layername[l + 1] != 'L3':
                    if (state == 'image'):
                        if (batchnorm == 1):
                            batchnorm_name = 'bn%d_stage%d_L%d' % (
                                conv_counter, stage, level + 1)
                            n.tops[batchnorm_name] = L.BatchNorm(
                                n.tops[last_layer[level]],
                                param=[
                                    dict(lr_mult=0),
                                    dict(lr_mult=0),
                                    dict(lr_mult=0)
                                ])
                            #scale_filler=dict(type='constant', value=1), shift_filler=dict(type='constant', value=0.001))
                            last_layer[level] = batchnorm_name
                        #ReLUname = 'relu%d_stage%d' % (conv_counter, stage)
                        ReLUname = 'relu%d_%d_CPM_L%d' % (
                            pool_counter, local_counter, level + 1)
                        n.tops[ReLUname] = L.ReLU(n.tops[last_layer[level]],
                                                  in_place=True)
                    else:
                        if (batchnorm == 1):
                            batchnorm_name = 'Mbn%d_stage%d_L%d' % (
                                conv_counter, stage, level + 1)
                            n.tops[batchnorm_name] = L.BatchNorm(
                                n.tops[last_layer[level]],
                                param=[
                                    dict(lr_mult=0),
                                    dict(lr_mult=0),
                                    dict(lr_mult=0)
                                ])
                            #scale_filler=dict(type='constant', value=1), shift_filler=dict(type='constant', value=0.001))
                            last_layer[level] = batchnorm_name
                        ReLUname = 'Mrelu%d_stage%d_L%d' % (conv_counter,
                                                            stage, level + 1)
                        n.tops[ReLUname] = L.ReLU(n.tops[last_layer[level]],
                                                  in_place=True)
                    print ReLUname

            conv_counter += 1
            local_counter += 1

        elif layername[l] == 'P':  # Pooling
            n.tops['pool%d_stage%d' % (pool_counter, stage)] = L.Pooling(
                n.tops[last_layer[0]],
                kernel_size=kernel[l],
                stride=stride[l],
                pool=P.Pooling.MAX)
            last_layer[0] = 'pool%d_stage%d' % (pool_counter, stage)
            pool_counter += 1
            local_counter = 1
            conv_counter += 1
            print last_layer[0]

        elif layername[l] == 'L':
            # Loss: n.loss layer is only in training and testing nets, but not in deploy net.
            if deploy == False and "lmdb" not in data_source:
                n.tops['map_vec_stage%d' % stage] = L.Flatten(
                    n.tops[last_layer[0]])
                n.tops['loss_stage%d' % stage] = L.EuclideanLoss(
                    n.tops['map_vec_stage%d' % stage], n.tops[label_name[1]])
            elif deploy == False:
                level = 1
                name = 'weight_stage%d' % stage
                n.tops[name] = L.Eltwise(n.tops[last_layer[level]],
                                         n.tops[label_name[(level + 2)]],
                                         operation=P.Eltwise.PROD)
                n.tops['loss_stage%d' % stage] = L.EuclideanLoss(
                    n.tops[name], n.tops[label_name[level]])

            print 'loss %d' % stage
            stage += 1
            conv_counter = 1
            pool_counter = 1
            drop_counter = 1
            local_counter = 1
            state = 'image'

        elif layername[l] == 'L2':
            # Loss: n.loss layer is only in training and testing nets, but not in deploy net.
            weight = [lr_mult_distro[3], 1]
            # print lr_mult_distro[3]
            for level in range(0, 2):
                if deploy == False and "lmdb" not in data_source:
                    n.tops['map_vec_stage%d_L%d' %
                           (stage, level + 1)] = L.Flatten(
                               n.tops[last_layer[level]])
                    n.tops['loss_stage%d_L%d' %
                           (stage, level + 1)] = L.EuclideanLoss(
                               n.tops['map_vec_stage%d_L%d' %
                                      (stage, level + 1)],
                               n.tops[label_name[level]],
                               loss_weight=weight[level])
                elif deploy == False:
                    name = 'weight_stage%d_L%d' % (stage, level + 1)
                    n.tops[name] = L.Eltwise(n.tops[last_layer[level]],
                                             n.tops[label_name[(level + 2)]],
                                             operation=P.Eltwise.PROD)
                    n.tops['loss_stage%d_L%d' %
                           (stage, level + 1)] = L.EuclideanLoss(
                               n.tops[name],
                               n.tops[label_name[level]],
                               loss_weight=weight[level])

                print 'loss %d level %d' % (stage, level + 1)

            stage += 1
            #last_connect = last_layer
            #last_layer = 'image'
            conv_counter = 1
            pool_counter = 1
            drop_counter = 1
            local_counter = 1
            state = 'image'

        elif layername[l] == 'L3':
            # Loss: n.loss layer is only in training and testing nets, but not in deploy net.
            weight = [lr_mult_distro[3], 1]
            # print lr_mult_distro[3]
            if deploy == False:
                level = 0
                n.tops['loss_stage%d_L%d' %
                       (stage, level + 1)] = L.Euclidean2Loss(
                           n.tops[last_layer[level]],
                           n.tops[label_name[level]],
                           n.tops[label_name[2]],
                           loss_weight=weight[level])
                print 'loss %d level %d' % (stage, level + 1)
                level = 1
                n.tops['loss_stage%d_L%d' %
                       (stage, level + 1)] = L.EuclideanLoss(
                           n.tops[last_layer[level]],
                           n.tops[label_name[level]],
                           loss_weight=weight[level])
                print 'loss %d level %d' % (stage, level + 1)

            stage += 1
            #last_connect = last_layer
            #last_layer = 'image'
            conv_counter = 1
            pool_counter = 1
            drop_counter = 1
            local_counter = 1
            state = 'image'

        elif layername[l] == 'D':
            if deploy == False:
                n.tops['drop%d_stage%d' % (drop_counter, stage)] = L.Dropout(
                    n.tops[last_layer[0]],
                    in_place=True,
                    dropout_param=dict(dropout_ratio=0.5))
                drop_counter += 1
        elif layername[l] == '@':
            #if not share_point:
            #    share_point = last_layer
            n.tops['concat_stage%d' % stage] = L.Concat(
                n.tops[last_layer[0]],
                n.tops[last_layer[1]],
                n.tops[share_point],
                concat_param=dict(axis=1))

            local_counter = 1
            state = 'fuse'
            last_layer[0] = 'concat_stage%d' % stage
            last_layer[1] = 'concat_stage%d' % stage
            print last_layer
        elif layername[l] == '$':
            share_point = last_layer[0]
            pool_counter += 1
            local_counter = 1
            print 'share'

    # final process
    stage -= 1
    #if stage == 1:
    #    n.silence = L.Silence(n.pool_center_lower, ntop=0)

    if deploy == False:
        return str(n.to_proto())
        # for generating the deploy net
    else:
        # generate the input information header string
        deploy_str = 'input: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}'.format(
            '"' + input + '"', dim1, dim2, dim3, dim4)
        # assemble the input header with the net layers string.  remove the first placeholder layer from the net string.
        return deploy_str + '\n' + 'layer {' + 'layer {'.join(
            str(n.to_proto()).split('layer {')[2:])
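
A call sketch (all values hypothetical): the layername string drives the architecture, with 'V' for pretrained VGG convs, 'B' for block boundaries, 'P' for pooling, 'C'/'C2' for single- and two-branch CPM convs, '@' for concat, '$' for the share point, and 'L'/'L2'/'L3' for the loss variants.

proto = setLayers_twoBranches(
    'train.h5', 10,
    layername=['V', 'V', 'P', 'C', 'L'],      # conv, conv, pool, CPM conv, loss
    kernel=[3, 3, 2, 3, 0],
    stride=[1, 1, 2, 1, 0],
    outCH=[64, 64, 0, 128, 0],
    label_name=['label_vec', 'label_heat'],
    transform_param_in=dict(num_parts=18),
    deploy=False)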
Example #27
def qlstm(mode, batchsize, T, question_vocab_size):
    n = caffe.NetSpec()
    mode_str = json.dumps({'mode': mode, 'batchsize': batchsize})
    n.data, n.cont, n.img_feature, n.label, n.glove = L.Python(\
        module='vqa_data_provider_layer', layer='VQADataProviderLayer', param_str=mode_str, ntop=5 )

    n.embed_ba = L.Embed(n.data, input_dim=question_vocab_size, num_output=300, \
        weight_filler=dict(type='uniform',min=-0.08,max=0.08))
    n.embed = L.TanH(n.embed_ba)
    concat_word_embed = [n.embed, n.glove]
    n.concat_embed = L.Concat(*concat_word_embed,
                              concat_param={'axis': 2})  # T x N x 600

    # LSTM1
    n.lstm1 = L.LSTM(\
                   n.concat_embed, n.cont,\
                   recurrent_param=dict(\
                       num_output=1024,\
                       weight_filler=dict(type='uniform',min=-0.08,max=0.08),\
                       bias_filler=dict(type='constant',value=0)))
    tops1 = L.Slice(n.lstm1, ntop=T, slice_param={'axis': 0})
    for i in xrange(T - 1):
        n.__setattr__('slice_first' + str(i), tops1[int(i)])
        n.__setattr__('silence_data_first' + str(i),
                      L.Silence(tops1[int(i)], ntop=0))
    n.lstm1_out = tops1[T - 1]
    n.lstm1_reshaped = L.Reshape(n.lstm1_out,\
                          reshape_param=dict(\
                              shape=dict(dim=[-1,1024])))
    n.lstm1_reshaped_droped = L.Dropout(n.lstm1_reshaped,
                                        dropout_param={'dropout_ratio': 0.3})
    n.lstm1_droped = L.Dropout(n.lstm1, dropout_param={'dropout_ratio': 0.3})
    # LSTM2
    n.lstm2 = L.LSTM(\
                   n.lstm1_droped, n.cont,\
                   recurrent_param=dict(\
                       num_output=1024,\
                       weight_filler=dict(type='uniform',min=-0.08,max=0.08),\
                       bias_filler=dict(type='constant',value=0)))
    tops2 = L.Slice(n.lstm2, ntop=T, slice_param={'axis': 0})
    for i in xrange(T - 1):
        n.__setattr__('slice_second' + str(i), tops2[int(i)])
        n.__setattr__('silence_data_second' + str(i),
                      L.Silence(tops2[int(i)], ntop=0))
    n.lstm2_out = tops2[T - 1]
    n.lstm2_reshaped = L.Reshape(n.lstm2_out,\
                          reshape_param=dict(\
                              shape=dict(dim=[-1,1024])))
    n.lstm2_reshaped_droped = L.Dropout(n.lstm2_reshaped,
                                        dropout_param={'dropout_ratio': 0.3})
    concat_bottom = [n.lstm1_reshaped_droped, n.lstm2_reshaped_droped]
    n.lstm_12 = L.Concat(*concat_bottom)

    n.q_emb_tanh_droped_resh = L.Reshape(
        n.lstm_12, reshape_param=dict(shape=dict(dim=[-1, 2048, 1, 1])))
    n.q_emb_tanh_droped_resh_tiled_1 = L.Tile(n.q_emb_tanh_droped_resh,
                                              axis=2,
                                              tiles=14)
    n.q_emb_tanh_droped_resh_tiled = L.Tile(n.q_emb_tanh_droped_resh_tiled_1,
                                            axis=3,
                                            tiles=14)
    n.i_emb_tanh_droped_resh = L.Reshape(
        n.img_feature, reshape_param=dict(shape=dict(dim=[-1, 2048, 14, 14])))
    n.blcf = L.CompactBilinear(n.q_emb_tanh_droped_resh_tiled,
                               n.i_emb_tanh_droped_resh,
                               compact_bilinear_param=dict(num_output=16000,
                                                           sum_pool=False))
    n.blcf_sign_sqrt = L.SignedSqrt(n.blcf)
    n.blcf_sign_sqrt_l2 = L.L2Normalize(n.blcf_sign_sqrt)
    n.blcf_droped = L.Dropout(n.blcf_sign_sqrt_l2,
                              dropout_param={'dropout_ratio': 0.1})
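    # sum_pool=False keeps the 14x14 spatial layout of the 16000-d compact
    # bilinear features, so per-location attention can be computed below.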

    # multi-channel attention
    n.att_conv1 = L.Convolution(n.blcf_droped,
                                kernel_size=1,
                                stride=1,
                                num_output=512,
                                pad=0,
                                weight_filler=dict(type='xavier'))
    n.att_conv1_relu = L.ReLU(n.att_conv1)
    n.att_conv2 = L.Convolution(n.att_conv1_relu,
                                kernel_size=1,
                                stride=1,
                                num_output=2,
                                pad=0,
                                weight_filler=dict(type='xavier'))
    n.att_reshaped = L.Reshape(
        n.att_conv2, reshape_param=dict(shape=dict(dim=[-1, 2, 14 * 14])))
    n.att_softmax = L.Softmax(n.att_reshaped, axis=2)
    n.att = L.Reshape(n.att_softmax,
                      reshape_param=dict(shape=dict(dim=[-1, 2, 14, 14])))
    att_maps = L.Slice(n.att, ntop=2, slice_param={'axis': 1})
    n.att_map0 = att_maps[0]
    n.att_map1 = att_maps[1]
    dummy = L.DummyData(shape=dict(dim=[batchsize, 1]),
                        data_filler=dict(type='constant', value=1),
                        ntop=1)
    n.att_feature0 = L.SoftAttention(n.i_emb_tanh_droped_resh, n.att_map0,
                                     dummy)
    n.att_feature1 = L.SoftAttention(n.i_emb_tanh_droped_resh, n.att_map1,
                                     dummy)
    n.att_feature0_resh = L.Reshape(
        n.att_feature0, reshape_param=dict(shape=dict(dim=[-1, 2048])))
    n.att_feature1_resh = L.Reshape(
        n.att_feature1, reshape_param=dict(shape=dict(dim=[-1, 2048])))
    n.att_feature = L.Concat(n.att_feature0_resh, n.att_feature1_resh)

    # merge attention and lstm with compact bilinear pooling
    n.att_feature_resh = L.Reshape(
        n.att_feature, reshape_param=dict(shape=dict(dim=[-1, 4096, 1, 1])))
    n.lstm_12_resh = L.Reshape(
        n.lstm_12, reshape_param=dict(shape=dict(dim=[-1, 2048, 1, 1])))
    n.bc_att_lstm = L.CompactBilinear(n.att_feature_resh,
                                      n.lstm_12_resh,
                                      compact_bilinear_param=dict(
                                          num_output=16000, sum_pool=False))
    n.bc_sign_sqrt = L.SignedSqrt(n.bc_att_lstm)
    n.bc_sign_sqrt_l2 = L.L2Normalize(n.bc_sign_sqrt)

    n.bc_dropped = L.Dropout(n.bc_sign_sqrt_l2,
                             dropout_param={'dropout_ratio': 0.1})
    n.bc_dropped_resh = L.Reshape(
        n.bc_dropped, reshape_param=dict(shape=dict(dim=[-1, 16000])))

    n.prediction = L.InnerProduct(n.bc_dropped_resh,
                                  num_output=3000,
                                  weight_filler=dict(type='xavier'))
    n.loss = L.SoftmaxWithLoss(n.prediction, n.label)
    return n.to_proto()
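A hedged usage sketch (file name and hyper-parameter values are assumptions, not from the source): writing the train-time definition out as a prototxt.

with open('qlstm_train.prototxt', 'w') as f:
    f.write(str(qlstm('train', batchsize=64, T=15, question_vocab_size=10000)))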
Example No. 28
def mfb_coatt(mode, batchsize, T, question_vocab_size, folder):
    n = caffe.NetSpec()
    mode_str = json.dumps({'mode':mode, 'batchsize':batchsize,'folder':folder})
    if mode == 'val':
        n.data, n.cont, n.img_feature, n.label, n.glove = L.Python( \
            module='vqa_data_layer_hdf5', layer='VQADataProviderLayer', \
            param_str=mode_str, ntop=5 )
    else:
        n.data, n.cont, n.img_feature, n.label, n.glove = L.Python(\
            module='vqa_data_layer_kld_hdf5', layer='VQADataProviderLayer', \
            param_str=mode_str, ntop=5 ) 
    n.embed = L.Embed(n.data, input_dim=question_vocab_size, num_output=300, \
                         weight_filler=dict(type='xavier'))
    n.embed_tanh = L.TanH(n.embed) 
    concat_word_embed = [n.embed_tanh, n.glove]
    n.concat_embed = L.Concat(*concat_word_embed, concat_param={'axis': 2}) # T x N x 600

    # LSTM
    n.lstm1 = L.LSTM(\
                   n.concat_embed, n.cont,\
                   recurrent_param=dict(\
                       num_output=config.LSTM_UNIT_NUM,\
                       weight_filler=dict(type='xavier')))
    n.lstm1_droped = L.Dropout(n.lstm1,dropout_param={'dropout_ratio':config.LSTM_DROPOUT_RATIO})
    n.lstm1_resh = L.Permute(n.lstm1_droped, permute_param=dict(order=[1,2,0]))
    n.lstm1_resh2 = L.Reshape(n.lstm1_resh, \
            reshape_param=dict(shape=dict(dim=[0,0,0,1])))
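    # Permute T x N x H -> N x H x T, then append a trailing singleton axis so
    # the per-word LSTM features can be processed by 1x1 convolutions below.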

    '''
    Question Attention
    '''
    n.qatt_conv1 = L.Convolution(n.lstm1_resh2, kernel_size=1, stride=1, num_output=512, pad=0,
                                           weight_filler=dict(type='xavier'))
    n.qatt_relu = L.ReLU(n.qatt_conv1)
    n.qatt_conv2 = L.Convolution(n.qatt_relu, kernel_size=1, stride=1, num_output=config.NUM_QUESTION_GLIMPSE, pad=0,
                                           weight_filler=dict(type='xavier')) 
    n.qatt_reshape = L.Reshape(n.qatt_conv2, reshape_param=dict(shape=dict(dim=[-1,config.NUM_QUESTION_GLIMPSE,config.MAX_WORDS_IN_QUESTION,1]))) # N x NUM_QUESTION_GLIMPSE x MAX_WORDS_IN_QUESTION x 1
    n.qatt_softmax = L.Softmax(n.qatt_reshape, axis=2)

    qatt_maps = L.Slice(n.qatt_softmax,ntop=config.NUM_QUESTION_GLIMPSE,slice_param={'axis':1})
    dummy_lstm = L.DummyData(shape=dict(dim=[batchsize, 1]), data_filler=dict(type='constant', value=1), ntop=1)
    qatt_feature_list = []
    for i in xrange(config.NUM_QUESTION_GLIMPSE):
        if config.NUM_QUESTION_GLIMPSE == 1:
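            # with ntop=1, L.Slice returns a single top instead of a list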
            n.__setattr__('qatt_feat%d'%i, L.SoftAttention(n.lstm1_resh2, qatt_maps, dummy_lstm))
        else:
            n.__setattr__('qatt_feat%d'%i, L.SoftAttention(n.lstm1_resh2, qatt_maps[i], dummy_lstm))    
        qatt_feature_list.append(n.__getattr__('qatt_feat%d'%i))
    n.qatt_feat_concat = L.Concat(*qatt_feature_list) 
    '''
    Image Attention with MFB
    '''
    n.q_feat_resh = L.Reshape(n.qatt_feat_concat,reshape_param=dict(shape=dict(dim=[0,-1,1,1])))
    n.i_feat_resh = L.Reshape(n.img_feature,reshape_param=dict(shape=dict(dim=[0,-1,config.IMG_FEAT_WIDTH,config.IMG_FEAT_WIDTH])))
    
    n.iatt_q_proj = L.InnerProduct(n.q_feat_resh, num_output = config.JOINT_EMB_SIZE, 
                                   weight_filler=dict(type='xavier'))
    n.iatt_q_resh = L.Reshape(n.iatt_q_proj, reshape_param=dict(shape=dict(dim=[-1,config.JOINT_EMB_SIZE,1,1])))  
    n.iatt_q_tile1 = L.Tile(n.iatt_q_resh, axis=2, tiles=config.IMG_FEAT_WIDTH)
    n.iatt_q_tile2 = L.Tile(n.iatt_q_tile1, axis=3, tiles=config.IMG_FEAT_WIDTH)


    n.iatt_i_conv = L.Convolution(n.i_feat_resh, kernel_size=1, stride=1, num_output=config.JOINT_EMB_SIZE, pad=0,
                                 weight_filler=dict(type='xavier')) 
    n.iatt_i_resh1 = L.Reshape(n.iatt_i_conv, reshape_param=dict(shape=dict(dim=[-1,config.JOINT_EMB_SIZE,
                                                                      config.IMG_FEAT_WIDTH,config.IMG_FEAT_WIDTH])))
    n.iatt_iq_eltwise = L.Eltwise(n.iatt_q_tile2, n.iatt_i_resh1, eltwise_param=dict(operation=P.Eltwise.PROD))  # element-wise product
    n.iatt_iq_droped = L.Dropout(n.iatt_iq_eltwise, dropout_param={'dropout_ratio':config.MFB_DROPOUT_RATIO})
    n.iatt_iq_resh2 = L.Reshape(n.iatt_iq_droped, reshape_param=dict(shape=dict(dim=[-1,config.JOINT_EMB_SIZE,config.IMG_FEAT_SIZE,1])))
    n.iatt_iq_permute1 = L.Permute(n.iatt_iq_resh2, permute_param=dict(order=[0,2,1,3]))
    # distinct top name: rebinding iatt_iq_resh2 here would shadow the reshape above
    n.iatt_iq_resh3 = L.Reshape(n.iatt_iq_permute1, reshape_param=dict(shape=dict(dim=[-1,config.IMG_FEAT_SIZE,
                                                                       config.MFB_OUT_DIM,config.MFB_FACTOR_NUM])))
    n.iatt_iq_sumpool = L.Pooling(n.iatt_iq_resh3, pool=P.Pooling.SUM, \
                              pooling_param=dict(kernel_w=config.MFB_FACTOR_NUM, kernel_h=1))
    n.iatt_iq_permute2 = L.Permute(n.iatt_iq_sumpool, permute_param=dict(order=[0,2,1,3]))
    
    n.iatt_iq_sqrt = L.SignedSqrt(n.iatt_iq_permute2)
    n.iatt_iq_l2 = L.L2Normalize(n.iatt_iq_sqrt)


    ## two 1x1 conv layers: MFB_OUT_DIM -> 512 -> NUM_IMG_GLIMPSE
    n.iatt_conv1 = L.Convolution(n.iatt_iq_l2, kernel_size=1, stride=1, num_output=512, pad=0, 
                                weight_filler=dict(type='xavier'))
    n.iatt_relu = L.ReLU(n.iatt_conv1)
    n.iatt_conv2 = L.Convolution(n.iatt_relu, kernel_size=1, stride=1, num_output=config.NUM_IMG_GLIMPSE, pad=0,
                                           weight_filler=dict(type='xavier')) 
    n.iatt_resh = L.Reshape(n.iatt_conv2, reshape_param=dict(shape=dict(dim=[-1,config.NUM_IMG_GLIMPSE,config.IMG_FEAT_SIZE])))
    n.iatt_softmax = L.Softmax(n.iatt_resh, axis=2)
    n.iatt_softmax_resh = L.Reshape(n.iatt_softmax,reshape_param=dict(shape=dict(dim=[-1,config.NUM_IMG_GLIMPSE,config.IMG_FEAT_WIDTH,config.IMG_FEAT_WIDTH])))
    iatt_maps = L.Slice(n.iatt_softmax_resh, ntop=config.NUM_IMG_GLIMPSE,slice_param={'axis':1})
    dummy = L.DummyData(shape=dict(dim=[batchsize, 1]), data_filler=dict(type='constant', value=1), ntop=1)
    iatt_feature_list = []
    for i in xrange(config.NUM_IMG_GLIMPSE):
        if config.NUM_IMG_GLIMPSE == 1:
            n.__setattr__('iatt_feat%d'%i, L.SoftAttention(n.i_feat_resh, iatt_maps, dummy))
        else:
            n.__setattr__('iatt_feat%d'%i, L.SoftAttention(n.i_feat_resh, iatt_maps[i], dummy))
        n.__setattr__('iatt_feat%d_resh'%i, L.Reshape(n.__getattr__('iatt_feat%d'%i), \
                                reshape_param=dict(shape=dict(dim=[0,-1]))))
        iatt_feature_list.append(n.__getattr__('iatt_feat%d_resh'%i))
    n.iatt_feat_concat = L.Concat(*iatt_feature_list)
    n.iatt_feat_concat_resh = L.Reshape(n.iatt_feat_concat, reshape_param=dict(shape=dict(dim=[0,-1,1,1])))
    
    '''
    Fine-grained Image-Question MFB fusion
    '''

    n.mfb_q_proj = L.InnerProduct(n.q_feat_resh, num_output=config.JOINT_EMB_SIZE, 
                                  weight_filler=dict(type='xavier'))
    n.mfb_i_proj = L.InnerProduct(n.iatt_feat_concat_resh, num_output=config.JOINT_EMB_SIZE, 
                                  weight_filler=dict(type='xavier'))
    n.mfb_iq_eltwise = L.Eltwise(n.mfb_q_proj, n.mfb_i_proj, eltwise_param=dict(operation=P.Eltwise.PROD))  # element-wise product
    n.mfb_iq_drop = L.Dropout(n.mfb_iq_eltwise, dropout_param={'dropout_ratio':config.MFB_DROPOUT_RATIO})
    n.mfb_iq_resh = L.Reshape(n.mfb_iq_drop, reshape_param=dict(shape=dict(dim=[-1,1,config.MFB_OUT_DIM,config.MFB_FACTOR_NUM])))
    n.mfb_iq_sumpool = L.Pooling(n.mfb_iq_resh, pool=P.Pooling.SUM, \
                                      pooling_param=dict(kernel_w=config.MFB_FACTOR_NUM, kernel_h=1))
    n.mfb_out = L.Reshape(n.mfb_iq_sumpool,\
                                    reshape_param=dict(shape=dict(dim=[-1,config.MFB_OUT_DIM])))
    n.mfb_sign_sqrt = L.SignedSqrt(n.mfb_out)
    n.mfb_l2 = L.L2Normalize(n.mfb_sign_sqrt) 
    
    n.prediction = L.InnerProduct(n.mfb_l2, num_output=config.NUM_OUTPUT_UNITS,
                                  weight_filler=dict(type='xavier')) 
    if mode == 'val':
        n.loss = L.SoftmaxWithLoss(n.prediction, n.label)
    else:
        n.loss = L.SoftmaxKLDLoss(n.prediction, n.label) 
    return n.to_proto()
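The fusion above is Multi-modal Factorized Bilinear (MFB) pooling: an element-wise product of the two projections, sum-pooling over MFB_FACTOR_NUM factors, a signed square root, and L2 normalization. A minimal numpy sketch of the same computation (an editorial illustration assuming JOINT_EMB_SIZE = MFB_OUT_DIM * MFB_FACTOR_NUM, not code from the repo):

import numpy as np

def mfb_fuse(q_proj, i_proj, out_dim, factor):
    # q_proj, i_proj: [N, out_dim * factor] projected question / image features
    z = q_proj * i_proj                             # Eltwise PROD
    z = z.reshape(-1, out_dim, factor).sum(axis=2)  # sum-pool over the factor axis
    z = np.sign(z) * np.sqrt(np.abs(z))             # signed square root
    return z / (np.linalg.norm(z, axis=1, keepdims=True) + 1e-8)  # L2 normalize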
Example No. 29
    def gru_unit(self,
                 prefix,
                 x,
                 cont,
                 static=None,
                 h=None,
                 batch_size=100,
                 timestep=0,
                 gru_hidden=1000,
                 weight_lr_mult=1,
                 bias_lr_mult=2,
                 weight_decay_mult=1,
                 bias_decay_mult=0,
                 concat_hidden=True,
                 weight_filler=None,
                 bias_filler=None):

        #assume static input already transformed

        if not weight_filler:
            weight_filler = self.uniform_weight_filler(-0.08, 0.08)
        if not bias_filler:
            bias_filler = self.constant_filler(0)
        if not h:
            h = self.dummy_data_layer([1, batch_size, gru_hidden], 1)

        def get_name(name):
            return '%s_%s' % (prefix, name)

        def get_param(weight_name, bias_name=None):
            #TODO: write this in terms of earlier method "init_params"
            w = dict(lr_mult=weight_lr_mult,
                     decay_mult=weight_decay_mult,
                     name=get_name(weight_name))
            if bias_name is not None:
                b = dict(lr_mult=bias_lr_mult,
                         decay_mult=bias_decay_mult,
                         name=get_name(bias_name))
                return [w, b]
            return [w]

        gate_dim = gru_hidden * 3

        #transform x_t
        x = L.InnerProduct(x,
                           num_output=gate_dim,
                           axis=2,
                           weight_filler=weight_filler,
                           bias_filler=bias_filler,
                           param=get_param('W_xc', 'b_c'))
        self.rename_tops(x, get_name('%d_x_transform' % timestep))

        #transform h
        h_conted = L.Scale(h, cont, axis=0)
        h = L.InnerProduct(h_conted,
                           num_output=gru_hidden * 2,
                           axis=2,
                           bias_term=False,
                           weight_filler=weight_filler,
                           param=get_param('W_hc'))
        h_name = get_name('%d_h_transform' % timestep)
        if not hasattr(self.n, h_name):
            setattr(self.n, h_name, h)

        #gru stuff TODO: write GRUUnit in caffe?  would make all this much prettier.
        x_transform_z_r, x_transform_hc = L.Slice(x,
                                                  slice_point=gru_hidden * 2,
                                                  axis=2,
                                                  ntop=2)
        sum_items = [x_transform_z_r, h]
        if static:
            sum_items.append(static)
        z_r_sum = self.sum(sum_items)
        z_r = L.Sigmoid(z_r_sum)
        z, r = L.Slice(z_r, slice_point=gru_hidden, axis=2, ntop=2)

        z_weighted_h = self.prod([r, h_conted])
        z_h_transform = L.InnerProduct(z_weighted_h,
                                       num_output=gru_hidden,
                                       axis=2,
                                       bias_term=False,
                                       weight_filler=weight_filler,
                                       param=get_param('W_hzc'))
        sum_items = [x_transform_hc, z_h_transform]
        if static:
            sum_items.append(static)
        hc_sum = self.sum(sum_items)
        hc = L.TanH(hc_sum)

        zm1 = L.Power(z, scale=-1, shift=1)
        h_h = self.prod([zm1, h_conted])
        h_hc = self.prod([z, hc])
        h = self.sum([h_h, h_hc])

        return h
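For reference, gru_unit above implements the standard GRU update; a hedged numpy sketch of a single step (the cont sequence gating and the static term are omitted; parameter names mirror W_xc, W_hc, W_hzc above):

import numpy as np

def sigmoid(a):
    return 1.0 / (1.0 + np.exp(-a))

def gru_step(x_t, h_prev, W_xc, b_c, W_hc, W_hzc, H):
    # x_t: [D], h_prev: [H], W_xc: [D, 3H], b_c: [3H], W_hc: [H, 2H], W_hzc: [H, H]
    xt = np.dot(x_t, W_xc) + b_c                     # [3H]: z, r and candidate parts
    zr = sigmoid(xt[:2 * H] + np.dot(h_prev, W_hc))  # update and reset gates
    z, r = zr[:H], zr[H:]
    hc = np.tanh(xt[2 * H:] + np.dot(r * h_prev, W_hzc))  # candidate state
    return (1.0 - z) * h_prev + z * hc               # blend old state and candidate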
Example No. 30
    def va_net_proto(self, batch_size, train=True):
        n = caffe.NetSpec()
        # if train:
        #     source_data = '../prepare_data/AFEW-VA/crop/train_data_lmdb'
        #     source_label = '../prepare_data/AFEW-VA/crop/train_label_lmdb'
        #     mu = tools.get_mu('../prepare_data/AFEW-VA/crop/train_data.binaryproto')
        # else:
        #     source_data = '../prepare_data/AFEW-VA/crop/test_data_lmdb'
        #     source_label = '../prepare_data/AFEW-VA/crop/test_label_lmdb'
        #     mu = tools.get_mu('../prepare_data/AFEW-VA/crop/test_data.binaryproto')
        #
        # n.data = L.Data(source=source_data, backend=P.Data.LMDB, batch_size=batch_size, ntop=1,
        #                          transform_param=dict(scale=1. / 255, mean_value=mu),
        #                          input_param=dict(shape=dict(dim=[batch_size, 3, 170, 170])))
        # n.label = L.Data(source=source_label, backend=P.Data.LMDB, batch_size=batch_size, ntop=1)

        if train:
            data_layer_params = dict(batch_size=batch_size,
                                     im_shape=[170, 170],
                                     split='train',
                                     data_root=data_root,
                                     mean_file=train_mean_file)
        else:
            data_layer_params = dict(batch_size=batch_size,
                                     im_shape=[170, 170],
                                     split='test',
                                     data_root=data_root,
                                     mean_file=test_mean_file)

        n.data, n.label = L.Python(module='va_datalayer',
                                   layer='VADataLayerSync',
                                   ntop=2,
                                   param_str=str(data_layer_params))

        n.conv1, n.conv1_bn, n.conv1_scale, n.conv1_relu = block_def.conv_bn_scale_relu(
            n.data, ks=11, nout=256, stride=4, pad=0, learn_all=self.learn_all)

        n.res0, n.conv2_bn, n.conv2_scale, n.conv2_relu = block_def.conv_bn_scale_relu(
            n.conv1, ks=9, nout=128, stride=2, pad=0, learn_all=self.learn_all)

        n_core = 8
        n_attr = 2
        n_au = 2

        # 8 rpoly-2 layers for core layer
        for num in range(n_core):
            exec(
                'n.conv{0}_1, n.relu{0}_1, n.conv{0}_2, n.relu{0}_2, n.conv{0}_3, n.relu{0}_3, n.res{0} ='
                'block_def.rPoly2(n.res{1}, learn_all=self.learn_all)'.format(
                    str(num + 1), str(num)))

        # Core Layer to 4 Attribute Layer
        # exec('n.res{0}_face, n.res{0}_eye, n.res{0}_eyebrow, n.res{0}_mouth = L.Split(n.res{0}, ntop=4)'
        #      .format(str(n_core)))
        exec(
            'n.res{0}_eye, n.res{0}_eyebrow, n.res{0}_mouth = L.Split(n.res{0}, ntop=3)'
            .format(str(n_core)))

        # # 2 rpoly-2 layers for attribute layer -- Face Layer
        # for num in range(n_attr):
        #     exec('n.conv{0}_1_{2}, n.relu{0}_1_{2}, n.conv{0}_2_{2}, n.relu{0}_2_{2}, n.conv{0}_3_{2}, n.relu{0}_3_{2}, '
        #         'n.res{0}_{2} = block_def.rPoly2(n.res{1}_{2}, learn_all=self.learn_all)'.
        #         format(str(num + n_core + 1), str(num + n_core), 'face'))

        # 2 rpoly-2 layers for attribute layer -- Eye Layer
        for num in range(n_attr):
            exec(
                'n.conv{0}_1_{2}, n.relu{0}_1_{2}, n.conv{0}_2_{2}, n.relu{0}_2_{2}, n.conv{0}_3_{2}, n.relu{0}_3_{2}, '
                'n.res{0}_{2} = block_def.rPoly2(n.res{1}_{2}, learn_all=self.learn_all)'
                .format(str(num + n_core + 1), str(num + n_core), 'eye'))

        # 2 rpoly-2 layers for attribute layer -- Eyebrow Layer
        for num in range(n_attr):
            exec(
                'n.conv{0}_1_{2}, n.relu{0}_1_{2}, n.conv{0}_2_{2}, n.relu{0}_2_{2}, n.conv{0}_3_{2}, n.relu{0}_3_{2}, '
                'n.res{0}_{2} = block_def.rPoly2(n.res{1}_{2}, learn_all=self.learn_all)'
                .format(str(num + n_core + 1), str(num + n_core), 'eyebrow'))

        # 2 rpoly-2 layers for attribute layer -- Mouth Layer
        for num in range(n_attr):
            exec(
                'n.conv{0}_1_{2}, n.relu{0}_1_{2}, n.conv{0}_2_{2}, n.relu{0}_2_{2}, n.conv{0}_3_{2}, n.relu{0}_3_{2}, '
                'n.res{0}_{2} = block_def.rPoly2(n.res{1}_{2}, learn_all=self.learn_all)'
                .format(str(num + n_core + 1), str(num + n_core), 'mouth'))

        ########################################
        # Eye Layer to 2 AU Layer
        exec('n.res{0}_AU6_7, n.res{0}_AU45 = L.Split(n.res{0}_eye, ntop=2)'.
             format(str(n_core + n_attr)))

        # 2 rpoly-3 layers for AU layer -- AU6_7
        for num in range(n_au):
            exec('n.conv{0}_1_{2}, n.relu{0}_1_{2}, n.conv{0}_2_{2}, n.relu{0}_2_{2}, n.conv{0}_3_{2}, n.relu{0}_{2},' \
                   'n.conv{0}_4_{2}, n.relu{0}_4_{2}, n.res{0}_{2}= block_def.rPoly3(n.res{1}_{2})'
                 .format(str(num+n_core+n_attr+1),str(num+n_core+n_attr),'AU6_7'))

        # 2 rpoly-3 layers for AU layer -- AU45
        for num in range(n_au):
            exec('n.conv{0}_1_{2}, n.relu{0}_1_{2}, n.conv{0}_2_{2}, n.relu{0}_2_{2}, n.conv{0}_3_{2}, n.relu{0}_{2},' \
                   'n.conv{0}_4_{2}, n.relu{0}_4_{2}, n.res{0}_{2}= block_def.rPoly3(n.res{1}_{2})'
                 .format(str(num+n_core+n_attr+1),str(num+n_core+n_attr),'AU45'))

        # Eyebrow Layer to 3 AU Layer
        exec(
            'n.res{0}_AU1, n.res{0}_AU2, n.res{0}_AU4 = L.Split(n.res{0}_eyebrow, ntop=3)'
            .format(str(n_core + n_attr)))

        # 2 rpoly-3 layers for AU layer -- AU1
        for num in range(n_au):
            exec('n.conv{0}_1_{2}, n.relu{0}_1_{2}, n.conv{0}_2_{2}, n.relu{0}_2_{2}, n.conv{0}_3_{2}, n.relu{0}_{2},' \
                   'n.conv{0}_4_{2}, n.relu{0}_4_{2}, n.res{0}_{2}= block_def.rPoly3(n.res{1}_{2})'
                 .format(str(num+n_core+n_attr+1),str(num+n_core+n_attr),'AU1'))

        # 2 rpoly-3 layers for AU layer -- AU2
        for num in range(n_au):
            exec('n.conv{0}_1_{2}, n.relu{0}_1_{2}, n.conv{0}_2_{2}, n.relu{0}_2_{2}, n.conv{0}_3_{2}, n.relu{0}_{2},' \
                   'n.conv{0}_4_{2}, n.relu{0}_4_{2}, n.res{0}_{2}= block_def.rPoly3(n.res{1}_{2})'
                 .format(str(num+n_core+n_attr+1),str(num+n_core+n_attr),'AU2'))

        # 2 rpoly-3 layers for AU layer -- AU4
        for num in range(n_au):
            exec('n.conv{0}_1_{2}, n.relu{0}_1_{2}, n.conv{0}_2_{2}, n.relu{0}_2_{2}, n.conv{0}_3_{2}, n.relu{0}_{2},' \
                   'n.conv{0}_4_{2}, n.relu{0}_4_{2}, n.res{0}_{2}= block_def.rPoly3(n.res{1}_{2})'
                 .format(str(num+n_core+n_attr+1),str(num+n_core+n_attr),'AU4'))

        # Mouth Layer to 3 AU Layer
        exec(
            'n.res{0}_Chin, n.res{0}_Lip, n.res{0}_Mouth_AU = L.Split(n.res{0}_mouth, ntop=3)'
            .format(str(n_core + n_attr)))

        # 2 rpoly-3 layers for AU layer -- Chin
        for num in range(n_au):
            exec('n.conv{0}_1_{2}, n.relu{0}_1_{2}, n.conv{0}_2_{2}, n.relu{0}_2_{2}, n.conv{0}_3_{2}, n.relu{0}_{2},' \
                   'n.conv{0}_4_{2}, n.relu{0}_4_{2}, n.res{0}_{2}= block_def.rPoly3(n.res{1}_{2})'
                 .format(str(num+n_core+n_attr+1),str(num+n_core+n_attr),'Chin'))

        # 2 rpoly-3 layers for AU layer -- Lip_c & Lip_u
        # for num in range(n_au):
        #     exec('n.conv{0}_1_{2}, n.relu{0}_1_{2}, n.conv{0}_2_{2}, n.relu{0}_2_{2}, n.conv{0}_3_{2}, n.relu{0}_{2},' \
        #            'n.conv{0}_4_{2}, n.relu{0}_4_{2}, n.res{0}_{2}= block_def.rPoly3(n.res{1}_{2})'
        #          .format(str(num+n_core+n_attr+1),str(num+n_core+n_attr),'Lip'))

        exec('n.conv{0}_1_{2}, n.relu{0}_1_{2}, n.conv{0}_2_{2}, n.relu{0}_2_{2}, n.conv{0}_3_{2}, n.relu{0}_{2},' \
             'n.conv{0}_4_{2}, n.relu{0}_4_{2}, n.res{0}_{2}= block_def.rPoly3(n.res{1}_{2})'
             .format(str(n_core + n_attr + 1), str(n_core + n_attr), 'Lip'))

        # exec('n.res{0}_Lip_c, n.res{0}_Lip_u = L.Split(n.res{0}_Lip, ntop=2)'.format(str(n_core + n_attr + 1)))
        exec('n.res{0}_Lip_c = L.Split(n.res{0}_Lip, ntop=1)'.format(
            str(n_core + n_attr + 1)))

        exec('n.conv{0}_1_{2}, n.relu{0}_1_{2}, n.conv{0}_2_{2}, n.relu{0}_2_{2}, n.conv{0}_3_{2}, n.relu{0}_{2},' \
             'n.conv{0}_4_{2}, n.relu{0}_4_{2}, n.res{0}_{2}= block_def.rPoly3(n.res{1}_{2})'
             .format(str(n_core + n_attr + 2), str(n_core + n_attr +1), 'Lip_c'))

        # exec('n.conv{0}_1_{2}, n.relu{0}_1_{2}, n.conv{0}_2_{2}, n.relu{0}_2_{2}, n.conv{0}_3_{2}, n.relu{0}_{2},' \
        #      'n.conv{0}_4_{2}, n.relu{0}_4_{2}, n.res{0}_{2}= block_def.rPoly3(n.res{1}_{2})'
        #      .format(str(n_core + n_attr + 2), str(n_core + n_attr +1), 'Lip_u'))

        # 2 rpoly-3 layers for AU layer -- Mouth_AU
        for num in range(n_au):
            exec('n.conv{0}_1_{2}, n.relu{0}_1_{2}, n.conv{0}_2_{2}, n.relu{0}_2_{2}, n.conv{0}_3_{2}, n.relu{0}_{2},' \
                   'n.conv{0}_4_{2}, n.relu{0}_4_{2}, n.res{0}_{2}= block_def.rPoly3(n.res{1}_{2})'
                 .format(str(num+n_core+n_attr+1),str(num+n_core+n_attr),'Mouth_AU'))

        ########################################
        # AU6_7, AU45, AU4, Lip_c, Mouth_AU for Valence layer
        exec('n.res_Val = L.Concat(n.res{0}_AU6_7, n.res{0}_AU45, n.res{0}_AU4, n.res{0}_Lip_c, n.res{0}_Mouth_AU, axis=1)'\
            .format(str(n_core+n_attr+n_au)))

        # AU45, AU1, AU2, AU4, Chin for Arousal layer

        exec('n.res_Aro = L.Concat(n.res{0}_AU45, n.res{0}_AU1, n.res{0}_AU2, n.res{0}_AU4, n.res{0}_Chin, axis=1)' \
            .format(str(n_core + n_attr + n_au)))

        # va labels
        n.Val_label, n.Aro_label = L.Slice(n.label,
                                           name='slice',
                                           axis=1,
                                           slice_point=[1],
                                           ntop=2)
        va_layers = ['Val', 'Aro']
        out = [10, 10]
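        # valence/arousal are treated as 10-way classification (labels are bin
        # indices), hence the 10-unit score layers and SoftmaxWithLoss below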

        for num in range(2):
            exec(
                'n.fc1_{0}, n.fc1_bn_{0}, n.fc1_drop_{0} = block_def.fc_bn_drop(n.res_{0}, num_output=1024, '
                'dropout_ratio=0.5)'.format(va_layers[num]))

            exec(
                'n.fc2_{0}, n.fc2_bn_{0}, n.fc2_drop_{0} = block_def.fc_bn_drop(n.fc1_{0}, num_output=1024, '
                'dropout_ratio=0.5)'.format(va_layers[num]))

            exec(
                'n.{0}_score = block_def.fc(n.fc2_{0}, num_output={1})'.format(
                    va_layers[num], str(out[num])))

            exec('n.loss_{0} = L.SoftmaxWithLoss(n.{0}_score, n.{0}_label)'.
                 format(va_layers[num]))

            exec('n.acc_{0} = L.Accuracy(n.{0}_score, n.{0}_label)'.format(
                va_layers[num]))

        if not train:
            # test-time: per-bin probabilities for valence and arousal
            n.probs_Val = L.Softmax(n.Val_score)
            n.probs_Aro = L.Softmax(n.Aro_score)

        return n.to_proto()